poi中word转pdf-Java POI 如何操作word 格式

2022-12-13 06:48:00

java poi 如何操作word 格式

1、环境支持

1.1 添加poi支持：包下载地址 http://www.apache.org/dyn/closer.cgi/poi/release/

1.2 poi对excel文件的读取操作比较方便，poi还提供对word的doc格式文件的读取。但在它的发行版本中没有发布对word支持的模块，需要另外下载一个poi的扩展jar包。下载地址为 http://www.ibiblio.org/maven2/org/textmining/tm-extractors/0.4/ ，下载 tm-extractors-0.4.zip 这个文件

package com.ray.poi.util;

import java.io.bytearrayinputstream;
import java.io.file;
import java.io.fileinputstream;
import java.io.fileoutputstream;
import java.io.ioexception;

import org.apache.poi.poifs.filesystem.directoryentry;
import org.apache.poi.poifs.filesystem.documententry;
import org.apache.poi.poifs.filesystem.poifsfilesystem;
import org.textmining.text.extraction.wordextractor;

/**
 * Helpers for reading text out of a .doc file and writing text into one.
 *
 * NOTE(review): every identifier in this listing is lowercase (e.g. `string`,
 * `fileinputstream`); Java is case-sensitive, so the original casing has to be
 * restored before this compiles.
 *
 * @author wangzonghao
 */
public class poiwordutil {

    /**
     * Extracts the text of a .doc file.
     *
     * @param doc path of the .doc file to read
     * @return the text returned by the word extractor
     * @throws exception if the file cannot be opened or the extraction fails
     */
    public static string readdoc(string doc) throws exception {
        // The stream is closed on every path, including when extraction throws.
        try (fileinputstream in = new fileinputstream(new file(doc))) {
            wordextractor extractor = new wordextractor();
            return extractor.extracttext(in);
        }
    }

    /**
     * Writes {@code content} into a "worddocument" entry of a new POIFS file
     * system and saves that file system to {@code path}.
     *
     * The content is converted with the no-argument {@code getbytes()}, i.e.
     * with the default charset.
     *
     * @param path    destination file path
     * @param content text to store
     * @return {@code true} when the file was written, {@code false} when an
     *         I/O error occurred (the stack trace is printed)
     */
    public static boolean writedoc(string path, string content) {
        try {
            poifsfilesystem fs = new poifsfilesystem();
            directoryentry directory = fs.getroot();

            try (bytearrayinputstream bais = new bytearrayinputstream(content.getbytes())) {
                directory.createdocument("worddocument", bais);
            }

            // Open the target only once the document entry exists, and always close it.
            try (fileoutputstream ostream = new fileoutputstream(path)) {
                fs.writefilesystem(ostream);
            }
            return true;
        } catch (ioexception e) {
            e.printstacktrace();
            return false;
        }
    }

}
测试

package com.ray.poi.util;

import junit.framework.testcase;

/**
 * JUnit 3 style tests for poiwordutil.
 *
 * Exceptions are allowed to propagate so that a failing read makes the test
 * fail instead of only printing a stack trace.
 */
public class poiutiltest extends testcase {

    /** Reads the sample document and prints the extracted text. */
    public void testreaddoc() throws exception {
        string text = poiwordutil.readdoc("e:/work_space/poi/com/ray/poi/util/demo.doc");
        system.out.println(text);
        assertnotnull(text);
    }

    /** Reads the sample document and writes its text out to a new file. */
    public void testwritedoc() throws exception {
        string wr = poiwordutil.readdoc("e:/work_space/poi/com/ray/poi/util/demo.doc");
        assertnotnull(wr);

        poiwordutil.writedoc("c:\\demo.doc", wr);
    }

}

用poi读取word总是报异常,我真无语了,我的qq是45071...

/**
 * Reads the text content of an office file.
 *
 * Any failure (file cannot be opened, extractor creation or text extraction
 * fails, closing the stream fails) is printed to the error stream and is not
 * rethrown.
 *
 * @param officepath path of the office file to read
 * @return the extracted text, or {@code null} when it could not be extracted
 */
public string readoffice(string officepath) {
    string text = null;
    // try-with-resources: the stream is closed only if it was actually opened,
    // so a failed open no longer triggers a NullPointerException on close.
    try (fileinputstream in = new fileinputstream(officepath)) {
        poitextextractor extractor = extractorfactory.createextractor(in);
        text = extractor.gettext();
    } catch (exception e) {
        e.printstacktrace();
    }
    return text;
}

不知道你需要实现什么功能，这个是我通过poi读取office文件的方法，你试试，看是否会报错，通过office文件路径读取文件中字符串，如果遇到是图片的肯定不能读取的。

急求poi 将数据导出到word的实例

import java.io.*;
import java.util.*;
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.util.littleendian;

/**
 * Small demo that stores a string in a POIFS container and saves it to disk.
 *
 * NOTE(review): every identifier in this listing is lowercase (e.g. `string`,
 * `poifsfilesystem`); Java is case-sensitive, so the original casing has to be
 * restored before this compiles.
 */
public class wordtest {

    public wordtest() {
    }

    /**
     * Writes {@code content} into a "worddocument" entry of a new POIFS file
     * system and saves that file system to {@code path}.
     *
     * The content is converted with the no-argument {@code getbytes()}, i.e.
     * with the default charset.
     *
     * @param path    destination file path
     * @param content text to store
     * @return {@code true} when the file was written, {@code false} when an
     *         I/O error occurred (the stack trace is printed)
     */
    public static boolean writewordfile(string path, string content) {
        try {
            poifsfilesystem fs = new poifsfilesystem();
            directoryentry directory = fs.getroot();

            try (bytearrayinputstream bais = new bytearrayinputstream(content.getbytes())) {
                directory.createdocument("worddocument", bais);
            }

            // Open the target only once the document entry exists, and always close it.
            try (fileoutputstream ostream = new fileoutputstream(path)) {
                fs.writefilesystem(ostream);
            }
            return true;
        } catch (ioexception e) {
            e.printstacktrace();
            return false;
        }
    }

    /** Writes the string "hello" to e://test.doc. */
    public static void main(string[] args) {
        writewordfile("e://test.doc", "hello");
    }
}
/*
public string extracttext(inputstream in) throws ioexception {
arraylist text = new arraylist();
poifsfilesystem fsys = new poifsfilesystem(in);

documententry headerprops = (documententry) fsys.getroot().getentry("worddocument");
documentinputstream din = fsys.createdocumentinputstream("worddocument");
byte[] header = new byte[headerprops.getsize()];

din.read(header);
din.close();
// prende le informazioni dall'header del documento
int info = littleendian.getshort(header, 0xa);

boolean usetable1 = (info & 0x200) != 0;

//boolean usetable1 = true;

// prende informazioni dalla piece table
int complexoffset = littleendian.getint(header, 0x1a2);
//int complexoffset = littleendian.getint(header);

string tablename = null;
if (usetable1) {
tablename = "1table";
} else {
tablename = "0table";
}

documententry table = (documententry) fsys.getroot().getentry(tablename);
byte[] tablestream = new byte[table.getsize()];

din = fsys.createdocumentinputstream(tablename);

din.read(tablestream);
din.close();

din = null;
fsys = null;
table = null;
headerprops = null;

int multiple = findtext(tablestream, complexoffset, text);

stringbuffer sb = new stringbuffer();
int size = text.size();
tablestream = null;

for (int x = 0; x < size; x++) {

wordtextpiece nextpiece = (wordtextpiece) text.get(x);
int start = nextpiece.getstart();
int length = nextpiece.getlength();

boolean unicode = nextpiece.usesunicode();
string tostr = null;
if (unicode) {
tostr = new string(header, start, length * multiple, "utf-16le");
} else {
tostr = new string(header, start, length, "iso-8859-1");
}
sb.append(tostr).append(" ");

}
return sb.tostring();
}

private static int findtext(byte[] tablestream, int complexoffset, arraylist text)
throws ioexception {
//actual text
int pos = complexoffset;
int multiple = 2;
//skips through the prms before we reach the piece table. these contain data
//for actual fast saved files
while (tablestream[pos] == 1) {
pos++;
int skip = littleendian.getshort(tablestream, pos);
pos += 2 + skip;
}
if (tablestream[pos] != 2) {
throw new ioexception("corrupted word file");
} else {
//parse out the text pieces
int piecetablesize = littleendian.getint(tablestream, ++pos);
pos += 4;
int pieces = (piecetablesize - 4) / 12;
for (int x = 0; x < pieces; x++) {
int filepos =
littleendian.getint(tablestream, pos + ((pieces + 1) * 4) + (x * 8) + 2);
boolean unicode = false;
if ((filepos & 0x40000000) == 0) {
unicode = true;
} else {
unicode = false;
multiple = 1;
filepos &= ~(0x40000000); //gives me fc in doc stream
filepos /= 2;
}
int totlength =
littleendian.getint(tablestream, pos + (x + 1) * 4)
- littleendian.getint(tablestream, pos + (x * 4));

wordtextpiece piece = new wordtextpiece(filepos, totlength, unicode);
text.add(piece);

}

}
return multiple;
}
public static void main(string[] args){
wordtest w = new wordtest();
poifsfilesystem ps = new poifsfilesystem();
try{

file file = new file("c:\\test.doc");

inputstream in = new fileinputstream(file);
string s = w.extracttext(in);
system.out.println(s);

}catch(exception e){
e.printstacktrace();
}

}
public boolean writewordfile(string path, string content) {
boolean w = false;
try {

// byte b[] = content.getbytes("iso-8859-1");
byte b[] = content.getbytes();

bytearrayinputstream bais = new bytearrayinputstream(b);

poifsfilesystem fs = new poifsfilesystem();
directoryentry directory = fs.getroot();

documententry de = directory.createdocument("worddocument", bais);

fileoutputstream ostream = new fileoutputstream(path);

fs.writefilesystem(ostream);

bais.close();
ostream.close();

} catch (ioexception e) {
e.printstacktrace();
}

return w;
}

}

class wordtextpiece {
private int _fcstart;
private boolean _usesunicode;
private int _length;

public wordtextpiece(int start, int length, boolean unicode) {
_usesunicode = unicode;
_length = length;
_fcstart = start;
}
public boolean usesunicode() {
return _usesunicode;
}

public int getstart() {
return _fcstart;
}
public int getlength() {
return _length;
}

}
*/

poi 根据模板导出word

// Fills a .docx template: reads word/document.xml from c:/3.docx, substitutes
// the ${key} placeholder, and writes a copy of the archive to response.docx in
// which word/document.xml holds the substituted text and
// word/media/image1.png is replaced by the bytes of c:/aaa.jpg.
//
// NOTE(review): every identifier in this listing is lowercase (e.g. `string`,
// `zipfile`); Java is case-sensitive, so the original casing has to be
// restored before this compiles.
try (zipfile docxfile = new zipfile(new file("c:/3.docx"))) {
    zipentry documentxml = docxfile.getentry("word/document.xml");

    // Read the document part line by line; line terminators are dropped.
    stringbuilder xml = new stringbuilder();
    try (bufferedreader br = new bufferedreader(
            new inputstreamreader(docxfile.getinputstream(documentxml), "utf-8"))) {
        string str;
        while ((str = br.readline()) != null) {
            xml.append(str);
        }
    }

    // replace() matches the placeholder literally; replaceall() would try to
    // compile "${key}" as a regular expression.
    string s = xml.tostring().replace("${key}", "替换内容");
    system.out.println(s);

    try (zipoutputstream docxoutfile = new zipoutputstream(new fileoutputstream(
            "response.docx"))) {
        byte[] buffer = new byte[8192];
        enumeration<? extends zipentry> entriesiter = docxfile.entries();
        while (entriesiter.hasmoreelements()) {
            zipentry entry = entriesiter.nextelement();
            system.out.println(entry.getname());

            docxoutfile.putnextentry(new zipentry(entry.getname()));
            if (entry.getname().equals("word/document.xml")) {
                // Substituted document text instead of the template's original part.
                byte[] datas = s.getbytes("utf-8");
                docxoutfile.write(datas, 0, datas.length);
            } else {
                // The image entry takes its bytes from an external file; every
                // other entry is copied unchanged from the template.
                try (inputstream incoming = entry.getname().equals("word/media/image1.png")
                        ? new fileinputstream("c:/aaa.jpg")
                        : docxfile.getinputstream(entry)) {
                    // Copy until end of stream: a single read() is not
                    // guaranteed to fill the buffer.
                    int count;
                    while ((count = incoming.read(buffer)) != -1) {
                        docxoutfile.write(buffer, 0, count);
                    }
                }
            }
            docxoutfile.closeentry();
        }
    }
}

上一页：PDF转Word使用哪款浏览器最方便？

下一页：pdf文字转换为word-如何把pdf转成word文字