Thursday, November 30, 2017

Examples using Apache POI

Here are several examples of working with Apache POI through the years.

The following example was tested using:
Windows 7
Oracle Java 1.8.0_66
Apache poi 3.10.1

1. Write out a Microsoft XLSX file with two tabs, one for SUCCESS and one for FAILURE data
Problem
Write out a Microsoft XLSX file with two tabs, one for SUCCESS and one for FAILURE data.

Solution

/**
 * Builds a workbook with two tabs, SUCCESS and FAILURE, and writes it to
 * ./src/test/resources/Sample1.xlsx.
 *
 * XSSF is prefixed before the class name to indicate operations related to a
 * Microsoft Excel 2007 file or later.
 *
 * @throws Exception if the file cannot be opened or written; the failure is
 *         propagated so that the test fails instead of passing silently
 */
@Test
public void createXlsxTest() throws Exception {
    XSSFWorkbook workbook = new XSSFWorkbook();

    // column headers, shared by both tabs
    List<String> columnHeaderList = new ArrayList<String>(3);
    columnHeaderList.add("ID");
    columnHeaderList.add("Status");
    columnHeaderList.add("Description");

    // create hard-coded data for SUCCESS and FAILURE status
    Object[][] successDataTypes = {
            // Don't include headers since data would be dynamic
            {1, "SUCCESS", "Description 1"},
            {2, "SUCCESS", "Description 2"},
            {3, "SUCCESS", "Description 3"},
            {4, "SUCCESS", "Description 4"},
            {5, "SUCCESS", "None"}
    };
    Object[][] failureDataTypes = {
            // Don't include headers since data would be dynamic
            {4, "FAILURE", "Description 1"},
            {5, "FAILURE", "Description 2"},
    };

    createTab(workbook, columnHeaderList, "SUCCESS", successDataTypes);
    createTab(workbook, columnHeaderList, "FAILURE", failureDataTypes);

    // write data out to file; try-with-resources closes the stream on every path,
    // and any I/O error is left to propagate so the test reports it
    try (FileOutputStream outputStream = new FileOutputStream("./src/test/resources/Sample1.xlsx")) {
        workbook.write(outputStream);
    }
}

/**
 * Creates a sheet named {@code status} in the workbook and fills it with a header
 * row followed by one row per entry of {@code dataTypes}.
 *
 * @param workbook         workbook that receives the new sheet
 * @param columnHeaderList header texts; one header cell is written per entry
 * @param status           name of the sheet to create
 * @param dataTypes        row data; each inner array becomes one row, one cell per element
 * @throws Exception kept in the signature for existing callers
 */
private void createTab(XSSFWorkbook workbook, List<String> columnHeaderList, String status,
        Object[][] dataTypes) throws Exception {
    XSSFSheet sheet = workbook.createSheet(status);

    int rowNum = 0;

    // header row: one cell per entry in columnHeaderList
    Row row = sheet.createRow(rowNum++);
    int colNum = 0;
    for (String header : columnHeaderList) {
        Cell cell = row.createCell(colNum++);
        cell.setCellValue(header);
    }

    // data rows
    for (Object[] datatype : dataTypes) {
        row = sheet.createRow(rowNum++);
        colNum = 0;
        for (Object field : datatype) {
            // the cell is always created so that columns stay aligned with the headers
            Cell cell = row.createCell(colNum++);
            if (field == null) {
                // nothing to write; leave the cell blank
                continue;
            }
            if (field instanceof String) {
                cell.setCellValue((String) field);
            } else if (field instanceof Number) {
                // covers Integer as before, plus Long, Double, etc.
                cell.setCellValue(((Number) field).doubleValue());
            } else if (field instanceof Boolean) {
                cell.setCellValue((Boolean) field);
            } else {
                // fall back to the text form rather than silently dropping the value
                cell.setCellValue(String.valueOf(field));
            }
        }
    }
}


The following four examples were tested using:
Windows 7 64-bit
Oracle java jdk 1.7.0_51
apache poi 3.10.1

1. How to add image to word document
Problem
Below is an example to insert an image into an existing word document (.docx).

Solution
You’ll need to update the second, fourth, and fifth parameters of addPicture to match the properties of the source image file. The second parameter is the image type, the fourth parameter is the image width, and the fifth parameter is the image height (300×285 pixels in this example, each converted with Units.toEMU).
File targetWordFile = null;
FileInputStream fisTargetWordFile = null;
File imageFile = null;
FileInputStream fisImageFile = null;
try {
// set up target word file
targetWordFile = new File(“./src/test/resources/Target1.docx”);
fisTargetWordFile = new FileInputStream(targetWordFile);
// add picture to document
XWPFDocument updatedTargetWordDoc = new XWPFDocument(OPCPackage.open(fisTargetWordFile));
imageFile = new File(“./src/test/resources/sample.png”);
fisImageFile = new FileInputStream(imageFile);
updatedTargetWordDoc.createParagraph().createRun().addPicture(fisImageFile,
XWPFDocument.PICTURE_TYPE_PNG, “fileName”, Units.toEMU(300), Units.toEMU(285)); // 300×285 pixels
updatedTargetWordDoc.write(new FileOutputStream(targetWordFile.toPath().toString()));
} catch (IOException ex) {
ex.printStackTrace();
   } catch (Exception e) {
   e.printStackTrace();
   }
finally {
       if (fisTargetWordFile != null)
       fisTargetWordFile.close();
       fisTargetWordFile = null;
       if (fisImageFile != null)
       fisImageFile.close();
       fisImageFile = null;
   }
}


2. Can’t obtain the input stream from /docProps/app.xml
Problem

When using Apache POI to manipulate a Microsoft Word file, I received the following error: “Can’t obtain the input stream from /docProps/app.xml”.

org.apache.poi.POIXMLException: java.io.IOException: Can’t obtain the input stream from /docProps/app.xml
at org.apache.poi.POIXMLDocument.getProperties(POIXMLDocument.java:141)
at org.apache.poi.POIXMLDocument.write(POIXMLDocument.java:177)
at com.codified.word.PoiTest.copyTemplateDoc_bad(PoiTest.java:129)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:459)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:675)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:382)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:192)
Caused by: java.io.IOException: Can’t obtain the input stream from /docProps/app.xml
at org.apache.poi.openxml4j.opc.PackagePart.getInputStream(PackagePart.java:500)
at org.apache.poi.POIXMLProperties.<init>(POIXMLProperties.java:75)
at org.apache.poi.POIXMLDocument.getProperties(POIXMLDocument.java:139)
… 26 more

The issue is that the following code is attempting to instantiate a new XWPFDocument with the OPCPackage parameter using a path to an existing file.

// Problematic form: the OPCPackage is opened from the path of an existing file
XWPFDocument updatedTargetWordDoc = new XWPFDocument(OPCPackage.open(targetWordFile.toPath().toString()));

Solution
The workaround for this issue is to instantiate the XWPFDocument with an OPCPackage that is opened from a FileInputStream instead of from a file path.
// Workaround: open the OPCPackage from the FileInputStream, then wrap it in the document
OPCPackage targetPackage = OPCPackage.open(fisTargetWordFile);
XWPFDocument updatedTargetWordDoc = new XWPFDocument(targetPackage);


3. How to copy existing word document to template word document
Problem

Below is an example to copy over an existing word document (.docx) body contents, into another word document template(.docx).  It assumes that the template word document has custom header and footer info/images/etc in it.

Solution
// Copies the template document to a new target document, then appends the plain text of
// the body document to the target as a single paragraph.
File templateWordFile = new File("./src/test/resources/Template1.docx");
File bodyWordFile = new File("./src/test/resources/Body1.docx");
File targetWordFile = new File("./src/test/resources/Target1.docx");

XWPFWordExtractor sourceExtractor = null;
try {
    // copy template to new target doc
    Files.copy(templateWordFile.toPath(), targetWordFile.toPath(), StandardCopyOption.REPLACE_EXISTING);

    // extract all text from the "body word document", no formatting at this point
    String bodyText;
    try (FileInputStream fisBodyWordFile = new FileInputStream(bodyWordFile)) {
        XWPFDocument sourceDoc = new XWPFDocument(fisBodyWordFile);
        sourceExtractor = new XWPFWordExtractor(sourceDoc);
        bodyText = sourceExtractor.getText();
    }

    // set the body of the new target doc
    try (FileInputStream fisTargetWordFile = new FileInputStream(targetWordFile)) {
        XWPFDocument updatedTargetWordDoc = new XWPFDocument(OPCPackage.open(fisTargetWordFile));
        XWPFParagraph tempBody = updatedTargetWordDoc.createParagraph();
        XWPFRun tempRun = tempBody.createRun();
        tempRun.setText(bodyText);

        // the output stream is opened only after the document has been loaded, because
        // opening it truncates the target file; it is closed as soon as the write completes
        try (FileOutputStream fosTargetWordFile = new FileOutputStream(targetWordFile)) {
            updatedTargetWordDoc.write(fosTargetWordFile);
        }
    }
} catch (Exception e) {
    e.printStackTrace();
} finally {
    if (sourceExtractor != null) {
        sourceExtractor.close();
    }
}


4. Get all paragraphs in word doc
Problem

Below is an example of how to retrieve all paragraphs from a Word .docx document and print out their contents.

Solution
// Reads a .docx file and prints the text of every paragraph, one per line.
File bodyWordFile = new File("./src/test/resources/Body1.docx");

// try-with-resources closes the input stream on every path
try (FileInputStream fisBodyWordFile = new FileInputStream(bodyWordFile)) {
    // Get the word document you want to parse
    XWPFDocument sourceDoc = new XWPFDocument(fisBodyWordFile);

    // grab all paragraphs and print each one between dash markers
    for (XWPFParagraph paragraph : sourceDoc.getParagraphs()) {
        System.out.println("—" + paragraph.getText() + "—");
    }
} catch (Exception e) {
    e.printStackTrace();
}


No comments:

Post a Comment

I appreciate your time in leaving a comment!