public final class WordExtractor extends java.lang.Object implements POIOLE2TextExtractor
Constructor and Description |
---|
WordExtractor(DirectoryNode dir) |
WordExtractor(HWPFDocument doc)
Create a new Word Extractor
|
WordExtractor(java.io.InputStream is)
Create a new Word Extractor
|
WordExtractor(POIFSFileSystem fs)
Create a new Word Extractor
|
Modifier and Type | Method and Description |
---|---|
java.lang.String[] |
getCommentsText() |
HWPFDocument |
getDocument()
Return the underlying POIDocument
|
java.lang.String[] |
getEndnoteText() |
HWPFDocument |
getFilesystem() |
java.lang.String |
getFooterText()
Deprecated.
3.8 beta 4
|
java.lang.String[] |
getFootnoteText() |
java.lang.String |
getHeaderText()
Deprecated.
3.8 beta 4
|
java.lang.String[] |
getMainTextboxText() |
java.lang.String[] |
getParagraphText()
Get the text from the word file, as an array with one String per
paragraph
|
java.lang.String |
getText()
Grab the text, based on the WordToTextConverter.
|
java.lang.String |
getTextFromPieces()
Grab the text out of the text pieces.
|
boolean |
isCloseFilesystem() |
void |
setCloseFilesystem(boolean doCloseFilesystem) |
static java.lang.String |
stripFields(java.lang.String text)
Removes any fields (eg macros, page markers etc) from the string.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getDocSummaryInformation, getMetadataTextExtractor, getRoot, getSummaryInformation
close
public WordExtractor(java.io.InputStream is) throws java.io.IOException
is
- InputStream containing the word file
java.io.IOException
public WordExtractor(POIFSFileSystem fs) throws java.io.IOException
fs
- POIFSFileSystem containing the word file
java.io.IOException
public WordExtractor(DirectoryNode dir) throws java.io.IOException
java.io.IOException
public WordExtractor(HWPFDocument doc)
doc
- The HWPFDocument to extract from
public java.lang.String[] getParagraphText()
public java.lang.String[] getFootnoteText()
public java.lang.String[] getMainTextboxText()
public java.lang.String[] getEndnoteText()
public java.lang.String[] getCommentsText()
@Deprecated public java.lang.String getHeaderText()
@Deprecated public java.lang.String getFooterText()
public java.lang.String getTextFromPieces()
public java.lang.String getText()
getText
in interface POITextExtractor
public static java.lang.String stripFields(java.lang.String text)
public HWPFDocument getDocument()
POIOLE2TextExtractor
getDocument
in interface POIOLE2TextExtractor
getDocument
in interface POITextExtractor
public void setCloseFilesystem(boolean doCloseFilesystem)
setCloseFilesystem
in interface POITextExtractor
doCloseFilesystem
- true (default), if underlying resources/filesystem should be closed on POITextExtractor.close()
public boolean isCloseFilesystem()
isCloseFilesystem
in interface POITextExtractor
true, if resources/filesystem should be closed on POITextExtractor.close()
public HWPFDocument getFilesystem()
getFilesystem
in interface POITextExtractor
Copyright 2022 The Apache Software Foundation or its licensors, as applicable.