public final class WordExtractor extends POIOLE2TextExtractor
document
Constructor and Description |
---|
WordExtractor(DirectoryNode dir) |
WordExtractor(HWPFDocument doc)
Create a new Word Extractor
|
WordExtractor(java.io.InputStream is)
Create a new Word Extractor
|
WordExtractor(POIFSFileSystem fs)
Create a new Word Extractor
|
Modifier and Type | Method and Description |
---|---|
java.lang.String[] |
getCommentsText() |
java.lang.String[] |
getEndnoteText() |
java.lang.String |
getFooterText()
Deprecated.
3.8 beta 4
|
java.lang.String[] |
getFootnoteText() |
java.lang.String |
getHeaderText()
Deprecated.
3.8 beta 4
|
java.lang.String[] |
getMainTextboxText() |
java.lang.String[] |
getParagraphText()
Get the text from the word file, as an array with one String per
paragraph
|
protected static java.lang.String[] |
getParagraphText(Range r) |
java.lang.String |
getText()
Grab the text, based on the WordToTextConverter.
|
java.lang.String |
getTextFromPieces()
Grab the text out of the text pieces.
|
static void |
main(java.lang.String[] args)
Command line extractor, so people will stop moaning that they can't just
run this.
|
static java.lang.String |
stripFields(java.lang.String text)
Removes any fields (eg macros, page markers etc) from the string.
|
getDocSummaryInformation, getDocument, getMetadataTextExtractor, getRoot, getSummaryInformation
close, setFilesystem
public WordExtractor(java.io.InputStream is) throws java.io.IOException
is
- InputStream containing the word file
java.io.IOException
public WordExtractor(POIFSFileSystem fs) throws java.io.IOException
fs
- POIFSFileSystem containing the word file
java.io.IOException
public WordExtractor(DirectoryNode dir) throws java.io.IOException
java.io.IOException
public WordExtractor(HWPFDocument doc)
doc
- The HWPFDocument to extract from
public static void main(java.lang.String[] args) throws java.io.IOException
java.io.IOException
public java.lang.String[] getParagraphText()
public java.lang.String[] getFootnoteText()
public java.lang.String[] getMainTextboxText()
public java.lang.String[] getEndnoteText()
public java.lang.String[] getCommentsText()
protected static java.lang.String[] getParagraphText(Range r)
@Deprecated public java.lang.String getHeaderText()
@Deprecated public java.lang.String getFooterText()
public java.lang.String getTextFromPieces()
public java.lang.String getText()
getText
in class POITextExtractor
public static java.lang.String stripFields(java.lang.String text)
Copyright 2018 The Apache Software Foundation or its licensors, as applicable.