@Beta public class WordToHtmlConverter extends AbstractWordConverter
This implementation doesn't create images or links to them. This can be
changed by overriding the AbstractWordConverter.processImage(Element, boolean, Picture)
method.
UNICODECHAR_NO_BREAK_SPACE, UNICODECHAR_NONBREAKING_HYPHEN, UNICODECHAR_ZERO_WIDTH_SPACE
Constructor and Description |
---|
WordToHtmlConverter(org.w3c.dom.Document document)
Creates a new instance of
WordToHtmlConverter. |
WordToHtmlConverter(HtmlDocumentFacade htmlDocumentFacade) |
Modifier and Type | Method and Description |
---|---|
protected void |
afterProcess()
Special actions that need to be called after processing is complete, like
updating stylesheets or building the document notes list.
|
org.w3c.dom.Document |
getDocument() |
static void |
main(java.lang.String[] args)
Java main() interface to interact with
WordToHtmlConverter |
protected void |
outputCharacters(org.w3c.dom.Element pElement,
CharacterRun characterRun,
java.lang.String text) |
protected void |
processBookmarks(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range range,
int currentTableLevel,
java.util.List<Bookmark> rangeBookmarks)
Wrap range into bookmark(s) and process it.
|
protected void |
processDocumentInformation(SummaryInformation summaryInformation) |
void |
processDocumentPart(HWPFDocumentCore wordDocument,
Range range) |
protected void |
processDrawnObject(HWPFDocument doc,
CharacterRun characterRun,
OfficeDrawing officeDrawing,
java.lang.String path,
org.w3c.dom.Element block) |
protected void |
processDropDownList(org.w3c.dom.Element block,
CharacterRun characterRun,
java.lang.String[] values,
int defaultIndex) |
protected void |
processEndnoteAutonumbered(HWPFDocument wordDocument,
int noteIndex,
org.w3c.dom.Element block,
Range endnoteTextRange) |
protected void |
processFootnoteAutonumbered(HWPFDocument wordDocument,
int noteIndex,
org.w3c.dom.Element block,
Range footnoteTextRange) |
protected void |
processHyperlink(HWPFDocumentCore wordDocument,
org.w3c.dom.Element currentBlock,
Range textRange,
int currentTableLevel,
java.lang.String hyperlink) |
protected void |
processImage(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture,
java.lang.String imageSourcePath) |
protected void |
processImageWithoutPicturesManager(org.w3c.dom.Element currentBlock,
boolean inlined,
Picture picture) |
protected void |
processLineBreak(org.w3c.dom.Element block,
CharacterRun characterRun) |
protected void |
processNoteAutonumbered(HWPFDocument doc,
java.lang.String type,
int noteIndex,
org.w3c.dom.Element block,
Range noteTextRange) |
protected void |
processPageBreak(HWPFDocumentCore wordDocument,
org.w3c.dom.Element flow) |
protected void |
processPageref(HWPFDocumentCore hwpfDocument,
org.w3c.dom.Element currentBlock,
Range textRange,
int currentTableLevel,
java.lang.String pageref) |
protected void |
processParagraph(HWPFDocumentCore hwpfDocument,
org.w3c.dom.Element parentElement,
int currentTableLevel,
Paragraph paragraph,
java.lang.String bulletText) |
protected void |
processSection(HWPFDocumentCore wordDocument,
Section section,
int sectionCounter) |
protected void |
processSingleSection(HWPFDocumentCore wordDocument,
Section section) |
protected void |
processTable(HWPFDocumentCore hwpfDocument,
org.w3c.dom.Element flow,
Table table) |
getCharacterRunTriplet, getFontReplacer, getNumberColumnsSpanned, getNumberRowsSpanned, getPicturesManager, processCharacters, processDeadField, processDocument, processDrawnObject, processField, processImage, processNoteAnchor, processOle2, processParagraphes, processSymbol, setFontReplacer, setPicturesManager, tryDeadField
public WordToHtmlConverter(org.w3c.dom.Document document)
Creates a new instance of WordToHtmlConverter. Can be used to
output several HWPFDocuments into a single HTML document.
Parameters:
document - XML DOM Document used as HTML document
public WordToHtmlConverter(HtmlDocumentFacade htmlDocumentFacade)
public static void main(java.lang.String[] args) throws java.io.IOException, javax.xml.parsers.ParserConfigurationException, javax.xml.transform.TransformerException
Java main() interface to interact with WordToHtmlConverter.
Usage: WordToHtmlConverter infile outfile
Where infile is an input .doc file (Word 95-2007) which will be rendered as HTML into outfile.
Throws:
java.io.IOException
javax.xml.parsers.ParserConfigurationException
javax.xml.transform.TransformerException
protected void afterProcess()
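Description copied from class: AbstractWordConverter
Special actions that need to be called after processing complete, like updating stylesheets or building document notes list.
afterProcess in class AbstractWordConverter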
public org.w3c.dom.Document getDocument()
getDocument
in class AbstractWordConverter
protected void outputCharacters(org.w3c.dom.Element pElement, CharacterRun characterRun, java.lang.String text)
outputCharacters
in class AbstractWordConverter
protected void processBookmarks(HWPFDocumentCore wordDocument, org.w3c.dom.Element currentBlock, Range range, int currentTableLevel, java.util.List<Bookmark> rangeBookmarks)
Description copied from class: AbstractWordConverter
Wrap range into bookmark(s) and process it.
processBookmarks in class AbstractWordConverter
protected void processDocumentInformation(SummaryInformation summaryInformation)
processDocumentInformation
in class AbstractWordConverter
public void processDocumentPart(HWPFDocumentCore wordDocument, Range range)
processDocumentPart
in class AbstractWordConverter
protected void processDropDownList(org.w3c.dom.Element block, CharacterRun characterRun, java.lang.String[] values, int defaultIndex)
processDropDownList
in class AbstractWordConverter
protected void processDrawnObject(HWPFDocument doc, CharacterRun characterRun, OfficeDrawing officeDrawing, java.lang.String path, org.w3c.dom.Element block)
processDrawnObject
in class AbstractWordConverter
protected void processEndnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, org.w3c.dom.Element block, Range endnoteTextRange)
processEndnoteAutonumbered
in class AbstractWordConverter
protected void processFootnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, org.w3c.dom.Element block, Range footnoteTextRange)
processFootnoteAutonumbered
in class AbstractWordConverter
protected void processHyperlink(HWPFDocumentCore wordDocument, org.w3c.dom.Element currentBlock, Range textRange, int currentTableLevel, java.lang.String hyperlink)
processHyperlink
in class AbstractWordConverter
protected void processImage(org.w3c.dom.Element currentBlock, boolean inlined, Picture picture, java.lang.String imageSourcePath)
processImage
in class AbstractWordConverter
protected void processImageWithoutPicturesManager(org.w3c.dom.Element currentBlock, boolean inlined, Picture picture)
processImageWithoutPicturesManager
in class AbstractWordConverter
protected void processLineBreak(org.w3c.dom.Element block, CharacterRun characterRun)
processLineBreak
in class AbstractWordConverter
protected void processNoteAutonumbered(HWPFDocument doc, java.lang.String type, int noteIndex, org.w3c.dom.Element block, Range noteTextRange)
protected void processPageBreak(HWPFDocumentCore wordDocument, org.w3c.dom.Element flow)
processPageBreak
in class AbstractWordConverter
protected void processPageref(HWPFDocumentCore hwpfDocument, org.w3c.dom.Element currentBlock, Range textRange, int currentTableLevel, java.lang.String pageref)
processPageref
in class AbstractWordConverter
protected void processParagraph(HWPFDocumentCore hwpfDocument, org.w3c.dom.Element parentElement, int currentTableLevel, Paragraph paragraph, java.lang.String bulletText)
processParagraph
in class AbstractWordConverter
protected void processSection(HWPFDocumentCore wordDocument, Section section, int sectionCounter)
processSection
in class AbstractWordConverter
protected void processSingleSection(HWPFDocumentCore wordDocument, Section section)
processSingleSection
in class AbstractWordConverter
protected void processTable(HWPFDocumentCore hwpfDocument, org.w3c.dom.Element flow, Table table)
processTable
in class AbstractWordConverter
Copyright 2021 The Apache Software Foundation or its licensors, as applicable.