OOXMLWordAndPowerPointTextHandler.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.ooxml;
import java.util.Date;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.utils.DateUtils;
/**
* This class is intended to handle anything that might contain IBodyElements:
* main document, headers, footers, notes, slides, etc.
* <p>
* <p/>
* <p>
* This class does not generally check for namespaces, and it can be applied
* to PPTX and DOCX for text extraction.
* <p>
* <p/>
* This can be used to scrape content from charts. It currently ignores
* formula (<c:f/>) elements
* <p>
* <p/>
* This does not work with .xlsx or .vsdx.
* <p>
* TODO: move this into POI?
*/
public class OOXMLWordAndPowerPointTextHandler extends DefaultHandler {
public final static String W_NS =
"http://schemas.openxmlformats.org/wordprocessingml/2006/main";
private final static String R = "r";
private final static String FLD = "fld";
private final static String RPR = "rPr";
private final static String P = "p";
private final static String P_STYLE = "pStyle";
private final static String PPR = "pPr";
private final static String T = "t";
private final static String TAB = "tab";
private final static String B = "b";
private final static String ILVL = "ilvl";
private final static String NUM_ID = "numId";
private final static String TC = "tc";
private final static String TR = "tr";
private final static String I = "i";
private final static String U = "u";
private final static String STRIKE = "strike";
private final static String NUM_PR = "numPr";
private final static String BR = "br";
private final static String HYPERLINK = "hyperlink";
private final static String HLINK_CLICK = "hlinkClick"; //pptx hlink
private final static String TBL = "tbl";
private final static String PIC = "pic";
private final static String PICT = "pict";
private final static String IMAGEDATA = "imagedata";
private final static String BLIP = "blip";
private final static String CHOICE = "Choice";
private final static String FALLBACK = "Fallback";
private final static String OLE_OBJECT = "OLEObject";
private final static String CR = "cr";
private final static String V = "v";
private final static String RUBY = "ruby"; //phonetic section
private final static String RT = "rt"; //phonetic run
private static final String VAL = "val";
private static final String SLIDE = "sld";
private static final String SHOW = "show";
private final static String MC_NS =
"http://schemas.openxmlformats.org/markup-compatibility/2006";
private final static String O_NS = "urn:schemas-microsoft-com:office:office";
private final static String PIC_NS = "http://schemas.openxmlformats.org/drawingml/2006/picture";
private final static String DRAWING_MAIN_NS =
"http://schemas.openxmlformats.org/drawingml/2006/main";
private final static String V_NS = "urn:schemas-microsoft-com:vml";
private final static String C_NS = "http://schemas.openxmlformats.org/drawingml/2006/chart";
private final static String OFFICE_DOC_RELATIONSHIP_NS =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships";
private final static char[] TAB_CHAR = new char[]{'\t'};
private final static char NEWLINE = '\n';
private final static String BOOKMARK_START = "bookmarkStart";
private final static String BOOKMARK_END = "bookmarkEnd";
private final static String FOOTNOTE_REFERENCE = "footnoteReference";
private final static String INS = "ins";
private final static String DEL = "del";
private final static String DEL_TEXT = "delText";
private final static String MOVE_FROM = "moveFrom";
private final static String MOVE_TO = "moveTo";
private final static String ENDNOTE_REFERENCE = "endnoteReference";
private static final String TEXTBOX = "textbox";
private final static String FLD_CHAR = "fldChar";
private final static String INSTR_TEXT = "instrText";
private final static String FLD_CHAR_TYPE = "fldCharType";
// DrawingML hyperlinks on shapes/pictures
private final static String HLINK_HOVER = "hlinkHover";
private final static String C_NV_PR = "cNvPr";
// VML shape hyperlinks
private final static String SHAPE = "shape";
private final static String HREF = "href";
// Patterns for extracting URLs from field codes
private static final Pattern HYPERLINK_PATTERN =
Pattern.compile("HYPERLINK\\s{1,100}\"([^\"]{1,10000})\"", Pattern.CASE_INSENSITIVE);
private static final Pattern INCLUDEPICTURE_PATTERN =
Pattern.compile("INCLUDEPICTURE\\s{1,100}\"([^\"]{1,10000})\"", Pattern.CASE_INSENSITIVE);
private static final Pattern INCLUDETEXT_PATTERN =
Pattern.compile("INCLUDETEXT\\s{1,100}\"([^\"]{1,10000})\"", Pattern.CASE_INSENSITIVE);
private static final Pattern IMPORT_PATTERN =
Pattern.compile("IMPORT\\s{1,100}\"([^\"]{1,10000})\"", Pattern.CASE_INSENSITIVE);
private static final Pattern LINK_PATTERN =
Pattern.compile("LINK\\s{1,100}[\\w.]{1,50}\\s{1,100}\"([^\"]{1,10000})\"", Pattern.CASE_INSENSITIVE);
private final XWPFBodyContentsHandler bodyContentsHandler;
private final Map<String, String> linkedRelationships;
private final RunProperties currRunProperties = new RunProperties();
private final ParagraphProperties currPProperties = new ParagraphProperties();
private final boolean includeTextBox;
private final boolean concatenatePhoneticRuns;
private final Metadata metadata;
private final StringBuilder runBuffer = new StringBuilder();
private final StringBuilder rubyBuffer = new StringBuilder();
private boolean inR = false;
//in run or in field. TODO: convert this to an integer because you can have a run within a run
private boolean inT = false;
private boolean inRPr = false;
private boolean inNumPr = false;
private boolean inRt = false;
private boolean inPic = false;
private boolean inPict = false;
private String picDescription = null;
private String picRId = null;
private String picFilename = null;
//mechanism used to determine when to
//signal the start of the p, and still
//handle p with pPr and those without
private boolean lastStartElementWasP = false;
//have we signaled the start of a p?
//pPr can happen multiple times within a p
//<p><pPr/><r><t>text</t></r><pPr></p>
private boolean pStarted = false;
//alternate content can be embedded in itself.
//need to track depth.
//if in alternate, choose fallback, maybe make this configurable?
private int inACChoiceDepth = 0;
private int inACFallbackDepth = 0;
private boolean inDelText = false;
//buffers rt in ruby sections (see 17.3.3.25)
private boolean inHlinkClick = false;
private boolean inTextBox = false;
private boolean inV = false; //in c:v in chart file
// Field code tracking for instrText-based hyperlinks
private boolean inField = false;
private boolean inInstrText = false;
private boolean inFieldHyperlink = false;
private final StringBuilder instrTextBuffer = new StringBuilder();
private OOXMLWordAndPowerPointTextHandler.EditType editType =
OOXMLWordAndPowerPointTextHandler.EditType.NONE;
private DateUtils dateUtils = new DateUtils();
private boolean hiddenSlide = false;
public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler bodyContentsHandler,
Map<String, String> hyperlinks) {
this(bodyContentsHandler, hyperlinks, true, true, null);
}
public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler bodyContentsHandler,
Map<String, String> hyperlinks, boolean includeTextBox,
boolean concatenatePhoneticRuns) {
this(bodyContentsHandler, hyperlinks, includeTextBox, concatenatePhoneticRuns, null);
}
public OOXMLWordAndPowerPointTextHandler(XWPFBodyContentsHandler bodyContentsHandler,
Map<String, String> hyperlinks, boolean includeTextBox,
boolean concatenatePhoneticRuns, Metadata metadata) {
this.bodyContentsHandler = bodyContentsHandler;
this.linkedRelationships = hyperlinks;
this.includeTextBox = includeTextBox;
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
this.metadata = metadata;
}
@Override
public void startDocument() throws SAXException {
}
@Override
public void endDocument() throws SAXException {
}
@Override
public void startPrefixMapping(String prefix, String uri) throws SAXException {
}
@Override
public void endPrefixMapping(String prefix) throws SAXException {
}
@Override
public void startElement(String uri, String localName, String qName, Attributes atts)
throws SAXException {
//TODO: checkBox, textBox, sym, headerReference, footerReference, commentRangeEnd
if (lastStartElementWasP && !PPR.equals(localName)) {
bodyContentsHandler.startParagraph(currPProperties);
}
lastStartElementWasP = false;
if (uri != null && uri.equals(MC_NS)) {
if (CHOICE.equals(localName)) {
inACChoiceDepth++;
} else if (FALLBACK.equals(localName)) {
inACFallbackDepth++;
}
}
if (inACChoiceDepth > 0) {
return;
}
if (!includeTextBox && localName.equals(TEXTBOX)) {
inTextBox = true;
return;
}
//these are sorted descending by frequency within docx files
//in our regression corpus.
//yes, I know, likely premature optimization...
if (RPR.equals(localName)) {
inRPr = true;
} else if (R.equals(localName)) {
inR = true;
} else if (T.equals(localName)) {
inT = true;
} else if (TAB.equals(localName)) {
runBuffer.append(TAB_CHAR);
} else if (P.equals(localName)) {
lastStartElementWasP = true;
} else if (B.equals(localName)) { //TODO: add bCs
if (inR && inRPr) {
currRunProperties.setBold(true);
}
} else if (TC.equals(localName)) {
bodyContentsHandler.startTableCell();
} else if (P_STYLE.equals(localName)) {
String styleId = atts.getValue(W_NS, "val");
currPProperties.setStyleID(styleId);
} else if (I.equals(localName)) { //TODO: add iCs
//rprs don't have to be inR; ignore those that aren't
if (inR && inRPr) {
currRunProperties.setItalics(true);
}
} else if (STRIKE.equals(localName)) {
if (inR && inRPr) {
currRunProperties.setStrike(true);
}
} else if (U.equals(localName)) {
if (inR && inRPr) {
currRunProperties.setUnderline(getStringVal(atts));
}
} else if (TR.equals(localName)) {
bodyContentsHandler.startTableRow();
} else if (NUM_PR.equals(localName)) {
inNumPr = true;
} else if (ILVL.equals(localName)) {
if (inNumPr) {
currPProperties.setIlvl(getIntVal(atts));
}
} else if (NUM_ID.equals(localName)) {
if (inNumPr) {
currPProperties.setNumId(getIntVal(atts));
}
} else if (BR.equals(localName)) {
runBuffer.append(NEWLINE);
} else if (BOOKMARK_START.equals(localName)) {
String name = atts.getValue(W_NS, "name");
String id = atts.getValue(W_NS, "id");
bodyContentsHandler.startBookmark(id, name);
} else if (BOOKMARK_END.equals(localName)) {
String id = atts.getValue(W_NS, "id");
bodyContentsHandler.endBookmark(id);
} else if (HYPERLINK.equals(localName)) { //docx hyperlink
String hyperlinkId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
String hyperlink = null;
if (hyperlinkId != null) {
hyperlink = linkedRelationships.get(hyperlinkId);
bodyContentsHandler.hyperlinkStart(hyperlink);
} else {
String anchor = atts.getValue(W_NS, "anchor");
if (anchor != null) {
anchor = "#" + anchor;
}
bodyContentsHandler.hyperlinkStart(anchor);
}
} else if (HLINK_CLICK.equals(localName)) { //pptx hyperlink
String hyperlinkId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
String hyperlink = null;
if (hyperlinkId != null) {
hyperlink = linkedRelationships.get(hyperlinkId);
bodyContentsHandler.hyperlinkStart(hyperlink);
inHlinkClick = true;
}
} else if (TBL.equals(localName)) {
bodyContentsHandler.startTable();
} else if (BLIP.equals(localName)) { //check for DRAWING_NS
picRId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "embed");
} else if ("cNvPr".equals(localName)) { //check for PIC_NS?
picDescription = atts.getValue("", "descr");
} else if (PIC.equals(localName)) {
inPic = true; //check for PIC_NS?
} //TODO: add sdt, sdtPr, sdtContent goes here statistically
else if (FOOTNOTE_REFERENCE.equals(localName)) {
String id = atts.getValue(W_NS, "id");
bodyContentsHandler.footnoteReference(id);
} else if (IMAGEDATA.equals(localName)) {
picRId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
picDescription = atts.getValue(O_NS, "title");
} else if (INS.equals(localName)) {
startEditedSection(editType.INSERT, atts);
} else if (DEL_TEXT.equals(localName)) {
inDelText = true;
} else if (DEL.equals(localName)) {
startEditedSection(editType.DELETE, atts);
} else if (MOVE_TO.equals(localName)) {
startEditedSection(EditType.MOVE_TO, atts);
} else if (MOVE_FROM.equals(localName)) {
startEditedSection(editType.MOVE_FROM, atts);
} else if (OLE_OBJECT.equals(localName)) { //check for O_NS?
String type = null;
String refId = null;
//TODO: clean this up and ...want to get ProgID?
for (int i = 0; i < atts.getLength(); i++) {
String attLocalName = atts.getLocalName(i);
String attValue = atts.getValue(i);
if (attLocalName.equals("Type")) {
type = attValue;
} else if (OFFICE_DOC_RELATIONSHIP_NS.equals(atts.getURI(i)) &&
attLocalName.equals("id")) {
refId = attValue;
}
}
if ("Embed".equals(type)) {
bodyContentsHandler.embeddedOLERef(refId);
} else if ("Link".equals(type)) {
// Linked OLE object - references external file
bodyContentsHandler.linkedOLERef(refId);
if (metadata != null) {
metadata.set(Office.HAS_LINKED_OLE_OBJECTS, true);
}
}
} else if (CR.equals(localName)) {
runBuffer.append(NEWLINE);
} else if (ENDNOTE_REFERENCE.equals(localName)) {
String id = atts.getValue(W_NS, "id");
bodyContentsHandler.endnoteReference(id);
} else if (V.equals(localName) && C_NS.equals(uri)) { // in value in a chart
inV = true;
} else if (RT.equals(localName)) {
inRt = true;
} else if (SLIDE.equals(localName)) {
String val = atts.getValue("show");
if ("0".equals(val) || "false".equals(val)) {
hiddenSlide = true;
}
} else if (FLD_CHAR.equals(localName)) {
String fldCharType = atts.getValue(W_NS, FLD_CHAR_TYPE);
if ("begin".equals(fldCharType)) {
inField = true;
instrTextBuffer.setLength(0);
} else if ("separate".equals(fldCharType)) {
// Parse instrText for HYPERLINK
String url = parseHyperlinkFromInstrText(instrTextBuffer.toString());
if (url != null) {
bodyContentsHandler.hyperlinkStart(url);
inFieldHyperlink = true;
if (metadata != null) {
metadata.set(Office.HAS_FIELD_HYPERLINKS, true);
}
} else {
// Check for external reference fields (INCLUDEPICTURE, INCLUDETEXT, etc.)
StringBuilder fieldType = new StringBuilder();
String extUrl = parseExternalRefFromInstrText(instrTextBuffer.toString(), fieldType);
if (extUrl != null) {
bodyContentsHandler.externalRef(fieldType.toString(), extUrl);
if (metadata != null) {
metadata.set(Office.HAS_FIELD_HYPERLINKS, true);
}
}
}
} else if ("end".equals(fldCharType)) {
if (inFieldHyperlink) {
bodyContentsHandler.hyperlinkEnd();
inFieldHyperlink = false;
}
inField = false;
instrTextBuffer.setLength(0);
}
} else if (INSTR_TEXT.equals(localName)) {
inInstrText = true;
} else if (HLINK_HOVER.equals(localName)) {
// DrawingML hover hyperlink on shapes/pictures
String hyperlinkId = atts.getValue(OFFICE_DOC_RELATIONSHIP_NS, "id");
if (hyperlinkId != null) {
String hyperlink = linkedRelationships.get(hyperlinkId);
if (hyperlink != null) {
bodyContentsHandler.externalRef("hlinkHover", hyperlink);
if (metadata != null) {
metadata.set(Office.HAS_HOVER_HYPERLINKS, true);
}
}
}
} else if (SHAPE.equals(localName) && V_NS.equals(uri)) {
// VML shape with href attribute
String href = atts.getValue(HREF);
if (href == null) {
href = atts.getValue(O_NS, HREF);
}
if (href != null && !href.isEmpty()) {
bodyContentsHandler.externalRef("vml-shape-href", href);
if (metadata != null) {
metadata.set(Office.HAS_VML_HYPERLINKS, true);
}
}
}
}
private void startEditedSection(EditType editType, Attributes atts) throws SAXException {
String editAuthor = atts.getValue(W_NS, "author");
String editDateString = atts.getValue(W_NS, "date");
Date editDate = null;
if (editDateString != null) {
editDate = dateUtils.tryToParse(editDateString);
}
bodyContentsHandler.startEditedSection(editAuthor, editDate, editType);
this.editType = editType;
}
private String getStringVal(Attributes atts) {
String valString = atts.getValue(W_NS, VAL);
if (valString != null) {
return valString;
}
return "";
}
private int getIntVal(Attributes atts) {
String valString = atts.getValue(W_NS, VAL);
if (valString != null) {
try {
return Integer.parseInt(valString);
} catch (NumberFormatException e) {
//swallow
}
}
return -1;
}
/**
* Parses a HYPERLINK URL from instrText field code content.
* Field codes like: HYPERLINK "https://example.com"
*
* @param instrText the accumulated instrText content
* @return the URL if found, or null
*/
private String parseHyperlinkFromInstrText(String instrText) {
if (instrText == null || instrText.isEmpty()) {
return null;
}
Matcher m = HYPERLINK_PATTERN.matcher(instrText.trim());
if (m.find()) {
return m.group(1);
}
return null;
}
/**
* Parses URLs from instrText field codes that reference external resources.
* This includes INCLUDEPICTURE, INCLUDETEXT, IMPORT, and LINK fields.
*
* @param instrText the accumulated instrText content
* @param fieldType output parameter - will contain the field type if found
* @return the URL if found, or null
*/
private String parseExternalRefFromInstrText(String instrText, StringBuilder fieldType) {
if (instrText == null || instrText.isEmpty()) {
return null;
}
String trimmed = instrText.trim();
Matcher m = INCLUDEPICTURE_PATTERN.matcher(trimmed);
if (m.find()) {
fieldType.append("INCLUDEPICTURE");
return m.group(1);
}
m = INCLUDETEXT_PATTERN.matcher(trimmed);
if (m.find()) {
fieldType.append("INCLUDETEXT");
return m.group(1);
}
m = IMPORT_PATTERN.matcher(trimmed);
if (m.find()) {
fieldType.append("IMPORT");
return m.group(1);
}
m = LINK_PATTERN.matcher(trimmed);
if (m.find()) {
fieldType.append("LINK");
return m.group(1);
}
return null;
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if (CHOICE.equals(localName)) {
inACChoiceDepth--;
} else if (FALLBACK.equals(localName)) {
inACFallbackDepth--;
}
if (inACChoiceDepth > 0) {
return;
}
if (!includeTextBox && localName.equals(TEXTBOX)) {
inTextBox = false;
return;
}
if (PIC.equals(localName)) { //PIC_NS
handlePict();
inPic = false;
return;
} else if (RPR.equals(localName)) {
inRPr = false;
} else if (R.equals(localName)) {
handleEndOfRun();
} else if (T.equals(localName)) {
inT = false;
} else if (PPR.equals(localName)) {
if (!pStarted) {
bodyContentsHandler.startParagraph(currPProperties);
pStarted = true;
}
currPProperties.reset();
} else if (P.equals(localName)) {
if (runBuffer.length() > 0) {
//<p><tab></p>...this will treat that as if it were
//a run...TODO: should we swallow whitespace that doesn't occur in a run?
bodyContentsHandler.run(currRunProperties, runBuffer.toString());
runBuffer.setLength(0);
}
pStarted = false;
bodyContentsHandler.endParagraph();
} else if (TC.equals(localName)) {
bodyContentsHandler.endTableCell();
} else if (TR.equals(localName)) {
bodyContentsHandler.endTableRow();
} else if (TBL.equals(localName)) {
bodyContentsHandler.endTable();
} else if (FLD.equals(localName)) {
handleEndOfRun();
} else if (DEL_TEXT.equals(localName)) {
inDelText = false;
} else if (INS.equals(localName) || DEL.equals(localName) || MOVE_TO.equals(localName) ||
MOVE_FROM.equals(localName)) {
editType = EditType.NONE;
} else if (HYPERLINK.equals(localName)) {
bodyContentsHandler.hyperlinkEnd();
} else if (PICT.equals(localName)) {
handlePict();
} else if (V.equals(localName) && C_NS.equals(uri)) { // in value in a chart
inV = false;
handleEndOfRun();
} else if (RT.equals(localName)) {
inRt = false;
} else if (RUBY.equals(localName)) {
handleEndOfRuby();
} else if (INSTR_TEXT.equals(localName)) {
inInstrText = false;
}
}
private void handleEndOfRuby() throws SAXException {
if (rubyBuffer.length() > 0) {
if (concatenatePhoneticRuns) {
bodyContentsHandler.run(currRunProperties, " (" + rubyBuffer.toString() + ")");
}
rubyBuffer.setLength(0);
}
}
private void handleEndOfRun() throws SAXException {
bodyContentsHandler.run(currRunProperties, runBuffer.toString());
if (inHlinkClick) {
bodyContentsHandler.hyperlinkEnd();
inHlinkClick = false;
}
inR = false;
runBuffer.setLength(0);
currRunProperties.setBold(false);
currRunProperties.setItalics(false);
currRunProperties.setStrike(false);
currRunProperties.setUnderline(UnderlinePatterns.NONE.name());
}
private void handlePict() throws SAXException {
String picFileName = null;
if (picRId != null) {
picFileName = linkedRelationships.get(picRId);
}
bodyContentsHandler.embeddedPicRef(picFileName, picDescription);
picDescription = null;
picRId = null;
inPic = false;
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (inACChoiceDepth > 0) {
return;
} else if (!includeTextBox && inTextBox) {
return;
}
if (editType.equals(EditType.MOVE_FROM) && inT) {
if (bodyContentsHandler.isIncludeMoveFromText()) {
appendToBuffer(ch, start, length);
}
} else if (inT) {
appendToBuffer(ch, start, length);
} else if (bodyContentsHandler.isIncludeDeletedText() && editType.equals(EditType.DELETE)) {
appendToBuffer(ch, start, length);
} else if (inV) {
appendToBuffer(ch, start, length);
appendToBuffer(TAB_CHAR, 0, 1);
} else if (inInstrText && inField) {
// Accumulate instrText content for field code parsing (e.g., HYPERLINK)
instrTextBuffer.append(ch, start, length);
}
}
@Override
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
if (inACChoiceDepth > 0) {
return;
} else if (!includeTextBox && inTextBox) {
return;
}
if (inT) {
appendToBuffer(ch, start, length);
} else if (bodyContentsHandler.isIncludeDeletedText() && inDelText) {
appendToBuffer(ch, start, length);
}
}
private void appendToBuffer(char[] ch, int start, int length) throws SAXException {
if (inRt) {
rubyBuffer.append(ch, start, length);
} else {
runBuffer.append(ch, start, length);
}
}
public enum EditType {
NONE, INSERT, DELETE, MOVE_TO, MOVE_FROM
}
public interface XWPFBodyContentsHandler {
void run(RunProperties runProperties, String contents) throws SAXException;
/**
* @param link the link; can be null
*/
void hyperlinkStart(String link) throws SAXException;
void hyperlinkEnd() throws SAXException;
void startParagraph(ParagraphProperties paragraphProperties) throws SAXException;
void endParagraph() throws SAXException;
void startTable() throws SAXException;
void endTable() throws SAXException;
void startTableRow() throws SAXException;
void endTableRow() throws SAXException;
void startTableCell() throws SAXException;
void endTableCell() throws SAXException;
void startSDT() throws SAXException;
void endSDT() throws SAXException;
void startEditedSection(String editor, Date date, EditType editType) throws SAXException;
void endEditedSection() throws SAXException;
boolean isIncludeDeletedText() throws SAXException;
void footnoteReference(String id) throws SAXException;
void endnoteReference(String id) throws SAXException;
boolean isIncludeMoveFromText() throws SAXException;
void embeddedOLERef(String refId) throws SAXException;
/**
* Called when a linked (vs embedded) OLE object is found.
* These reference external files and are a security concern.
*/
void linkedOLERef(String refId) throws SAXException;
void embeddedPicRef(String picFileName, String picDescription) throws SAXException;
void startBookmark(String id, String name) throws SAXException;
void endBookmark(String id) throws SAXException;
/**
* Called when an external reference URL is found in a field code.
* This includes INCLUDEPICTURE, INCLUDETEXT, IMPORT, LINK fields,
* and DrawingML/VML hyperlinks on shapes.
*
* @param fieldType the type of field (e.g., "INCLUDEPICTURE", "hlinkHover", "vml-href")
* @param url the external URL
*/
default void externalRef(String fieldType, String url) throws SAXException {
// Default no-op implementation for backward compatibility
}
}
public boolean isHiddenSlide() {
return hiddenSlide;
}
}