XWPFNumberingShim.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft.ooxml.xwpf;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.AbstractListManager.LevelTuple;
import org.apache.tika.parser.microsoft.ooxml.OOXMLWordAndPowerPointTextHandler;
import org.apache.tika.utils.XMLReaderUtils;
/**
* SAX-based parser for numbering.xml that replaces the XMLBeans-dependent
* POI XWPFNumbering. This eliminates the need for ooxml-lite schema classes
* in the SAX parsing chain.
* <p>
* Follows the same pattern as {@link XWPFStylesShim}.
*/
public class XWPFNumberingShim {

    /** Shared instance that knows no numbering definitions; every lookup reports "not found". */
    public static final XWPFNumberingShim EMPTY = new EmptyNumberingShim();

    /**
     * Highest {@code w:ilvl} that is retained. A WordprocessingML numbering definition
     * carries at most nine levels (ilvl 0-8). The value comes from untrusted XML and is
     * used to size an array, so anything above this bound is ignored rather than
     * allowed to trigger a huge (or negative-sized) allocation.
     */
    private static final int MAX_ILVL = 8;

    // abstractNumId -> level tuples, indexed by ilvl
    private final Map<Integer, LevelTuple[]> abstractNumLevels = new HashMap<>();
    // numId -> abstractNumId
    private final Map<Integer, Integer> numToAbstractNum = new HashMap<>();
    // numId -> (ilvl -> override tuple); levels that are not overridden have no entry
    private final Map<Integer, Map<Integer, LevelTuple>> overrideLevels = new HashMap<>();

    /** Creates a shim with no definitions; only used by {@link #EMPTY}. */
    private XWPFNumberingShim() {
    }

    /**
     * Parses the numbering part with a SAX handler and records its definitions.
     *
     * @param part         the package part holding numbering.xml
     * @param parseContext the parse context handed through to the SAX parsing utility
     * @throws IOException   if the part's stream cannot be opened or read
     * @throws TikaException if the SAX parsing utility reports a Tika-level failure
     * @throws SAXException  if the XML cannot be parsed
     */
    public XWPFNumberingShim(PackagePart part, ParseContext parseContext)
            throws IOException, TikaException, SAXException {
        try (InputStream is = part.getInputStream()) {
            XMLReaderUtils.parseSAX(is, new NumberingHandler(), parseContext);
        }
    }

    /**
     * @return the abstractNumId for the given numId, or -1 if not found
     */
    public int getAbstractNumId(int numId) {
        Integer id = numToAbstractNum.get(numId);
        return id != null ? id : -1;
    }

    /**
     * @return the level tuples for the given abstractNumId (indexed by ilvl),
     * or null if not found
     */
    public LevelTuple[] getAbstractNumLevels(int abstractNumId) {
        return abstractNumLevels.get(abstractNumId);
    }

    /**
     * Build override level tuples array for a given numId with the specified length.
     * Levels without an override are filled with a default tuple whose text is
     * {@code "%<ilvl>."}. Returns null if there are no overrides for this numId.
     *
     * @param numId  the numbering instance id
     * @param length the number of levels the returned array must have
     * @return an array of exactly {@code length} tuples, or null if nothing is overridden
     */
    public LevelTuple[] getOverrideLevels(int numId, int length) {
        Map<Integer, LevelTuple> overrides = overrideLevels.get(numId);
        if (overrides == null || overrides.isEmpty()) {
            return null;
        }
        LevelTuple[] result = new LevelTuple[length];
        for (int i = 0; i < length; i++) {
            LevelTuple override = overrides.get(i);
            if (override != null) {
                result[i] = override;
            } else {
                result[i] = new LevelTuple("%" + i + ".");
            }
        }
        return result;
    }

    /** Null-object variant backing {@link #EMPTY}: never holds any definition. */
    private static class EmptyNumberingShim extends XWPFNumberingShim {
        @Override
        public int getAbstractNumId(int numId) {
            return -1;
        }

        @Override
        public LevelTuple[] getAbstractNumLevels(int abstractNumId) {
            return null;
        }

        @Override
        public LevelTuple[] getOverrideLevels(int numId, int length) {
            return null;
        }
    }

    /**
     * SAX handler that walks numbering.xml and fills the enclosing shim's maps.
     * Only elements in the WordprocessingML main namespace are considered.
     */
    private class NumberingHandler extends DefaultHandler {
        private static final String W_NS = OOXMLWordAndPowerPointTextHandler.W_NS;

        // Current context
        private boolean inAbstractNum = false;
        private int currentAbstractNumId = -1;
        private boolean inNum = false;
        private int currentNumId = -1;
        private boolean inLvl = false;
        private boolean inLvlOverride = false;
        // ilvl declared on the enclosing lvlOverride, -1 when outside one or not declared
        private int overrideIlvl = -1;
        private int currentIlvl = -1;

        // Level accumulators (reset for each lvl element)
        private int lvlStart = -1;
        private int lvlRestart = -1;
        private String lvlText = null;
        private String lvlNumFmt = null;
        private boolean lvlIsLegal = false;

        // Collecting levels for current abstractNum
        private final Map<Integer, LevelTuple> currentAbstractLevels = new HashMap<>();
        // Collecting overrides for current num
        private final Map<Integer, LevelTuple> currentOverrides = new HashMap<>();

        /**
         * Tracks which numbering construct is open and captures the attribute values
         * of the level properties that feed a {@link LevelTuple}.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts)
                throws SAXException {
            if (!W_NS.equals(uri)) {
                return;
            }
            switch (localName) {
                case "abstractNum":
                    inAbstractNum = true;
                    currentAbstractNumId = getIntAttr(atts, W_NS, "abstractNumId", -1);
                    currentAbstractLevels.clear();
                    break;
                case "num":
                    inNum = true;
                    currentNumId = getIntAttr(atts, W_NS, "numId", -1);
                    currentOverrides.clear();
                    break;
                case "lvlOverride":
                    if (inNum) {
                        inLvlOverride = true;
                        overrideIlvl = getIntAttr(atts, W_NS, "ilvl", -1);
                        currentIlvl = overrideIlvl;
                    }
                    break;
                case "lvl":
                    inLvl = true;
                    currentIlvl = getIntAttr(atts, W_NS, "ilvl", -1);
                    if (currentIlvl < 0 && inLvlOverride) {
                        // The override already names the level; do not lose the
                        // definition just because the nested lvl omits a usable ilvl.
                        currentIlvl = overrideIlvl;
                    }
                    // Reset accumulators
                    lvlStart = -1;
                    lvlRestart = -1;
                    lvlText = null;
                    lvlNumFmt = null;
                    lvlIsLegal = false;
                    break;
                case "start":
                    if (inLvl) {
                        lvlStart = getIntAttr(atts, W_NS, "val", -1);
                    }
                    break;
                case "numFmt":
                    if (inLvl) {
                        lvlNumFmt = atts.getValue(W_NS, "val");
                    }
                    break;
                case "lvlText":
                    if (inLvl) {
                        lvlText = atts.getValue(W_NS, "val");
                    }
                    break;
                case "lvlRestart":
                    if (inLvl) {
                        lvlRestart = getIntAttr(atts, W_NS, "val", -1);
                    }
                    break;
                case "isLgl":
                    if (inLvl) {
                        // isLgl is an on/off property: a bare element means "on",
                        // but w:val="0"/"false"/"off" explicitly switches it off.
                        lvlIsLegal = isOn(atts.getValue(W_NS, "val"));
                    }
                    break;
                case "abstractNumId":
                    // Inside w:num this element links the instance to its abstract definition.
                    if (inNum && !inLvl) {
                        int absId = getIntAttr(atts, W_NS, "val", -1);
                        if (currentNumId >= 0 && absId >= 0) {
                            numToAbstractNum.put(currentNumId, absId);
                        }
                    }
                    break;
                default:
                    break;
            }
        }

        /**
         * Finalizes the construct that just closed: stores a finished level, or publishes
         * the collected levels/overrides of an abstractNum/num into the shim's maps.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (!W_NS.equals(uri)) {
                return;
            }
            switch (localName) {
                case "lvl":
                    // Out-of-range levels are dropped so that a hostile ilvl cannot
                    // drive the size of the arrays built from these maps.
                    if (inLvl && currentIlvl >= 0 && currentIlvl <= MAX_ILVL) {
                        LevelTuple tuple = buildLevelTuple(currentIlvl);
                        if (inLvlOverride && inNum) {
                            currentOverrides.put(currentIlvl, tuple);
                        } else if (inAbstractNum) {
                            currentAbstractLevels.put(currentIlvl, tuple);
                        }
                    }
                    inLvl = false;
                    break;
                case "lvlOverride":
                    inLvlOverride = false;
                    overrideIlvl = -1;
                    break;
                case "abstractNum":
                    if (inAbstractNum && currentAbstractNumId >= 0 &&
                            !currentAbstractLevels.isEmpty()) {
                        int maxLevel = -1;
                        for (int ilvl : currentAbstractLevels.keySet()) {
                            maxLevel = Math.max(maxLevel, ilvl);
                        }
                        // maxLevel <= MAX_ILVL here, so the allocation is small and bounded.
                        LevelTuple[] levels = new LevelTuple[maxLevel + 1];
                        for (int i = 0; i <= maxLevel; i++) {
                            LevelTuple t = currentAbstractLevels.get(i);
                            // Gaps (levels the definition skipped) get a default tuple.
                            levels[i] = t != null ? t : new LevelTuple("%" + i + ".");
                        }
                        abstractNumLevels.put(currentAbstractNumId, levels);
                    }
                    inAbstractNum = false;
                    currentAbstractNumId = -1;
                    break;
                case "num":
                    if (inNum && currentNumId >= 0 && !currentOverrides.isEmpty()) {
                        // Copy: currentOverrides is reused (cleared) for the next w:num.
                        overrideLevels.put(currentNumId, new HashMap<>(currentOverrides));
                    }
                    inNum = false;
                    currentNumId = -1;
                    break;
                default:
                    break;
            }
        }

        /**
         * Builds the tuple for the level that just ended from the accumulated properties,
         * substituting defaults for whatever the level did not specify.
         *
         * @param level the ilvl of the level, used for the default level text
         */
        private LevelTuple buildLevelTuple(int level) {
            int start = lvlStart;
            int restart = lvlRestart;
            String text = lvlText != null ? lvlText : "%" + level + ".";
            String numFmt = lvlNumFmt != null ? lvlNumFmt : "decimal";
            if (start < 0) {
                // Same hack as XWPFListManager.buildTuple
                if ("decimal".equals(numFmt) || "ordinal".equals(numFmt) ||
                        "decimalZero".equals(numFmt)) {
                    start = 0;
                } else {
                    start = 1;
                }
            }
            return new LevelTuple(start, restart, text, numFmt, lvlIsLegal);
        }

        /**
         * Interprets the {@code w:val} of an on/off property.
         *
         * @param val the raw attribute value, or null when the attribute is absent
         * @return false only for an explicit "0", "false" or "off"; true otherwise
         */
        private boolean isOn(String val) {
            if (val == null) {
                return true;
            }
            String v = val.trim();
            return !("0".equals(v) || "false".equalsIgnoreCase(v) || "off".equalsIgnoreCase(v));
        }

        /**
         * Reads an integer attribute.
         *
         * @return the parsed value, or {@code defaultVal} if the attribute is missing
         * or is not a valid integer
         */
        private int getIntAttr(Attributes atts, String ns, String localName, int defaultVal) {
            String val = atts.getValue(ns, localName);
            if (val == null) {
                return defaultVal;
            }
            try {
                return Integer.parseInt(val);
            } catch (NumberFormatException e) {
                return defaultVal;
            }
        }
    }
}