PdfConformanceXmpMetaDataUtil.java
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2026 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.pdf;
import com.itextpdf.commons.utils.StringNormalizer;
import com.itextpdf.kernel.xmp.XMPConst;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.XMPUtils;
import com.itextpdf.kernel.xmp.options.PropertyOptions;
import com.itextpdf.kernel.xmp.properties.XMPProperty;
import java.util.ArrayList;
import java.util.List;
/**
* Utility class for extracting and setting PDF conformance information in XMP metadata.
*
* <p>This class handles the mapping between {@link PdfConformance} instances and
* their XMP metadata representations for PDF/A, PDF/UA, and Well Tagged PDF (WTPDF)
* conformance levels.
*/
final class PdfConformanceXmpMetaDataUtil {
/**
 * Hidden constructor: every member of this class is static.
 */
private PdfConformanceXmpMetaDataUtil() {
    // Intentionally empty - this static helper holder is never instantiated.
}
/**
 * XMP property path of the {@code conformsTo} field of the first item ({@code [1]}) of the
 * declarations array.
 *
 * <p>It is looked up in the {@link XMPConst#NS_DECLARATIONS} namespace to find out whether the
 * metadata already carries a conformance declaration.
 */
private static final String FIRST_CONFORMS_TO_PATH =
XMPConst.DECLARATIONS + "/[1]/" + XMPConst.CONFORMS_TO;
/**
 * Serialized XMP packet containing a {@code pdfd:declarations} bag with a single
 * {@code pdfd:conformsTo} entry for {@code http://pdfa.org/declarations/wtpdf#accessibility1.0}.
 *
 * <p>{@link #setConformanceToXmp(PdfConformance, XMPMeta)} parses this template and appends its
 * properties to the target metadata when the first {@code conformsTo} declaration is missing and
 * the conformance is Well Tagged PDF for accessibility or PDF/UA-2.
 */
private static final String WELL_TAGGED_FOR_ACCESSIBILITY_SCHEMA =
" <x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n" +
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
" <rdf:Description rdf:about=\"\" xmlns:pdfd=\"http://pdfa.org/declarations/\">\n" +
" <pdfd:declarations>\n" +
" <rdf:Bag>\n" +
" <rdf:li rdf:parseType=\"Resource\">\n" +
" <pdfd:conformsTo>http://pdfa.org/declarations/wtpdf#accessibility1.0</pdfd:conformsTo>\n" +
" </rdf:li>\n" +
" </rdf:Bag>\n" +
" </pdfd:declarations>\n" +
" </rdf:Description>\n" +
" </rdf:RDF>\n" +
" </x:xmpmeta>";
/**
 * Serialized XMP packet containing a {@code pdfd:declarations} bag with a single
 * {@code pdfd:conformsTo} entry for {@code http://pdfa.org/declarations/wtpdf#reuse1.0}.
 *
 * <p>{@link #setConformanceToXmp(PdfConformance, XMPMeta)} parses this template and appends its
 * properties to the target metadata when the first {@code conformsTo} declaration is missing and
 * the conformance is Well Tagged PDF for reuse.
 */
private static final String WELL_TAGGED_FOR_REUSE_SCHEMA =
" <x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n" +
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
" <rdf:Description rdf:about=\"\" xmlns:pdfd=\"http://pdfa.org/declarations/\">\n" +
" <pdfd:declarations>\n" +
" <rdf:Bag>\n" +
" <rdf:li rdf:parseType=\"Resource\">\n" +
" <pdfd:conformsTo>http://pdfa.org/declarations/wtpdf#reuse1.0</pdfd:conformsTo>\n" +
" </rdf:li>\n" +
" </rdf:Bag>\n" +
" </pdfd:declarations>\n" +
" </rdf:Description>\n" +
" </rdf:RDF>\n" +
" </x:xmpmeta>";
/**
 * Serialized XMP packet containing a {@code pdfaExtension:schemas} bag that describes the
 * {@code pdfuaid} schema (namespace {@code http://www.aiim.org/pdfua/ns/id/}) together with its
 * {@code part} (Integer), {@code amd} (Text) and {@code corr} (Text) properties.
 *
 * <p>{@link #setConformanceToXmp(PdfConformance, XMPMeta)} parses this template and appends its
 * properties to the target metadata for PDF/A conformance when the metadata holds a
 * {@code part} property in the {@link XMPConst#NS_PDFUA_ID} namespace.
 */
private static final String PDF_UA_EXTENSION =
" <x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n" +
" <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" +
" <rdf:Description rdf:about=\"\" xmlns:pdfaExtension=\"http://www.aiim"
+ ".org/pdfa/ns/extension/\" xmlns:pdfaSchema=\"http://www.aiim.org/pdfa/ns/schema#\" "
+ "xmlns:pdfaProperty=\"http://www.aiim.org/pdfa/ns/property#\">\n"
+
" <pdfaExtension:schemas>\n" +
" <rdf:Bag>\n" +
" <rdf:li rdf:parseType=\"Resource\">\n" +
" <pdfaSchema:namespaceURI rdf:resource=\"http://www.aiim.org/pdfua/ns/id/\"/>\n" +
" <pdfaSchema:prefix>pdfuaid</pdfaSchema:prefix>\n" +
" <pdfaSchema:schema>PDF/UA identification schema</pdfaSchema:schema>\n" +
" <pdfaSchema:property>\n" +
" <rdf:Seq>\n" +
" <rdf:li rdf:parseType=\"Resource\">\n" +
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
" <pdfaProperty:description>PDF/UA version "
+ "identifier</pdfaProperty:description>\n"
+
" <pdfaProperty:name>part</pdfaProperty:name>\n" +
" <pdfaProperty:valueType>Integer</pdfaProperty:valueType>\n" +
" </rdf:li>\n" +
" <rdf:li rdf:parseType=\"Resource\">\n" +
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
" <pdfaProperty:description>PDF/UA amendment "
+ "identifier</pdfaProperty:description>\n"
+
" <pdfaProperty:name>amd</pdfaProperty:name>\n" +
" <pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
" </rdf:li>\n" +
" <rdf:li rdf:parseType=\"Resource\">\n" +
" <pdfaProperty:category>internal</pdfaProperty:category>\n" +
" <pdfaProperty:description>PDF/UA corrigenda "
+ "identifier</pdfaProperty:description>\n"
+
" <pdfaProperty:name>corr</pdfaProperty:name>\n" +
" <pdfaProperty:valueType>Text</pdfaProperty:valueType>\n" +
" </rdf:li>\n" +
" </rdf:Seq>\n" +
" </pdfaSchema:property>\n" +
" </rdf:li>\n" +
" </rdf:Bag>\n" +
" </pdfaExtension:schemas>\n" +
" </rdf:Description>\n" +
" </rdf:RDF>\n" +
" </x:xmpmeta>";
/**
 * Fills in the XMP identification properties that the given PDF conformance requires.
 *
 * <p>Only properties that are absent get written. A value that is already present is never
 * overwritten, so anything invalid in the source document is left as-is. A required property
 * that is missing (e.g. the revision for PDF/A-4) is added.
 *
 * @param conformance the conformance whose properties should be written
 * @param xmpMeta the XMP metadata instance to update
 *
 * @throws XMPException if the XMP metadata cannot be parsed or modified
 */
static void setConformanceToXmp(PdfConformance conformance, XMPMeta xmpMeta) throws XMPException {
    // 1. PDF/UA identification: part, plus revision for PDF/UA-2.
    if (conformance.isPdfUA()) {
        PdfUAConformance ua = conformance.getUAConformance();
        boolean uaPartAbsent = xmpMeta.getProperty(XMPConst.NS_PDFUA_ID, XMPConst.PART) == null;
        if (uaPartAbsent) {
            int uaPart = Integer.parseInt(ua.getPart());
            PropertyOptions separateNode = new PropertyOptions(PropertyOptions.SEPARATE_NODE);
            xmpMeta.setPropertyInteger(XMPConst.NS_PDFUA_ID, XMPConst.PART, uaPart, separateNode);
        }
        if (conformance.conformsTo(PdfUAConformance.PDF_UA_2)) {
            if (xmpMeta.getProperty(XMPConst.NS_PDFUA_ID, XMPConst.REV) == null) {
                xmpMeta.setPropertyInteger(XMPConst.NS_PDFUA_ID, XMPConst.REV, 2024);
            }
        }
    }

    // 2. Well Tagged PDF declarations: only added while the first conformsTo entry is absent.
    if (xmpMeta.getProperty(XMPConst.NS_DECLARATIONS, FIRST_CONFORMS_TO_PATH) == null) {
        boolean declareAccessibility = conformance.conformsTo(WellTaggedPdfConformance.FOR_ACCESSIBILITY)
                || conformance.conformsTo(PdfUAConformance.PDF_UA_2);
        if (declareAccessibility) {
            appendDeclarationTemplate(WELL_TAGGED_FOR_ACCESSIBILITY_SCHEMA, xmpMeta);
        }
        if (conformance.conformsTo(WellTaggedPdfConformance.FOR_REUSE)) {
            appendDeclarationTemplate(WELL_TAGGED_FOR_REUSE_SCHEMA, xmpMeta);
        }
    }

    // 3. PDF/A identification: nothing more to do for non-PDF/A conformance.
    if (!conformance.isPdfA()) {
        return;
    }
    PdfAConformance pdfA = conformance.getAConformance();
    if (xmpMeta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.PART) == null) {
        xmpMeta.setProperty(XMPConst.NS_PDFA_ID, XMPConst.PART, pdfA.getPart());
    }
    if (pdfA.getLevel() != null) {
        if (xmpMeta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.CONFORMANCE) == null) {
            xmpMeta.setProperty(XMPConst.NS_PDFA_ID, XMPConst.CONFORMANCE, pdfA.getLevel());
        }
    }
    if ("4".equals(pdfA.getPart())) {
        if (xmpMeta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.REV) == null) {
            xmpMeta.setProperty(XMPConst.NS_PDFA_ID, XMPConst.REV, PdfConformance.PDF_A_4_REVISION);
        }
    }
    // A PDF/A document that also carries a PDF/UA part gets the pdfuaid extension schema description.
    if (xmpMeta.getPropertyInteger(XMPConst.NS_PDFUA_ID, XMPConst.PART) != null) {
        XMPUtils.appendProperties(XMPMetaFactory.parseFromString(PDF_UA_EXTENSION), xmpMeta, true, false);
    }
}

/**
 * Parses a serialized declarations template and appends its properties to the target metadata.
 *
 * @param template the serialized XMP packet to parse
 * @param target the XMP metadata instance that receives the parsed properties
 *
 * @throws XMPException if the template cannot be parsed or the properties cannot be appended
 */
private static void appendDeclarationTemplate(String template, XMPMeta target) throws XMPException {
    XMPMeta parsedTemplate = XMPMetaFactory.parseFromString(template);
    XMPUtils.appendProperties(parsedTemplate, target, true, false, true);
}
/**
 * Collects every Well Tagged PDF conformance declared in the XMP metadata declarations bag.
 *
 * <p>The bag may hold several entries (e.g. both accessibility and reuse), so each item is
 * inspected in turn. Items without a {@code conformsTo} field, or with a value that is neither
 * the accessibility nor the reuse identifier, are skipped.
 *
 * @param meta the XMP metadata to inspect
 *
 * @return a list of {@link WellTaggedPdfConformance} values found; never {@code null}, may be empty
 */
static List<WellTaggedPdfConformance> getWtpdfConformanceFromXmp(XMPMeta meta) {
    final List<WellTaggedPdfConformance> declared = new ArrayList<>();
    try {
        final int declarationCount = meta.countArrayItems(XMPConst.NS_DECLARATIONS, XMPConst.DECLARATIONS);
        // Array items are addressed starting from [1].
        for (int index = 1; index <= declarationCount; index++) {
            XMPProperty conformsTo = meta.getProperty(XMPConst.NS_DECLARATIONS,
                    XMPConst.DECLARATIONS + "/[" + index + "]/" + XMPConst.CONFORMS_TO);
            if (conformsTo != null) {
                String identifier = conformsTo.getValue();
                if (XMPConst.NS_WTPDF_ACCESSIBILITY_ID.equals(identifier)) {
                    declared.add(WellTaggedPdfConformance.FOR_ACCESSIBILITY);
                } else if (XMPConst.NS_WTPDF_REUSE_ID.equals(identifier)) {
                    declared.add(WellTaggedPdfConformance.FOR_REUSE);
                }
            }
        }
    } catch (XMPException ignored) {
        // The declarations property is absent or malformed: return whatever was collected so far.
    }
    return declared;
}
/**
 * Reads the PDF/A identification properties from the XMP metadata and maps them to a
 * {@link PdfAConformance}.
 *
 * <p>A result is produced only when the part property is present and either the conformance
 * level property is present as well, or the part value is {@code "4"}.
 *
 * @param meta the XMP metadata to inspect
 *
 * @return the {@link PdfAConformance} found, or {@code null} if none is present
 */
static PdfAConformance getAConformance(XMPMeta meta) {
    XMPProperty levelProperty;
    XMPProperty partProperty;
    try {
        levelProperty = meta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.CONFORMANCE);
        partProperty = meta.getProperty(XMPConst.NS_PDFA_ID, XMPConst.PART);
    } catch (XMPException ignored) {
        // If either lookup fails the part is not available, so no conformance can be reported.
        return null;
    }
    if (partProperty == null) {
        return null;
    }
    String part = partProperty.getValue();
    if (levelProperty != null) {
        return getAConformance(part, levelProperty.getValue());
    }
    // Without a level property only part 4 can still be resolved.
    return "4".equals(part) ? getAConformance(part, null) : null;
}
/**
 * Reads the PDF/UA part property from the XMP metadata and maps it to a {@link PdfUAConformance}.
 *
 * @param meta the XMP metadata to inspect
 *
 * @return the {@link PdfUAConformance} found, or {@code null} if none is present
 */
static PdfUAConformance getUAConformanceFromXmp(XMPMeta meta) {
    XMPProperty partProperty;
    try {
        partProperty = meta.getProperty(XMPConst.NS_PDFUA_ID, XMPConst.PART);
    } catch (XMPException ignored) {
        // A part that cannot be read is handled exactly like a part that is absent.
        return null;
    }
    return partProperty == null ? null : getUAConformance(partProperty.getValue());
}
/**
 * Maps a PDF/A part and level string to the corresponding {@link PdfAConformance} enum constant.
 *
 * <p>The level is upper-cased with {@link StringNormalizer#toUpperCase(String)} before it is
 * compared, so {@code "a"} and {@code "A"} are treated alike. For part {@code "4"} any level
 * other than {@code "E"} or {@code "F"} (including {@code null}) maps to
 * {@link PdfAConformance#PDF_A_4}.
 *
 * @param part the PDF/A part (e.g. {@code "1"}, {@code "2"}, {@code "3"}, or {@code "4"});
 *             {@code null} is treated as an unrecognised part
 * @param level the PDF/A conformance level (e.g. {@code "A"}, {@code "B"}, {@code "U"}, {@code "E"}, or
 *              {@code "F"}); may be {@code null} for part 4
 *
 * @return the matching {@link PdfAConformance}, or {@code null} if {@code part} is {@code null} or the
 * combination is not recognised
 */
static PdfAConformance getAConformance(String part, String level) {
    if (part == null) {
        // A switch on a null String throws NullPointerException; an absent part is simply unrecognised.
        return null;
    }
    String upperLevel = StringNormalizer.toUpperCase(level);
    boolean aLevel = "A".equals(upperLevel);
    boolean bLevel = "B".equals(upperLevel);
    boolean uLevel = "U".equals(upperLevel);
    boolean eLevel = "E".equals(upperLevel);
    boolean fLevel = "F".equals(upperLevel);
    switch (part) {
        case "1":
            // PDF/A-1 only defines levels A and B.
            if (aLevel) {
                return PdfAConformance.PDF_A_1A;
            }
            if (bLevel) {
                return PdfAConformance.PDF_A_1B;
            }
            break;
        case "2":
            if (aLevel) {
                return PdfAConformance.PDF_A_2A;
            }
            if (bLevel) {
                return PdfAConformance.PDF_A_2B;
            }
            if (uLevel) {
                return PdfAConformance.PDF_A_2U;
            }
            break;
        case "3":
            if (aLevel) {
                return PdfAConformance.PDF_A_3A;
            }
            if (bLevel) {
                return PdfAConformance.PDF_A_3B;
            }
            if (uLevel) {
                return PdfAConformance.PDF_A_3U;
            }
            break;
        case "4":
            if (eLevel) {
                return PdfAConformance.PDF_A_4E;
            }
            if (fLevel) {
                return PdfAConformance.PDF_A_4F;
            }
            // Part 4 needs no level: everything else resolves to plain PDF/A-4.
            return PdfAConformance.PDF_A_4;
        default:
            // Unknown part: fall through to the null result below.
            break;
    }
    return null;
}
/**
 * Resolves a PDF/UA part string to its {@link PdfUAConformance} enum constant.
 *
 * @param part the PDF/UA part (e.g. {@code "1"} or {@code "2"})
 *
 * @return the matching {@link PdfUAConformance}, or {@code null} if the part is not recognised
 */
static PdfUAConformance getUAConformance(String part) {
    final PdfUAConformance recognised;
    // Literal-first equals keeps the lookup safe for a null part.
    if ("1".equals(part)) {
        recognised = PdfUAConformance.PDF_UA_1;
    } else if ("2".equals(part)) {
        recognised = PdfUAConformance.PDF_UA_2;
    } else {
        recognised = null;
    }
    return recognised;
}
}