PdfCheckersUtil.java

/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2025 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.kernel.utils.checkers;

import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfConformance;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfObject;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.xmp.XMPConst;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.properties.XMPProperty;

import java.io.ByteArrayInputStream;
import java.util.function.Function;

/**
 * Utility class that contains common checks used in both PDF/A and PDF/UA modules.
 *
 * <p>
 * All members are static; the class cannot be instantiated.
 */
public final class PdfCheckersUtil {

    private PdfCheckersUtil() {
        // Private constructor will prevent the instantiation of this class directly.
    }

    /**
     * Checks that natural language is declared using the methods described in ISO 32000-2:2020, 14.9.2 or
     * ISO 32000-1:2008, 14.9.2 (same requirements).
     *
     * <p>
     * A missing {@code Lang} entry is reported through {@code exceptionSupplier} in the same way as an entry
     * whose value is not accepted by {@link BCP47Validator}.
     *
     * @param catalogDict {@link PdfDictionary} document catalog dictionary containing {@code Lang} entry to check
     * @param exceptionSupplier {@code Function<String, PdfException>} in order to provide correct exception
     */
    public static void validateLang(PdfDictionary catalogDict, Function<String, PdfException> exceptionSupplier) {
        final PdfObject lang = catalogDict.get(PdfName.Lang);
        // An absent entry must produce the conformance exception rather than a NullPointerException.
        if (lang == null || !BCP47Validator.validate(lang.toString())) {
            throw exceptionSupplier.apply(KernelExceptionMessageConstant.DOCUMENT_SHALL_CONTAIN_VALID_LANG_ENTRY);
        }
    }

    /**
     * Checks that the {@code Catalog} dictionary of a conforming file contains the {@code Metadata} key whose value is
     * a metadata stream as defined in ISO 32000-2:2020. Also checks that the value of either {@code pdfuaid:part} or
     * {@code pdfaid:part} is the provided one for conforming PDF files and validates required {@code pdfuaid:rev} or
     * {@code pdfaid:rev} value.
     *
     * <p>
     * The checks are performed in the following order, and the first failing one is reported via
     * {@code exceptionSupplier}:
     * <ol>
     * <li>the catalog contains the {@code Metadata} key;
     * <li>the {@code Metadata} value is a stream that can be parsed as XMP;
     * <li>the {@code part} property equals the part of the expected conformance;
     * <li>the {@code rev} property is present and is a four-digit year.
     * </ol>
     *
     * <p>
     * NOTE(review): earlier documentation of this method also mentioned a {@code dc:title} check for PDF/UA and
     * a {@code pdfa:conformance} check for PDF/A; neither is performed by this method, so callers needing them
     * have to validate those properties separately.
     *
     * @param catalog {@link PdfDictionary} document catalog dictionary
     * @param conformance either PDF/A or PDF/UA conformance to check
     * @param exceptionSupplier {@code Function<String, PdfException>} in order to provide correct exception
     */
    public static void checkMetadata(PdfDictionary catalog, PdfConformance conformance,
                                     Function<String, PdfException> exceptionSupplier) {
        if (!catalog.containsKey(PdfName.Metadata)) {
            throw exceptionSupplier.apply(
                    KernelExceptionMessageConstant.METADATA_SHALL_BE_PRESENT_IN_THE_CATALOG_DICTIONARY);
        }
        try {
            final PdfStream xmpMetadata = catalog.getAsStream(PdfName.Metadata);
            if (xmpMetadata == null) {
                // The key is present, but its value is not a stream.
                throw exceptionSupplier.apply(KernelExceptionMessageConstant.INVALID_METADATA_VALUE);
            }
            final XMPMeta metadata = XMPMetaFactory.parse(new ByteArrayInputStream(xmpMetadata.getBytes()));

            // PDF/A and PDF/UA keep their identification properties in different XMP namespaces.
            final boolean pdfA = conformance.isPdfA();
            final String namespaceId = pdfA ? XMPConst.NS_PDFA_ID : XMPConst.NS_PDFUA_ID;
            final String expectedPart = pdfA
                    ? conformance.getAConformance().getPart()
                    : conformance.getUAConformance().getPart();

            final XMPProperty actualPart = metadata.getProperty(namespaceId, XMPConst.PART);
            // Read the value once and null-safely: the property may be absent, and the value itself is not
            // guaranteed to be non-null by the XMPProperty interface.
            final String actualPartValue = actualPart == null ? null : actualPart.getValue();
            if (!expectedPart.equals(actualPartValue)) {
                // Report the plain string value (or null when missing/empty) instead of the property object,
                // so that the message does not depend on the XMPProperty implementation's toString().
                final String reportedPart =
                        (actualPartValue == null || actualPartValue.isEmpty()) ? null : actualPartValue;
                throw exceptionSupplier.apply(MessageFormatUtil.format(KernelExceptionMessageConstant
                        .XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_PART, expectedPart, reportedPart));
            }

            final XMPProperty rev = metadata.getProperty(namespaceId, XMPConst.REV);
            if (rev == null || !isValidXmpRevision(rev.getValue())) {
                throw exceptionSupplier.apply(KernelExceptionMessageConstant
                        .XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_REV);
            }
        } catch (XMPException e) {
            // Any XMP parsing/lookup failure is reported as an invalid metadata value.
            throw exceptionSupplier.apply(KernelExceptionMessageConstant.INVALID_METADATA_VALUE);
        }
    }

    /**
     * Gets all the descending kids including widgets for a given {@link PdfArray} representing array of form fields.
     *
     * <p>
     * The result is built depth-first: every field is immediately followed by the flattened content of its
     * {@code Kids} array, if it has one.
     *
     * <p>
     * NOTE(review): every entry of the array (and of nested {@code Kids} arrays) is cast to
     * {@link PdfDictionary}; the method assumes well-formed, acyclic field trees.
     *
     * @param array the {@link PdfArray} of form fields {@link PdfDictionary} objects
     *
     * @return the {@link PdfArray} of all form fields
     */
    public static PdfArray getFormFields(PdfArray array) {
        PdfArray fields = new PdfArray();
        for (PdfObject field : array) {
            PdfArray kids = ((PdfDictionary) field).getAsArray(PdfName.Kids);
            fields.add(field);
            if (kids != null) {
                fields.addAll(getFormFields(kids));
            }
        }
        return fields;
    }

    /**
     * Validates {@code pdfuaid:rev} or {@code pdfaid:rev} value which is four-digit year of the date of
     * publication or revision.
     *
     * <p>
     * Only the ASCII digits {@code 0}-{@code 9} are accepted; other Unicode decimal digits are rejected.
     *
     * @param value {@code rev} value to check, may be {@code null}
     *
     * @return {@code true} if the value consists of exactly four ASCII digits, {@code false} otherwise
     */
    private static boolean isValidXmpRevision(String value) {
        if (value == null || value.length() != 4) {
            return false;
        }
        for (int i = 0; i < value.length(); i++) {
            final char c = value.charAt(i);
            // Character.isDigit would also accept non-ASCII digits, which do not form a valid year here.
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks if the specified flag is set.
     *
     * <p>
     * The flag is considered set when {@code flags} and {@code flag} have at least one bit in common.
     *
     * @param flags a set of flags specifying various characteristics of the PDF object
     * @param flag  to be checked
     *
     * @return {@code true} if the specified flag is set, {@code false} otherwise
     */
    public static boolean checkFlag(int flags, int flag) {
        return (flags & flag) != 0;
    }
}