PdfCheckerTest.java
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2025 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.validation;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.Pdf20ConformanceException;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.pdf.PdfCatalog;
import com.itextpdf.kernel.pdf.PdfConformance;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfVersion;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.WriterProperties;
import com.itextpdf.kernel.pdf.tagging.PdfNamespace;
import com.itextpdf.kernel.pdf.tagging.PdfStructElem;
import com.itextpdf.kernel.pdf.tagging.StandardNamespaces;
import com.itextpdf.kernel.utils.checkers.PdfCheckersUtil;
import com.itextpdf.test.AssertUtil;
import com.itextpdf.test.ExtendedITextTest;
import com.itextpdf.test.annotations.LogMessage;
import com.itextpdf.test.annotations.LogMessages;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.function.Function;
/**
 * Unit tests for the PDF 2.0 related checks exposed by {@link Pdf20Checker} and
 * {@link PdfCheckersUtil}: catalog metadata, the catalog {@code Lang} entry and
 * structure tree role mapping.
 *
 * <p>
 * Every test works on a fresh in-memory PDF 2.0 document, so nothing is written to disk.
 */
@Tag("UnitTest")
public class PdfCheckerTest extends ExtendedITextTest {
    public static final String SOURCE_FOLDER = "./src/test/resources/com/itextpdf/kernel/validation/PdfCheckerTest/";

    private static final Function<String, PdfException> EXCEPTION_SUPPLIER = (msg) -> new PdfException(msg);

    /**
     * Verifies that {@link Pdf20Checker#checkMetadata} rejects a metadata stream for each of the
     * tried {@code Type}/{@code Subtype} combinations that is not {@code Metadata}/{@code XML}.
     *
     * @throws IOException if the XMP resource cannot be read
     */
    @Test
    public void invalidTypeSubtypeMetadataUA2Test() throws IOException {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            PdfStream metadata = putMetadataStream(catalog, "metadata_ua2.xmp");
            Pdf20Checker checker = new Pdf20Checker(pdfDocument);

            // Neither Type nor Subtype is set.
            Exception e = Assertions.assertThrows(Pdf20ConformanceException.class,
                    () -> checker.checkMetadata(catalog));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.METADATA_STREAM_REQUIRES_METADATA_TYPE_AND_XML_SUBTYPE,
                    e.getMessage());

            // Wrong Type, Subtype still missing.
            metadata.put(PdfName.Type, PdfName.XML);
            e = Assertions.assertThrows(Pdf20ConformanceException.class, () -> checker.checkMetadata(catalog));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.METADATA_STREAM_REQUIRES_METADATA_TYPE_AND_XML_SUBTYPE,
                    e.getMessage());

            // Correct Type, Subtype still missing.
            metadata.put(PdfName.Type, PdfName.Metadata);
            e = Assertions.assertThrows(Pdf20ConformanceException.class, () -> checker.checkMetadata(catalog));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.METADATA_STREAM_REQUIRES_METADATA_TYPE_AND_XML_SUBTYPE,
                    e.getMessage());

            // Correct Type (kept from the previous step), wrong Subtype.
            metadata.put(PdfName.Subtype, PdfName.Metadata);
            e = Assertions.assertThrows(Pdf20ConformanceException.class, () -> checker.checkMetadata(catalog));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.METADATA_STREAM_REQUIRES_METADATA_TYPE_AND_XML_SUBTYPE,
                    e.getMessage());
        }
    }

    /**
     * Verifies that {@link PdfCheckersUtil#checkMetadata} fails when the catalog has no
     * {@code Metadata} entry at all.
     */
    @Test
    public void noMetadataUA2Test() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();

            Exception e = Assertions.assertThrows(PdfException.class, () ->
                    PdfCheckersUtil.checkMetadata(catalog.getPdfObject(), PdfConformance.PDF_UA_2,
                            EXCEPTION_SUPPLIER));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.METADATA_SHALL_BE_PRESENT_IN_THE_CATALOG_DICTIONARY,
                    e.getMessage());
        }
    }

    /**
     * Verifies that {@link PdfCheckersUtil#checkMetadata} fails when the catalog {@code Metadata}
     * entry is a name instead of a stream.
     */
    @Test
    public void notStreamMetadataUA2Test() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            catalog.put(PdfName.Metadata, PdfName.Metadata);

            Exception e = Assertions.assertThrows(PdfException.class, () ->
                    PdfCheckersUtil.checkMetadata(catalog.getPdfObject(), PdfConformance.PDF_UA_2,
                            EXCEPTION_SUPPLIER));
            Assertions.assertEquals(KernelExceptionMessageConstant.INVALID_METADATA_VALUE, e.getMessage());
        }
    }

    /**
     * Verifies that {@link PdfCheckersUtil#checkMetadata} fails when the metadata stream holds
     * bytes that are not XMP. The test also expects the
     * {@link IoLogMessageConstant#EXCEPTION_WHILE_UPDATING_XMPMETADATA} log message to be emitted.
     */
    @Test
    @LogMessages(messages = {@LogMessage(messageTemplate = IoLogMessageConstant.EXCEPTION_WHILE_UPDATING_XMPMETADATA)})
    public void brokenMetadataUA2Test() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            catalog.put(PdfName.Metadata, new PdfStream(new byte[]{1, 2, 3}));

            Exception e = Assertions.assertThrows(PdfException.class, () ->
                    PdfCheckersUtil.checkMetadata(catalog.getPdfObject(), PdfConformance.PDF_UA_2,
                            EXCEPTION_SUPPLIER));
            Assertions.assertEquals(KernelExceptionMessageConstant.INVALID_METADATA_VALUE, e.getMessage());
        }
    }

    /**
     * Verifies that {@link PdfCheckersUtil#checkMetadata} reports a missing version identifier
     * part for the {@code no_version_metadata_ua2.xmp} resource.
     *
     * @throws IOException if the XMP resource cannot be read
     */
    @Test
    public void noPartInMetadataUA2Test() throws IOException {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            // Type/Subtype go on the metadata stream; putting them on the catalog would
            // overwrite the catalog's own Type entry.
            markAsXmlMetadata(putMetadataStream(catalog, "no_version_metadata_ua2.xmp"));

            Exception e = Assertions.assertThrows(PdfException.class, () ->
                    PdfCheckersUtil.checkMetadata(catalog.getPdfObject(), PdfConformance.PDF_UA_2,
                            EXCEPTION_SUPPLIER));
            Assertions.assertEquals(MessageFormatUtil.format(
                    KernelExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_PART,
                    2, null), e.getMessage());
        }
    }

    /**
     * Verifies that {@link PdfCheckersUtil#checkMetadata} reports a missing version identifier
     * revision for the {@code no_revision_metadata_ua2.xmp} resource.
     *
     * @throws IOException if the XMP resource cannot be read
     */
    @Test
    public void noRevInMetadataUA2Test() throws IOException {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            // Type/Subtype go on the metadata stream; putting them on the catalog would
            // overwrite the catalog's own Type entry.
            markAsXmlMetadata(putMetadataStream(catalog, "no_revision_metadata_ua2.xmp"));

            Exception e = Assertions.assertThrows(PdfException.class, () ->
                    PdfCheckersUtil.checkMetadata(catalog.getPdfObject(), PdfConformance.PDF_UA_2,
                            EXCEPTION_SUPPLIER));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.XMP_METADATA_HEADER_SHALL_CONTAIN_VERSION_IDENTIFIER_REV,
                    e.getMessage());
        }
    }

    /**
     * Verifies that {@link PdfCheckersUtil#checkMetadata} accepts the {@code metadata_ua2.xmp}
     * resource for PDF/UA-2 conformance.
     *
     * @throws IOException if the XMP resource cannot be read
     */
    @Test
    public void validMetadataUA2Test() throws IOException {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            // Type/Subtype go on the metadata stream; putting them on the catalog would
            // overwrite the catalog's own Type entry.
            markAsXmlMetadata(putMetadataStream(catalog, "metadata_ua2.xmp"));

            AssertUtil.doesNotThrow(() ->
                    PdfCheckersUtil.checkMetadata(catalog.getPdfObject(), PdfConformance.PDF_UA_2,
                            EXCEPTION_SUPPLIER));
        }
    }

    /**
     * Verifies that {@link Pdf20Checker#checkLang} accepts the language tag {@code en-US}.
     */
    @Test
    public void validLangTest() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            catalog.setLang(new PdfString("en-US"));

            Pdf20Checker checker = new Pdf20Checker(pdfDocument);
            AssertUtil.doesNotThrow(() -> checker.checkLang(catalog));
        }
    }

    /**
     * Verifies that {@link Pdf20Checker#checkLang} accepts an empty language string.
     */
    @Test
    public void emptyLangTest() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            catalog.setLang(new PdfString(""));

            Pdf20Checker checker = new Pdf20Checker(pdfDocument);
            AssertUtil.doesNotThrow(() -> checker.checkLang(catalog));
        }
    }

    /**
     * Verifies that {@link Pdf20Checker#checkLang} rejects the language string {@code inva:lid}.
     */
    @Test
    public void invalidLangTest() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.addNewPage();
            PdfCatalog catalog = pdfDocument.getCatalog();
            catalog.setLang(new PdfString("inva:lid"));

            Pdf20Checker checker = new Pdf20Checker(pdfDocument);
            Exception e = Assertions.assertThrows(Pdf20ConformanceException.class,
                    () -> checker.checkLang(catalog));
            Assertions.assertEquals(
                    KernelExceptionMessageConstant.DOCUMENT_SHALL_CONTAIN_VALID_LANG_ENTRY,
                    e.getMessage());
        }
    }

    /**
     * Verifies that {@link Pdf20Checker#checkStructureTreeRoot} fails for a structure element with
     * the custom role {@code chapter} that has no namespace and no role mapping.
     */
    @Test
    public void roleIsNotMappedToStandardNamespaceTest() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.setTagged();
            PdfPage page = pdfDocument.addNewPage();

            PdfStructElem doc = pdfDocument.getStructTreeRoot()
                    .addKid(new PdfStructElem(pdfDocument, PdfName.Document));
            PdfNamespace namespace = new PdfNamespace(StandardNamespaces.PDF_2_0);
            doc.setNamespace(namespace);
            pdfDocument.getStructTreeRoot().addNamespace(namespace);

            PdfStructElem paragraph = doc.addKid(new PdfStructElem(pdfDocument, PdfName.P));
            paragraph.addKid(new PdfStructElem(pdfDocument, new PdfName("chapter"), page));

            Pdf20Checker checker = new Pdf20Checker(pdfDocument);
            Exception e = Assertions.assertThrows(Pdf20ConformanceException.class,
                    () -> checker.checkStructureTreeRoot(pdfDocument.getStructTreeRoot()));
            Assertions.assertEquals(MessageFormatUtil.format(
                    KernelExceptionMessageConstant.ROLE_IS_NOT_MAPPED_TO_ANY_STANDARD_ROLE, "chapter"),
                    e.getMessage());
        }
    }

    /**
     * Verifies that {@link Pdf20Checker#checkStructureTreeRoot} fails for a namespaced custom role
     * whose mapping chain ({@code chapter} -> {@code chapterChild} -> {@code chapterGrandchild})
     * only passes through the custom roles set up in this test.
     */
    @Test
    public void roleWithNamespaceIsNotMappedToStandardNamespaceTest() {
        try (PdfDocument pdfDocument = createPdf20Document()) {
            pdfDocument.setTagged();
            PdfPage page = pdfDocument.addNewPage();

            PdfStructElem doc = pdfDocument.getStructTreeRoot()
                    .addKid(new PdfStructElem(pdfDocument, PdfName.Document));
            PdfNamespace namespace20 = new PdfNamespace(StandardNamespaces.PDF_2_0);
            doc.setNamespace(namespace20);

            PdfStructElem paragraph = doc.addKid(new PdfStructElem(pdfDocument, PdfName.P));
            PdfStructElem chapter = paragraph.addKid(
                    new PdfStructElem(pdfDocument, new PdfName("chapter"), page));
            PdfNamespace namespace = new PdfNamespace("http://www.w3.org/1999/xhtml");
            chapter.setNamespace(namespace);

            PdfNamespace otherNamespace = new PdfNamespace("http://www.w3.org/2000/svg");
            namespace.addNamespaceRoleMapping("chapter", "chapterChild", otherNamespace);
            otherNamespace.addNamespaceRoleMapping("chapterChild", "chapterGrandchild");

            pdfDocument.getStructTreeRoot().addNamespace(namespace20);
            pdfDocument.getStructTreeRoot().addNamespace(namespace);
            pdfDocument.getStructTreeRoot().addNamespace(otherNamespace);

            Pdf20Checker checker = new Pdf20Checker(pdfDocument);
            Exception e = Assertions.assertThrows(Pdf20ConformanceException.class,
                    () -> checker.checkStructureTreeRoot(pdfDocument.getStructTreeRoot()));
            Assertions.assertEquals(MessageFormatUtil.format(
                    KernelExceptionMessageConstant.ROLE_IN_NAMESPACE_IS_NOT_MAPPED_TO_ANY_STANDARD_ROLE, "chapter",
                    namespace.getNamespaceName()), e.getMessage());
        }
    }

    /**
     * Creates an in-memory document whose writer is configured for PDF 2.0.
     *
     * @return a new {@link PdfDocument} backed by a {@link ByteArrayOutputStream}
     */
    private static PdfDocument createPdf20Document() {
        WriterProperties properties = new WriterProperties().setPdfVersion(PdfVersion.PDF_2_0);
        return new PdfDocument(new PdfWriter(new ByteArrayOutputStream(), properties));
    }

    /**
     * Reads an XMP resource from {@link #SOURCE_FOLDER}, wraps it in a stream and stores that
     * stream under the catalog {@code Metadata} key.
     *
     * @param catalog     catalog that receives the metadata stream
     * @param xmpFileName file name of the XMP resource, relative to {@link #SOURCE_FOLDER}
     *
     * @return the stream that was put into the catalog
     *
     * @throws IOException if the resource cannot be read
     */
    private static PdfStream putMetadataStream(PdfCatalog catalog, String xmpFileName) throws IOException {
        byte[] bytes = Files.readAllBytes(Paths.get(SOURCE_FOLDER + xmpFileName));
        PdfStream metadata = new PdfStream(bytes);
        catalog.put(PdfName.Metadata, metadata);
        return metadata;
    }

    /**
     * Sets {@code Type = Metadata} and {@code Subtype = XML} on the given metadata stream.
     *
     * @param metadata metadata stream to update
     */
    private static void markAsXmlMetadata(PdfStream metadata) {
        metadata.put(PdfName.Type, PdfName.Metadata);
        metadata.put(PdfName.Subtype, PdfName.XML);
    }
}