PdfCanvasProcessorIntegrationTest.java
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2025 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.pdf.canvas.parser;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.source.ByteArrayOutputStream;
import com.itextpdf.kernel.colors.Color;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.geom.Matrix;
import com.itextpdf.kernel.logs.KernelLogMessageConstant;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfResources;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.parser.data.ClippingPathInfo;
import com.itextpdf.kernel.pdf.canvas.parser.data.IEventData;
import com.itextpdf.kernel.pdf.canvas.parser.data.ImageRenderInfo;
import com.itextpdf.kernel.pdf.canvas.parser.data.PathRenderInfo;
import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IEventListener;
import com.itextpdf.kernel.pdf.canvas.parser.listener.LocationTextExtractionStrategy;
import com.itextpdf.kernel.pdf.canvas.parser.listener.SimpleTextExtractionStrategy;
import com.itextpdf.kernel.pdf.colorspace.PdfColorSpace;
import com.itextpdf.kernel.pdf.colorspace.PdfSpecialCs;
import com.itextpdf.test.AssertUtil;
import com.itextpdf.test.ExtendedITextTest;
import com.itextpdf.test.TestUtil;
import com.itextpdf.test.annotations.LogMessage;
import com.itextpdf.test.annotations.LogMessages;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
@Tag("IntegrationTest")
public class PdfCanvasProcessorIntegrationTest extends ExtendedITextTest {
// Folder holding the input PDFs and the expected event log read by the tests in this class.
private static final String SOURCE_FOLDER = "./src/test/resources/com/itextpdf/kernel/parser/PdfCanvasProcessorTest/";
// Folder below the test output path; it is created in setUp() before the tests run.
private static final String DESTINATION_FOLDER = TestUtil.getOutputPath() + "/kernel/parser/PdfCanvasProcessorTest/";
/**
 * Prepares the destination folder of this test class before any test is executed.
 */
@BeforeAll
public static void setUp() {
    ExtendedITextTest.createDestinationFolder(DESTINATION_FOLDER);
}
/**
 * Processes every page of {@code tableWithImageAndText.pdf} with a listener that logs each
 * high-level event and compares the resulting log with the stored reference log.
 *
 * @throws IOException if the source PDF or the reference log cannot be read
 */
@Test
public void contentStreamProcessorTest() throws IOException {
    StringBuilder pageEventsLog = new StringBuilder();
    // try-with-resources releases the document even when processing a page fails
    try (PdfDocument document = new PdfDocument(new PdfReader(SOURCE_FOLDER + "tableWithImageAndText.pdf"),
            new PdfWriter(new ByteArrayOutputStream()))) {
        for (int i = 1; i <= document.getNumberOfPages(); ++i) {
            PdfPage page = document.getPage(i);
            PdfCanvasProcessor processor = new PdfCanvasProcessor(
                    new RecordEveryHighLevelEventListener(pageEventsLog));
            processor.processPageContent(page);
        }
    }

    byte[] logBytes = Files.readAllBytes(Paths.get(SOURCE_FOLDER + "contentStreamProcessorTest_events_log.dat"));
    String expectedPageEventsLog = new String(logBytes, StandardCharsets.UTF_8);
    Assertions.assertEquals(expectedPageEventsLog, pageEventsLog.toString());
}
/**
 * Checks that the fill opacity reported for the text of {@code transparentText.pdf} is 0.5.
 * The listener overwrites its map entries on every text event, so the value checked is the one
 * recorded for the most recent text event.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void processGraphicsStateResourceOperatorFillOpacityTest() throws IOException {
    Float expOpacity = 0.5f;
    Map<String, Object> textRenderInfo = new HashMap<>();
    // try-with-resources releases the document even when processing a page fails
    try (PdfDocument document = new PdfDocument(new PdfReader(SOURCE_FOLDER + "transparentText.pdf"))) {
        for (int i = 1; i <= document.getNumberOfPages(); ++i) {
            PdfPage page = document.getPage(i);
            PdfCanvasProcessor processor = new PdfCanvasProcessor(new RecordEveryTextRenderEvent(textRenderInfo));
            processor.processPageContent(page);
        }
    }

    Assertions.assertEquals(expOpacity, textRenderInfo.get("FillOpacity"), "Expected fill opacity not found");
}
/**
 * Checks that the stroke opacity reported for the text of {@code hiddenText.pdf} is 0.
 * The listener overwrites its map entries on every text event, so the value checked is the one
 * recorded for the most recent text event.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void processGraphicsStateResourceOperatorStrokeOpacityTest() throws IOException {
    Float expOpacity = 0.0f;
    Map<String, Object> textRenderInfo = new HashMap<>();
    // try-with-resources releases the document even when processing a page fails
    try (PdfDocument document = new PdfDocument(new PdfReader(SOURCE_FOLDER + "hiddenText.pdf"))) {
        for (int i = 1; i <= document.getNumberOfPages(); ++i) {
            PdfPage page = document.getPage(i);
            PdfCanvasProcessor processor = new PdfCanvasProcessor(new RecordEveryTextRenderEvent(textRenderInfo));
            processor.processPageContent(page);
        }
    }

    Assertions.assertEquals(expOpacity, textRenderInfo.get("StrokeOpacity"), "Expected stroke opacity not found");
}
/**
 * Verifies that processing the first page of {@code closingEmptyPath.pdf} does not throw.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void testClosingEmptyPath() throws IOException {
    String fileName = "closingEmptyPath.pdf";
    // try-with-resources releases the document once the check is done
    try (PdfDocument document = new PdfDocument(new PdfReader(SOURCE_FOLDER + fileName))) {
        PdfCanvasProcessor processor = new PdfCanvasProcessor(new NoOpEventListener());
        // Assert that no exception is thrown when an empty path is handled
        AssertUtil.doesNotThrow(() -> processor.processPageContent(document.getPage(1)));
    }
}
/**
 * Extracts the text of the first page of {@code noninvertibleMatrix.pdf} and checks it against the
 * expected string; exactly one FAILED_TO_PROCESS_A_TRANSFORMATION_MATRIX log message is expected.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
@LogMessages(messages = @LogMessage(messageTemplate = IoLogMessageConstant.FAILED_TO_PROCESS_A_TRANSFORMATION_MATRIX, count = 1))
public void testNoninvertibleMatrix() throws IOException {
    String fileName = "noninvertibleMatrix.pdf";
    String resultantText;
    // try-with-resources closes the document even when page processing throws
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(SOURCE_FOLDER + fileName))) {
        LocationTextExtractionStrategy strategy = new LocationTextExtractionStrategy();
        PdfCanvasProcessor processor = new PdfCanvasProcessor(strategy);
        PdfPage page = pdfDocument.getFirstPage();
        processor.processPageContent(page);
        // Read the text while the document is still open, as the original test did
        resultantText = strategy.getResultantText();
    }

    Assertions.assertEquals("Hello World!\nHello World!\nHello World!\nHello World! Hello World! Hello World!", resultantText);
}
/**
 * Expects a {@link StackOverflowError} when the first page of
 * {@code circularReferencesInResources.pdf} is processed. Currently disabled.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
@Disabled("DEVSIX-3608: this test currently throws StackOverflowError, which cannot be caught in .NET")
public void parseCircularReferencesInResourcesTest() throws IOException {
    final String inputPath = SOURCE_FOLDER + "circularReferencesInResources.pdf";
    try (PdfDocument doc = new PdfDocument(new PdfReader(inputPath))) {
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(new NoOpEventListener());
        PdfPage firstPage = doc.getFirstPage();
        Assertions.assertThrows(StackOverflowError.class,
                () -> canvasProcessor.processPageContent(firstPage));
    }
}
/**
 * Processes every page of {@code patternColorParsingNotValidPdfTest.pdf} and expects the fill color
 * of the recorded path to be {@code null}; the UNABLE_TO_PARSE_COLOR_WITHIN_COLORSPACE log message
 * is expected.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
@LogMessages(messages = @LogMessage(messageTemplate = KernelLogMessageConstant.UNABLE_TO_PARSE_COLOR_WITHIN_COLORSPACE))
public void patternColorParsingNotValidPdfTest() throws IOException {
    String inputFile = SOURCE_FOLDER + "patternColorParsingNotValidPdfTest.pdf";
    // try-with-resources releases the document even when an assertion or the processing fails
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFile))) {
        for (int i = 1; i <= pdfDocument.getNumberOfPages(); ++i) {
            PdfPage page = pdfDocument.getPage(i);

            ColorParsingEventListener colorParsingEventListener = new ColorParsingEventListener();
            PdfCanvasProcessor processor = new PdfCanvasProcessor(colorParsingEventListener);
            processor.processPageContent(page);

            Color fillColor = colorParsingEventListener.getEncounteredPath().getFillColor();
            Assertions.assertNull(fillColor);
        }
    }
}
/**
 * Processes every page of {@code patternColorParsingValidPdfTest.pdf} and expects the fill color
 * of the recorded path to use a {@link PdfSpecialCs.Pattern} color space.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void patternColorParsingValidPdfTest() throws IOException {
    String inputFile = SOURCE_FOLDER + "patternColorParsingValidPdfTest.pdf";
    // try-with-resources releases the document even when an assertion or the processing fails
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFile))) {
        for (int i = 1; i <= pdfDocument.getNumberOfPages(); ++i) {
            PdfPage page = pdfDocument.getPage(i);

            ColorParsingEventListener colorParsingEventListener = new ColorParsingEventListener();
            PdfCanvasProcessor processor = new PdfCanvasProcessor(colorParsingEventListener);
            processor.processPageContent(page);

            PathRenderInfo renderInfo = colorParsingEventListener.getEncounteredPath();
            PdfColorSpace colorSpace = renderInfo.getGraphicsState().getFillColor().getColorSpace();
            Assertions.assertTrue(colorSpace instanceof PdfSpecialCs.Pattern);
        }
    }
}
/**
 * Captures the first image rendered on page 1 of {@code tableWithImageAndText.pdf} and checks the
 * properties exposed by its {@link ImageRenderInfo}.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void checkImageRenderInfoProcessorTest() throws IOException {
    // The assertions stay inside the try block because they query objects that belong to the open document
    try (PdfDocument document = new PdfDocument(new PdfReader(SOURCE_FOLDER + "tableWithImageAndText.pdf"),
            new PdfWriter(new ByteArrayOutputStream()))) {
        PdfPage page = document.getPage(1);

        RecordFirstImageEventListener eventListener = new RecordFirstImageEventListener();
        PdfCanvasProcessor processor = new PdfCanvasProcessor(eventListener);
        processor.processPageContent(page);

        // Check caught image's ImageRenderInfo
        ImageRenderInfo imageRenderInfo = eventListener.getImageRenderInfo();
        final float eps = 0.001f;

        Assertions.assertFalse(imageRenderInfo.isInline());
        Assertions.assertEquals(1024, imageRenderInfo.getImage().getWidth(), eps);
        Assertions.assertEquals(768, imageRenderInfo.getImage().getHeight(), eps);
        Assertions.assertEquals("/Im1", imageRenderInfo.getImageResourceName().toString());
        Assertions.assertEquals(new com.itextpdf.kernel.geom.Vector(212.67f, 676.25f, 1),
                imageRenderInfo.getStartPoint());
        Assertions.assertEquals(new Matrix(169.67f, 0, 0, 0, 127.25f, 0, 212.67f, 676.25f, 1),
                imageRenderInfo.getImageCtm());
        Assertions.assertEquals(21590.508, imageRenderInfo.getArea(), eps);
        Assertions.assertNull(imageRenderInfo.getColorSpaceDictionary());
        Assertions.assertEquals(1, imageRenderInfo.getCanvasTagHierarchy().size());
        Assertions.assertTrue(imageRenderInfo.hasMcid(5, true));
        Assertions.assertTrue(imageRenderInfo.hasMcid(5));
        Assertions.assertFalse(imageRenderInfo.hasMcid(1));
        Assertions.assertEquals(5, imageRenderInfo.getMcid());
    }
}
/**
 * Feeds the bytes of PDF object 7 of {@code splitTj.pdf} to the processor together with the
 * resources of page 1 and expects the processing to fail with an exception.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void brokenStreamTest() throws IOException {
    // try-with-resources releases the document once the check is done
    try (PdfDocument document = new PdfDocument(new PdfReader(new File(SOURCE_FOLDER + "splitTj.pdf")))) {
        SimpleTextExtractionStrategy listener = new SimpleTextExtractionStrategy();
        PdfCanvasProcessor parser = new PdfCanvasProcessor(listener);

        byte[] streamBytes = ((PdfStream) document.getPdfObject(7)).getBytes();
        PdfResources resources = document.getPage(1).getResources();

        // A class cast exception is expected; the generic exception type is used for autoport
        Assertions.assertThrows(Exception.class, () -> parser.processContent(streamBytes, resources));
    }
}
/**
 * Processes the content bytes of page 1 of {@code splitTj.pdf} with the page's own resources and
 * checks the extracted text.
 *
 * @throws IOException if the source PDF cannot be read
 */
@Test
public void withPageResourcesStreamTest() throws IOException {
    String resultantText;
    // try-with-resources releases the document even when processing fails
    try (PdfDocument document = new PdfDocument(new PdfReader(new File(SOURCE_FOLDER + "splitTj.pdf")))) {
        SimpleTextExtractionStrategy listener = new SimpleTextExtractionStrategy();
        PdfCanvasProcessor parser = new PdfCanvasProcessor(listener);
        PdfPage page = document.getPage(1);
        parser.processContent(page.getContentBytes(), page.getResources());
        resultantText = listener.getResultantText();
    }

    // Expected value goes first so that a failure message reports expected/actual correctly
    Assertions.assertEquals("test 1\ntest 2", resultantText);
}
private static class ColorParsingEventListener implements IEventListener {
private List<IEventData> content = new ArrayList<>();
private static final String pathDataExpected = "Path data expected.";
public void eventOccurred(IEventData data, EventType type) {
if (type.equals(EventType.RENDER_PATH)) {
PathRenderInfo pathRenderInfo = (PathRenderInfo) data;
pathRenderInfo.preserveGraphicsState();
content.add(data);
}
}
/**
* Get the last encountered PathRenderInfo, then clears the internal buffer
*
* @return the PathRenderInfo object that was encountered when processing the last path rendering operation
*/
PathRenderInfo getEncounteredPath() {
if (content.size() == 0) {
return null;
}
IEventData eventData = content.get(0);
if (!(eventData instanceof PathRenderInfo)) {
throw new PdfException(pathDataExpected);
}
content.clear();
return (PathRenderInfo) eventData;
}
public Set<EventType> getSupportedEvents() {
return null;
}
}
/**
 * Listener that does nothing with the events it receives.
 */
private static class NoOpEventListener implements IEventListener {
    /**
     * @return {@code null}, i.e. no explicit set of supported event types is declared
     */
    @Override
    public Set<EventType> getSupportedEvents() {
        return null;
    }

    /**
     * Ignores the event.
     *
     * @param data the event data (unused)
     * @param type the event type (unused)
     */
    @Override
    public void eventOccurred(IEventData data, EventType type) {
        // Deliberately empty
    }
}
private static class RecordFirstImageEventListener implements IEventListener {
private ImageRenderInfo imageRenderInfo = null;
RecordFirstImageEventListener() {
}
public void eventOccurred(IEventData data, EventType type) {
switch (type) {
case RENDER_IMAGE:
if (imageRenderInfo == null) {
imageRenderInfo = (ImageRenderInfo) data;
}
break;
}
}
public Set<EventType> getSupportedEvents() {
return null;
}
public ImageRenderInfo getImageRenderInfo() {
return imageRenderInfo;
}
}
private static class RecordEveryHighLevelEventListener implements IEventListener {
private static final String END_EVENT_OCCURRENCE = "------------------------------------";
private StringBuilder sb;
RecordEveryHighLevelEventListener(StringBuilder outStream) {
this.sb = outStream;
}
public void eventOccurred(IEventData data, EventType type) {
switch (type) {
case BEGIN_TEXT:
sb.append("-------- BEGIN TEXT ---------").append("\n");
sb.append(END_EVENT_OCCURRENCE).append("\n");
break;
case RENDER_TEXT:
sb.append("-------- RENDER TEXT --------").append("\n");
TextRenderInfo renderInfo = (TextRenderInfo) data;
sb.append("String: ").append(renderInfo.getPdfString().toUnicodeString()).append("\n");
sb.append(END_EVENT_OCCURRENCE).append("\n");
break;
case END_TEXT:
sb.append("-------- END TEXT -----------").append("\n");
sb.append(END_EVENT_OCCURRENCE).append("\n");
break;
case RENDER_IMAGE:
sb.append("-------- RENDER IMAGE ---------").append("\n");
ImageRenderInfo imageRenderInfo = (ImageRenderInfo) data;
sb.append("Image: ").append(imageRenderInfo.getImageResourceName()).append("\n");
sb.append(END_EVENT_OCCURRENCE).append("\n");
break;
case RENDER_PATH:
sb.append("-------- RENDER PATH --------").append("\n");
PathRenderInfo pathRenderInfo = (PathRenderInfo) data;
sb.append("Operation type: ").append(pathRenderInfo.getOperation()).append("\n");
sb.append("Num of subpaths: ").append(pathRenderInfo.getPath().getSubpaths().size()).append("\n");
sb.append(END_EVENT_OCCURRENCE).append("\n");
break;
case CLIP_PATH_CHANGED:
sb.append("-------- CLIPPING PATH ------").append("\n");
ClippingPathInfo clippingPathRenderInfo = (ClippingPathInfo) data;
sb.append("Num of subpaths: ").append(clippingPathRenderInfo.getClippingPath().getSubpaths().size())
.append("\n");
sb.append(END_EVENT_OCCURRENCE).append("\n");
break;
}
}
public Set<EventType> getSupportedEvents() {
return null;
}
}
private static class RecordEveryTextRenderEvent implements IEventListener {
private Map<String, Object> map;
RecordEveryTextRenderEvent(Map<String, Object> map) {
this.map = map;
}
public void eventOccurred(IEventData data, EventType type) {
if (data instanceof TextRenderInfo) {
TextRenderInfo renderInfo = (TextRenderInfo) data;
map.put("String", renderInfo.getPdfString().toUnicodeString());
map.put("FillOpacity", renderInfo.getGraphicsState().getFillOpacity());
map.put("StrokeOpacity", renderInfo.getGraphicsState().getStrokeOpacity());
}
}
public Set<EventType> getSupportedEvents() {
return null;
}
}
}