PdfContentExtractionTest.java
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2025 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.pdf.canvas.parser;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.clipper.ClipperBridge;
import com.itextpdf.kernel.pdf.canvas.parser.clipper.ClipperException;
import com.itextpdf.kernel.pdf.canvas.parser.clipper.ClipperExceptionConstant;
import com.itextpdf.kernel.pdf.canvas.parser.listener.LocationTextExtractionStrategy;
import com.itextpdf.test.AssertUtil;
import com.itextpdf.test.ExtendedITextTest;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import java.io.IOException;
/**
 * Integration tests for content extraction from a document whose page content
 * uses very large coordinates that are scaled down by the CTM.
 */
@Tag("IntegrationTest")
public class PdfContentExtractionTest extends ExtendedITextTest {

    private static final String SOURCE_FOLDER =
            "./src/test/resources/com/itextpdf/kernel/parser/PdfContentExtractionTest/";

    /**
     * Checks that processing the first page of the big-coordinates document
     * completes without throwing.
     *
     * @throws IOException if the input document cannot be read
     */
    @Test
    public void contentExtractionInDocWithBigCoordinatesTest() throws IOException {
        String inputFileName = SOURCE_FOLDER + "docWithBigCoordinates.pdf";
        // In this document the CTM shrinks coordinates and these coordinates are large numbers.
        // At the time this test was created, clipper had a problem with handling large numbers
        // since internally it deals with integers and has to multiply large numbers even more
        // for internal purposes.
        try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFileName))) {
            PdfDocumentContentParser contentParser = new PdfDocumentContentParser(pdfDocument);
            AssertUtil.doesNotThrow(() -> contentParser.processContent(1, new LocationTextExtractionStrategy()));
        }
    }

    /**
     * Checks that, with {@code ClipperBridge.floatMultiplier} forced to {@code 10^14}, processing
     * the first page of the big-coordinates document fails with a {@link ClipperException}
     * carrying the {@link ClipperExceptionConstant#COORDINATE_OUTSIDE_ALLOWED_RANGE} message.
     *
     * @throws IOException if the input document cannot be read
     */
    @Test
    public void contentExtractionInDocWithStaticFloatMultiplierTest() throws IOException {
        String inputFileName = SOURCE_FOLDER + "docWithBigCoordinates.pdf";
        // In this document the CTM shrinks coordinates and these coordinates are large numbers.
        // Clipper deals with integers internally, so with a fixed, large float multiplier
        // the document's coordinates are expected to fall outside the allowed range.
        try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(inputFileName))) {
            PdfDocumentContentParser contentParser = new PdfDocumentContentParser(pdfDocument);
            // floatMultiplier is a static field, i.e. state shared with every other test
            // running in the same JVM, so it must be reset even when an assertion fails.
            ClipperBridge.floatMultiplier = Math.pow(10, 14);
            try {
                ClipperException exception = Assertions.assertThrows(ClipperException.class,
                        () -> contentParser.processContent(1, new LocationTextExtractionStrategy())
                );
                Assertions.assertEquals(ClipperExceptionConstant.COORDINATE_OUTSIDE_ALLOWED_RANGE,
                        exception.getMessage());
            } finally {
                // Reset to null, the value this test has always restored afterwards
                // (presumably the "no forced multiplier" default - confirm against ClipperBridge).
                ClipperBridge.floatMultiplier = null;
            }
        }
    }
}