EncodingTest.java
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2025 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.pdf;
import com.itextpdf.io.font.PdfEncodings;
import com.itextpdf.io.font.constants.StandardFonts;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.font.PdfFontFactory.EmbeddingStrategy;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import com.itextpdf.kernel.utils.CompareTool;
import com.itextpdf.test.ExtendedITextTest;
import com.itextpdf.test.TestUtil;
import java.io.IOException;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
@Tag("IntegrationTest")
public class EncodingTest extends ExtendedITextTest {
// Directory with the test inputs used below: fonts, source PDFs and the "cmp_" reference PDFs.
public static final String sourceFolder = "./src/test/resources/com/itextpdf/kernel/pdf/EncodingTest/";
// Directory (under the test output path) where the PDFs produced by these tests are written.
public static final String outputFolder = TestUtil.getOutputPath() + "/kernel/pdf/EncodingTest/";
/**
 * Runs once before all tests and creates the destination folder for {@link #outputFolder}.
 */
@BeforeAll
public static void beforeClass() {
    final String destination = outputFolder;
    createDestinationFolder(destination);
}
/**
 * Runs once after all tests and hands {@link #outputFolder} to {@code CompareTool.cleanup}.
 */
@AfterAll
public static void afterClass() {
    final String producedFilesFolder = outputFolder;
    CompareTool.cleanup(producedFilesFolder);
}
/**
 * Shows a string in which every character is a UTF-16 surrogate pair, using DejaVuSans with the
 * Identity-H encoding, and compares the written PDF with its {@code cmp_} reference.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void surrogatePairTest() throws IOException, InterruptedException {
    final String fileName = "surrogatePairTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;
    // Each pair of escapes below forms one supplementary-plane code point.
    final String text = "\uD835\uDD59\uD835\uDD56\uD835\uDD5D\uD835\uDD5D\uD835\uDD60\uD83D\uDE09\uD835\uDD68" +
            "\uD835\uDD60\uD835\uDD63\uD835\uDD5D\uD835\uDD55";

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont dejaVu = PdfFontFactory.createFont(sourceFolder + "DejaVuSans.ttf", PdfEncodings.IDENTITY_H);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 750)
            .setFontAndSize(dejaVu, 72)
            .showText(text)
            .endText()
            .restoreState();
    pageCanvas.release();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Shows mixed Cyrillic/Latin text with a font created from a custom "# simple" encoding string
 * and compares the written PDF with its {@code cmp_} reference.
 * Despite the method name, the font loaded here is FreeSans.ttf.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void customSimpleEncodingTimesRomanTest() throws IOException, InterruptedException {
    final String fileName = "customSimpleEncodingTimesRomanTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;
    // The encoding string lists exactly the code points used by the text shown below.
    final String customEncoding = "# simple 1 0020 041c 0456 0440 044a 0050 0065 0061 0063";

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont freeSans = PdfFontFactory.createFont(sourceFolder + "FreeSans.ttf",
            customEncoding,
            EmbeddingStrategy.PREFER_EMBEDDED);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    // Text is the Cyrillic letters U+041C U+0456 U+0440 U+044A followed by " Peace".
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 806)
            .setFontAndSize(freeSans, 12)
            .showText("\u041C\u0456\u0440\u044A Peace")
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Shows "A E" with the standard Times-Roman font created from a custom "# full" encoding string
 * (which names Aring for 'A' and Egrave for 'E') and compares the written PDF with its
 * {@code cmp_} reference.
 *
 * @throws IOException          propagated from font creation, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void customFullEncodingTimesRomanTest() throws IOException, InterruptedException {
    final String fileName = "customFullEncodingTimesRomanTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;
    final String customEncoding = "# full 'A' Aring 0041 'E' Egrave 0045 32 space 0020";

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont timesRoman = PdfFontFactory.createFont(StandardFonts.TIMES_ROMAN, customEncoding);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 806)
            .setFontAndSize(timesRoman, 12)
            .showText("A E")
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Writes two lines with the standard Helvetica font and compares the result with the
 * {@code cmp_} reference: first "A E" under a custom "# full" encoding that names the glyph
 * "abc11" for 'E', then the single character U+0188 under WinAnsi.
 * NOTE(review): judging by the test name, both cases presumably target characters without a
 * matching glyph - confirm against the font/encoding implementation.
 *
 * @throws IOException          propagated from font creation, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void notdefInStandardFontTest() throws IOException, InterruptedException {
    final String fileName = "notdefInStandardFontTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont customEncoded = PdfFontFactory.createFont(StandardFonts.HELVETICA,
            "# full 'A' Aring 0041 'E' abc11 0045 32 space 0020");
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    // First line: custom encoding.
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 786)
            .setFontAndSize(customEncoded, 36)
            .showText("A E")
            .endText()
            .restoreState();

    // Second line: WinAnsi encoding, created only after the first line has been written.
    PdfFont winAnsiEncoded = PdfFontFactory.createFont(StandardFonts.HELVETICA, PdfEncodings.WINANSI);
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 756)
            .setFontAndSize(winAnsiEncoded, 36)
            .showText("\u0188")
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Writes two lines with FreeSans.ttf as a simple (single-byte) font and compares the result with
 * the {@code cmp_} reference: first U+00C5, a space and U+1987 under a custom "# simple"
 * encoding listing those code points, then U+1987 alone under WinAnsi.
 * NOTE(review): judging by the test name, U+1987 is presumably not available in the font or in
 * WinAnsi - confirm.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void notdefInTrueTypeFontTest() throws IOException, InterruptedException {
    final String fileName = "notdefInTrueTypeFontTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;
    final String fontPath = sourceFolder + "FreeSans.ttf";

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont customEncoded = PdfFontFactory.createFont(fontPath,
            "# simple 32 0020 00C5 1987", EmbeddingStrategy.PREFER_EMBEDDED);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    // First line: custom encoding.
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 786)
            .setFontAndSize(customEncoded, 36)
            .showText("\u00C5 \u1987")
            .endText()
            .restoreState();

    // Second line: same font file, WinAnsi encoding.
    PdfFont winAnsiEncoded = PdfFontFactory.createFont(fontPath,
            PdfEncodings.WINANSI, EmbeddingStrategy.PREFER_EMBEDDED);
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 756)
            .setFontAndSize(winAnsiEncoded, 36)
            .showText("\u1987")
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Writes U+00C5, a space and U+1987 with FreeSans.ttf under the Identity-H encoding and compares
 * the written PDF with its {@code cmp_} reference.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void notdefInType0Test() throws IOException, InterruptedException {
    final String fileName = "notdefInType0Test.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont freeSans = PdfFontFactory.createFont(sourceFolder + "FreeSans.ttf", PdfEncodings.IDENTITY_H);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 786)
            .setFontAndSize(freeSans, 36)
            .showText("\u00C5 \u1987")
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Fills two pages via {@code fillSymbolDefaultPage}: the first with the standard Symbol font in
 * its default encoding, the second with Symbol created with WinAnsi. The written PDF is then
 * compared with its {@code cmp_} reference.
 *
 * @throws IOException          propagated from font creation, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void symbolDefaultFontTest() throws IOException, InterruptedException {
    final String fileName = "symbolDefaultFontTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));

    PdfFont defaultSymbol = PdfFontFactory.createFont(StandardFonts.SYMBOL);
    fillSymbolDefaultPage(defaultSymbol, pdfDocument.addNewPage());

    // Original author's note: WinAnsi encoding doesn't support special symbols.
    PdfFont winAnsiSymbol = PdfFontFactory.createFont(StandardFonts.SYMBOL, PdfEncodings.WINANSI);
    fillSymbolDefaultPage(winAnsiSymbol, pdfDocument.addNewPage());

    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Writes three lines of text onto the given page, covering character codes 32-100, 101-190 and
 * 191-254 respectively, all at size 12 in the given font.
 *
 * @param font font used for every line
 * @param page page that receives the text
 */
private void fillSymbolDefaultPage(PdfFont font, PdfPage page) {
    PdfCanvas pageCanvas = new PdfCanvas(page);

    // Line 1: codes 32..100, wrapped in its own save/restore pair.
    StringBuilder firstLine = new StringBuilder();
    for (char code = 32; code <= 100; code++) {
        firstLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .setFontAndSize(font, 12)
            .moveText(36, 806)
            .showText(firstLine.toString())
            .endText()
            .restoreState();

    // Line 2: codes 101..190. The state saved here is only restored after line 3.
    StringBuilder secondLine = new StringBuilder();
    for (char code = 101; code <= 190; code++) {
        secondLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .setFontAndSize(font, 12)
            .moveText(36, 786)
            .showText(secondLine.toString())
            .endText();

    // Line 3: codes 191..254. No font is set here; the text object is emitted inside the
    // save/restore pair opened for line 2, which is closed at the end of this chain.
    StringBuilder thirdLine = new StringBuilder();
    for (char code = 191; code <= 254; code++) {
        thirdLine.append(code);
    }
    pageCanvas
            .beginText()
            .moveText(36, 766)
            .showText(thirdLine.toString())
            .endText()
            .restoreState();
}
/**
 * Writes three lines of character codes (32-65, 65-190 and 191-254) with Symbols1.ttf created
 * with the WinAnsi encoding and compares the written PDF with its {@code cmp_} reference.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void symbolTrueTypeFontWinAnsiTest() throws IOException, InterruptedException {
    final String fileName = "symbolTrueTypeFontWinAnsiTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont symbols = PdfFontFactory.createFont(sourceFolder + "Symbols1.ttf", PdfEncodings.WINANSI,
            EmbeddingStrategy.PREFER_EMBEDDED);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());

    // Line 1: codes 32..65.
    StringBuilder firstLine = new StringBuilder();
    for (char code = 32; code <= 65; code++) {
        firstLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 786)
            .setFontAndSize(symbols, 36)
            .showText(firstLine.toString())
            .endText();

    // Line 2: codes 65..190.
    // NOTE(review): code 65 is also the last code of line 1; kept as is because the output is
    // compared against an existing reference PDF - confirm whether 66 was intended.
    StringBuilder secondLine = new StringBuilder();
    for (char code = 65; code <= 190; code++) {
        secondLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .moveText(36, 756)
            .setFontAndSize(symbols, 36)
            .showText(secondLine.toString())
            .endText();

    // Line 3: codes 191..254.
    // NOTE(review): saveState() is called twice in this test but restoreState() only once;
    // kept as is because changing it would alter the content compared with the reference PDF.
    StringBuilder thirdLine = new StringBuilder();
    for (char code = 191; code <= 254; code++) {
        thirdLine.append(code);
    }
    pageCanvas
            .beginText()
            .moveText(36, 726)
            .setFontAndSize(symbols, 36)
            .showText(thirdLine.toString())
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Writes three lines of character codes (32-100, 101-190 and 191-254) with Symbols1.ttf under
 * the Identity-H encoding and compares the written PDF with its {@code cmp_} reference.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void symbolTrueTypeFontIdentityTest() throws IOException, InterruptedException {
    final String fileName = "symbolTrueTypeFontIdentityTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont symbols = PdfFontFactory.createFont(sourceFolder + "Symbols1.ttf", PdfEncodings.IDENTITY_H);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());

    // Line 1: codes 32..100 at y = 786.
    StringBuilder firstLine = new StringBuilder();
    for (char code = 32; code <= 100; code++) {
        firstLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .setFontAndSize(symbols, 36)
            .moveText(36, 786)
            .showText(firstLine.toString())
            .endText()
            .restoreState();

    // Line 2: codes 101..190 at y = 746.
    StringBuilder secondLine = new StringBuilder();
    for (char code = 101; code <= 190; code++) {
        secondLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .setFontAndSize(symbols, 36)
            .moveText(36, 746)
            .showText(secondLine.toString())
            .endText()
            .restoreState();

    // Line 3: codes 191..254 at y = 766.
    // NOTE(review): this offset lies between those of lines 1 and 2; kept as is because the
    // output is compared against an existing reference PDF - confirm whether it is intended.
    StringBuilder thirdLine = new StringBuilder();
    for (char code = 191; code <= 254; code++) {
        thirdLine.append(code);
    }
    pageCanvas
            .saveState()
            .beginText()
            .setFontAndSize(symbols, 36)
            .moveText(36, 766)
            .showText(thirdLine.toString())
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Writes a line in which each of the letters A-J appears twice in a row, using Symbols1.ttf
 * under the Identity-H encoding, and compares the written PDF with its {@code cmp_} reference.
 *
 * @throws IOException          propagated from font loading, writing or comparison
 * @throws InterruptedException propagated from the comparison
 */
@Test
public void symbolTrueTypeFontSameCharsIdentityTest() throws IOException, InterruptedException {
    final String fileName = "symbolTrueTypeFontSameCharsIdentityTest.pdf";
    final String producedPdf = outputFolder + fileName;
    final String referencePdf = sourceFolder + "cmp_" + fileName;
    final String repeatedLetters = "AABBCCDDEEFFGGHHIIJJ";

    PdfDocument pdfDocument = new PdfDocument(CompareTool.createTestPdfWriter(producedPdf));
    PdfFont symbols = PdfFontFactory.createFont(sourceFolder + "Symbols1.ttf", PdfEncodings.IDENTITY_H);
    PdfCanvas pageCanvas = new PdfCanvas(pdfDocument.addNewPage());
    pageCanvas
            .saveState()
            .beginText()
            .setFontAndSize(symbols, 36)
            .moveText(36, 786)
            .showText(repeatedLetters)
            .endText()
            .restoreState();
    pdfDocument.close();

    String differences = new CompareTool().compareByContent(producedPdf, referencePdf, outputFolder, "diff_");
    Assertions.assertNull(differences);
}
/**
 * Extracts the text of page 1 of encodingStream01.pdf and expects it to be "abc".
 * The document is opened in a try-with-resources block so that it is closed even when the
 * extraction or the assertion fails.
 *
 * @throws IOException propagated from opening or reading the source PDF
 */
@Test
public void encodingStreamExtractionTest() throws IOException {
    String fileName = sourceFolder + "encodingStream01.pdf";
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(fileName))) {
        String extractedText = PdfTextExtractor.getTextFromPage(pdfDocument.getPage(1));
        Assertions.assertEquals("abc", extractedText);
    }
}
/**
 * Extracts the text of page 1 of differentCodeSpaceRangeLengths01.pdf and expects a mix of
 * Latin and CJK characters ("Hello", U+7121 U+540D, "world", U+6B98 U+528D).
 * The document is opened in a try-with-resources block so that it is closed even when the
 * extraction or the assertion fails.
 *
 * @throws IOException propagated from opening or reading the source PDF
 */
@Test
public void differentCodeSpaceRangeLengthsExtractionTest() throws IOException {
    String fileName = sourceFolder + "differentCodeSpaceRangeLengths01.pdf";
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(fileName))) {
        String extractedText = PdfTextExtractor.getTextFromPage(pdfDocument.getPage(1));
        Assertions.assertEquals("Hello\u7121\u540dworld\u6b98\u528d", extractedText);
    }
}
}