// BCP47ValidatorTest.java
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2025 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.utils.checkers;
import com.itextpdf.test.ExtendedITextTest;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link BCP47Validator#validate(String)}.
 *
 * <p>
 * The sample tags follow the example groups listed in RFC 5646 (BCP 47), Appendix A
 * "Examples of Language Tags". Every check goes through {@link #assertValid(String)} or
 * {@link #assertInvalid(String)} so that a failing assertion names the offending tag.
 */
@Tag("UnitTest")
public class BCP47ValidatorTest extends ExtendedITextTest {

    @Test
    public void simpleLanguageSubtagTest() {
        assertValid("de");
        assertValid("fr");
        // Example of a grandfathered tag.
        assertValid("i-enochian");
    }

    @Test
    public void languageSubtagAndScriptSubtagTest() {
        // Chinese written using the Traditional Chinese script.
        assertValid("zh-Hant");
        // Chinese written using the Simplified Chinese script.
        assertValid("zh-Hans");
        // Serbian written using the Cyrillic script.
        assertValid("sr-Cyrl");
        // Serbian written using the Latin script.
        assertValid("sr-Latn");
    }

    @Test
    public void extLangSubtagsAndPrimaryLangSubtagsTest() {
        // Chinese, Mandarin, Simplified script, as used in China.
        assertValid("zh-cmn-Hans-CN");
        // Mandarin Chinese, Simplified script, as used in China.
        assertValid("cmn-Hans-CN");
        // Chinese, Cantonese, as used in Hong Kong SAR.
        assertValid("zh-yue-HK");
        // Cantonese Chinese, as used in Hong Kong SAR: the primary-language counterpart of the
        // extlang form above. (Previously this line re-checked "sr-Latn", which is already
        // covered by languageSubtagAndScriptSubtagTest and has no extlang relevance.)
        assertValid("yue-HK");
    }

    @Test
    public void languageScriptRegionsTest() {
        // Chinese written using the Simplified script as used in mainland China.
        assertValid("zh-Hans-CN");
        // Serbian written using the Latin script as used in Serbia.
        assertValid("sr-Latn-RS");
    }

    @Test
    public void languageVariantTest() {
        // Resian dialect of Slovenian.
        assertValid("sl-rozaj");
        // San Giorgio dialect of Resian dialect of Slovenian.
        assertValid("sl-rozaj-biske");
        // Nadiza dialect of Slovenian.
        assertValid("sl-nedis");
    }

    @Test
    public void languageRegionVariantTest() {
        // German as used in Switzerland using the 1901 variant [orthography].
        assertValid("de-CH-1901");
        // Slovenian as used in Italy, Nadiza dialect.
        assertValid("sl-IT-nedis");
    }

    @Test
    public void languageScriptRegionVariantTest() {
        // Eastern Armenian written in Latin script, as used in Italy.
        assertValid("hy-Latn-IT-arevela");
    }

    @Test
    public void languageRegionTest() {
        // German for Germany.
        assertValid("de-DE");
        // English as used in the United States.
        assertValid("en-US");
        // Spanish appropriate for the Latin America and Caribbean region using the UN region code.
        assertValid("es-419");
        // Invalid, two region tags.
        assertInvalid("de-419-DE");
        // Use of a single-character subtag in primary position; note
        // that there are a few grandfathered tags that start with "i-" that
        // are valid.
        assertInvalid("a-DE");
    }

    @Test
    public void privateUseSubtagsTest() {
        assertValid("de-CH-x-phonebk");
        assertValid("az-Arab-x-AZE-derbend");
    }

    @Test
    public void privateUseRegistryValuesTest() {
        // Private use using the singleton 'x'.
        assertValid("x-whatever");
        // All private tags.
        assertValid("qaa-Qaaa-QM-x-southern");
        // German, with a private script.
        assertValid("de-Qaaa");
        // Serbian, Latin script, private region.
        assertValid("sr-Latn-QM");
        // Serbian, private script, for Serbia.
        assertValid("sr-Qaaa-RS");
    }

    @Test
    public void tagsWithExtensions() {
        assertValid("en-US-u-islamcal");
        assertValid("zh-CN-a-myext-x-private");
        assertValid("en-a-myext-b-another");
    }

    /**
     * Asserts that {@link BCP47Validator#validate(String)} accepts the given tag.
     *
     * @param languageTag the language tag expected to be reported as valid
     */
    private static void assertValid(String languageTag) {
        Assertions.assertTrue(BCP47Validator.validate(languageTag),
                "Expected a valid BCP 47 language tag: " + languageTag);
    }

    /**
     * Asserts that {@link BCP47Validator#validate(String)} rejects the given tag.
     *
     * @param languageTag the language tag expected to be reported as invalid
     */
    private static void assertInvalid(String languageTag) {
        Assertions.assertFalse(BCP47Validator.validate(languageTag),
                "Expected an invalid BCP 47 language tag: " + languageTag);
    }
}