UnicodePropertiesTest.java
/*
* Copyright (c) 2008 Steve Rowe
* SPDX-License-Identifier: BSD-3-Clause
*/
package jflex.unicode;
import static com.google.common.truth.Truth.assertWithMessage;
import static org.junit.Assert.fail;
import java.util.Objects;
import jflex.core.unicode.IntCharSet;
import jflex.core.unicode.UnicodeProperties;
import org.junit.Test;
public class UnicodePropertiesTest {
@Test
public void testSupportedVersions() {
String[] versions =
new String[] {
"1.1", "1.1.5", "2", "2.0", "2.1", "3", "3.0", "3.1", "3.2", "4", "4.0", "4.1", "5",
"5.0", "5.1", "5.2", "6", "6.0"
};
for (String version : versions) {
try {
UnicodeProperties properties = new UnicodeProperties(version);
IntCharSet intervals = properties.getIntCharSet("Lu");
assertWithMessage(
"intervals for 'Lu' property value for version "
+ version
+ " should not be null\n"
+ "Supported properties: "
+ properties.getPropertyValues())
.that(intervals)
.isNotNull();
assertWithMessage("intervals for 'Lu' property value should have an interval")
.that(intervals.numIntervals() > 0)
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '" + version + "' should be supported: " + e);
}
}
}
@Test
public void testUnsupportedVersion() {
try {
new UnicodeProperties("1.0");
fail(
"new UnicodeProperties(\"1.0\") should trigger an"
+ " UnsupportedUnicodeVersionException, but it did not.");
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
// Drop the exception - it is expected.
}
}
@Test
public void testDefaultVersion() {
try {
UnicodeProperties properties = new UnicodeProperties();
IntCharSet intervals = properties.getIntCharSet("Lu");
assertWithMessage(
"intervals for 'Lu' property value for default Unicode "
+ "version should not be null\n"
+ "Supported properties: "
+ properties.getPropertyValues())
.that(intervals)
.isNotNull();
assertWithMessage("intervals for 'Lu' property value should have an interval")
.that(intervals.numIntervals() > 0)
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Default version is unsupported: " + e);
}
}
@Test
public void testDefaultVersionAliases() {
try {
UnicodeProperties properties = new UnicodeProperties();
IntCharSet set_1 = properties.getIntCharSet("General Category : Other Letter");
assertWithMessage("Null interval set returned for " + "\\p{General Category : Other Letter}")
.that(set_1)
.isNotNull();
assertWithMessage("Empty interval set returned for " + "\\p{General Category : Other Letter}")
.that(set_1.containsElements())
.isTrue();
IntCharSet set_2 = properties.getIntCharSet("Lo");
assertWithMessage("Null interval set returned for \\p{Lo}").that(set_2).isNotNull();
assertWithMessage("Empty interval set returned for \\p{Lo}")
.that(set_1.containsElements())
.isTrue();
assertWithMessage(
"\\p{General Category : Other Letter} and \\p{Lo} should" + " return the same thing.")
.that(Objects.equals(set_1, set_2))
.isTrue();
set_1 = properties.getIntCharSet(" Script:Tibetan ");
assertWithMessage("Null interval set returned for \\p{ Script:Tibetan }")
.that(set_1)
.isNotNull();
assertWithMessage("Empty interval set returned for \\p{ Script:Tibetan }")
.that(set_1.containsElements())
.isTrue();
set_2 = properties.getIntCharSet("-_T i b t_-");
assertWithMessage("Null interval set returned for \\p{-_T i b t_-}").that(set_2).isNotNull();
assertWithMessage("Empty interval set returned for \\p{-_T i b t_-}")
.that(set_1.containsElements())
.isTrue();
assertWithMessage(
"\\p{ Script:Tibetan } and \\p{-_T i b t_-} should" + " return the same thing.")
.that(Objects.equals(set_1, set_2))
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Default version is unsupported: " + e);
}
}
@Test
public void testCaselessMatches_1_1() {
try {
UnicodeProperties properties = new UnicodeProperties("1.1");
IntCharSet caselessMatches = properties.getCaselessMatches('i');
assertWithMessage("'i' has no caseless matches except itself, but it should.")
.that(caselessMatches)
.isNotNull();
assertWithMessage("Caseless match set for 'i' should contain 'i', but it doesn't.")
.that(caselessMatches.contains('i'))
.isTrue();
assertWithMessage("Caseless match set for 'i' should contain 'I', but it doesn't.")
.that(caselessMatches.contains('I'))
.isTrue();
assertWithMessage(
"Caseless match set for 'i' should contain 2 members, but"
+ " instead contains "
+ caselessMatches.numIntervals())
.that(caselessMatches.numIntervals() == 2)
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '1.1' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_2_0() {
try {
UnicodeProperties properties = new UnicodeProperties("2.0");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '2.0' should be supported: " + e);
}
}
private static void checkCaseless_i_matches(UnicodeProperties properties) {
IntCharSet caselessMatches = properties.getCaselessMatches('i');
assertWithMessage("'i' has no caseless matches except itself, but it should.")
.that(caselessMatches)
.isNotNull();
assertWithMessage("Caseless match set for 'i' should contain 'i', but it doesn't.")
.that(caselessMatches.contains('i'))
.isTrue();
assertWithMessage("Caseless match set for 'i' should contain 'I', but it doesn't.")
.that(caselessMatches.contains('I'))
.isTrue();
assertWithMessage(
"Caseless match set for 'i' should contain uppercase 'I' with"
+ " dot above, but it doesn't.")
.that(caselessMatches.contains('\u0130'))
.isTrue();
assertWithMessage(
"Caseless match set for 'i' should contain lowercase dotless" + " 'i', but it doesn't.")
.that(caselessMatches.contains('\u0131'))
.isTrue();
int charCount = caselessMatches.size();
assertWithMessage(
"Caseless match set for 'i' should contain 4 members, but"
+ " instead contains "
+ charCount)
.that(charCount == 4)
.isTrue();
}
@Test
public void testCaselessMatches_2_1() {
try {
UnicodeProperties properties = new UnicodeProperties("2.1");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '2.1' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_3_0() {
try {
UnicodeProperties properties = new UnicodeProperties("3.0");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '3.0' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_3_1() {
try {
UnicodeProperties properties = new UnicodeProperties("3.1");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '3.1' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_3_2() {
try {
UnicodeProperties properties = new UnicodeProperties("3.2");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '3.2' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_4_0() {
try {
UnicodeProperties properties = new UnicodeProperties("4.0");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '4.0' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_4_1() {
try {
UnicodeProperties properties = new UnicodeProperties("4.1");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '4.1' should be supported: " + e);
}
}
@Test
public void testCaselessMatches_5_0() {
try {
UnicodeProperties properties = new UnicodeProperties("5.0");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '5.0' should be supported: " + e);
}
}
@Test
public void testSingleLetterProperties_5_0() {
try {
UnicodeProperties properties = new UnicodeProperties("5.0");
IntCharSet set_1 = properties.getIntCharSet("S");
assertWithMessage("Null interval set for \\p{S}").that(set_1).isNotNull();
assertWithMessage("Empty interval set for \\p{S}").that(set_1.containsElements()).isTrue();
IntCharSet set_2 = properties.getIntCharSet("Symbol");
assertWithMessage("Null interval set for \\p{Symbol}").that(set_2).isNotNull();
assertWithMessage("Empty interval set for \\p{Symbol}")
.that(set_2.containsElements())
.isTrue();
assertWithMessage("\\p{S} is not the same as \\p{Symbol}")
.that(Objects.equals(set_1, set_2))
.isTrue();
// 0024;DOLLAR SIGN;Sc;0;ET;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u0024 '\u0024' (\\p{Sc})")
.that(set_1.contains('\u0024'))
.isTrue();
// 002B;PLUS SIGN;Sm;0;ES;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u002B '\u002B' (\\p{Sm})")
.that(set_1.contains('\u002B'))
.isTrue();
// 005E;CIRCUMFLEX ACCENT;Sk;0;ON;;;;;N;SPACING CIRCUMFLEX;;;;
assertWithMessage("\\p{S} does not contain \\u005E '\u005E' (\\p{Sk})")
.that(set_1.contains('\u005E'))
.isTrue();
// 2196;NORTH WEST ARROW;So;0;ON;;;;;N;UPPER LEFT ARROW;;;;
assertWithMessage("\\p{S} does not contain \\u2196 (\\p{So})")
.that(set_1.contains('\u2196'))
.isTrue();
// FF04;FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\uFF04 (\\p{Sc}")
.that(set_1.contains('\uFF04'))
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Version '5.0' not supported: " + e);
}
}
@Test
public void testCaselessMatches_5_1() {
try {
UnicodeProperties properties = new UnicodeProperties("5.1");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '5.1' should be supported: " + e);
}
}
@Test
public void testSingleLetterProperties_5_1() {
try {
UnicodeProperties properties = new UnicodeProperties("5.1");
IntCharSet set_1 = properties.getIntCharSet("S");
assertWithMessage("Null interval set for \\p{S}").that(set_1).isNotNull();
assertWithMessage("Empty interval set for \\p{S}").that(set_1.containsElements()).isTrue();
IntCharSet set_2 = properties.getIntCharSet("Symbol");
assertWithMessage("Null interval set for \\p{Symbol}").that(set_2).isNotNull();
assertWithMessage("Empty interval set for \\p{Symbol}")
.that(set_2.containsElements())
.isTrue();
assertWithMessage("\\p{S} is not the same as \\p{Symbol}")
.that(Objects.equals(set_1, set_2))
.isTrue();
// 0024;DOLLAR SIGN;Sc;0;ET;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u0024 '\u0024' (\\p{Sc})")
.that(set_1.contains('\u0024'))
.isTrue();
// 002B;PLUS SIGN;Sm;0;ES;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u002B '\u002B' (\\p{Sm})")
.that(set_1.contains('\u002B'))
.isTrue();
// 005E;CIRCUMFLEX ACCENT;Sk;0;ON;;;;;N;SPACING CIRCUMFLEX;;;;
assertWithMessage("\\p{S} does not contain \\u005E '\u005E' (\\p{Sk})")
.that(set_1.contains('\u005E'))
.isTrue();
// 2196;NORTH WEST ARROW;So;0;ON;;;;;N;UPPER LEFT ARROW;;;;
assertWithMessage("\\p{S} does not contain \\u2196 (\\p{So})")
.that(set_1.contains('\u2196'))
.isTrue();
// FF04;FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\uFF04 (\\p{Sc}")
.that(set_1.contains('\uFF04'))
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Version '5.1' not supported: " + e);
}
}
@Test
public void testCaselessMatches_5_2() {
try {
UnicodeProperties properties = new UnicodeProperties("5.2");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '5.2' should be supported: " + e);
}
}
@Test
public void testSingleLetterProperties_5_2() {
try {
UnicodeProperties properties = new UnicodeProperties("5.2");
IntCharSet set_1 = properties.getIntCharSet("S");
assertWithMessage("Null interval set for \\p{S}").that(set_1).isNotNull();
assertWithMessage("Empty interval set for \\p{S}").that(set_1.containsElements()).isTrue();
IntCharSet set_2 = properties.getIntCharSet("Symbol");
assertWithMessage("Null interval set for \\p{Symbol}").that(set_2).isNotNull();
assertWithMessage("Empty interval set for \\p{Symbol}")
.that(set_2.containsElements())
.isTrue();
assertWithMessage("\\p{S} is not the same as \\p{Symbol}")
.that(Objects.equals(set_1, set_2))
.isTrue();
// 0024;DOLLAR SIGN;Sc;0;ET;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u0024 '\u0024' (\\p{Sc})")
.that(set_1.contains('\u0024'))
.isTrue();
// 002B;PLUS SIGN;Sm;0;ES;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u002B '\u002B' (\\p{Sm})")
.that(set_1.contains('\u002B'))
.isTrue();
// 005E;CIRCUMFLEX ACCENT;Sk;0;ON;;;;;N;SPACING CIRCUMFLEX;;;;
assertWithMessage("\\p{S} does not contain \\u005E '\u005E' (\\p{Sk})")
.that(set_1.contains('\u005E'))
.isTrue();
// 2196;NORTH WEST ARROW;So;0;ON;;;;;N;UPPER LEFT ARROW;;;;
assertWithMessage("\\p{S} does not contain \\u2196 (\\p{So})")
.that(set_1.contains('\u2196'))
.isTrue();
// FF04;FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\uFF04 (\\p{Sc}")
.that(set_1.contains('\uFF04'))
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Version '5.2' not supported: " + e);
}
}
@Test
public void testCaselessMatches_6_0() {
try {
UnicodeProperties properties = new UnicodeProperties("6.0");
checkCaseless_i_matches(properties);
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Unsupported version '6.0' should be supported: " + e);
}
}
@Test
public void testSingleLetterProperties_6_0() {
try {
UnicodeProperties properties = new UnicodeProperties("6.0");
IntCharSet set_1 = properties.getIntCharSet("S");
assertWithMessage("Null interval set for \\p{S}").that(set_1).isNotNull();
assertWithMessage("Empty interval set for \\p{S}").that(set_1.containsElements()).isTrue();
IntCharSet set_2 = properties.getIntCharSet("Symbol");
assertWithMessage("Null interval set for \\p{Symbol}").that(set_2).isNotNull();
assertWithMessage("Empty interval set for \\p{Symbol}")
.that(set_2.containsElements())
.isTrue();
assertWithMessage("\\p{S} is not the same as \\p{Symbol}")
.that(Objects.equals(set_1, set_2))
.isTrue();
// 0024;DOLLAR SIGN;Sc;0;ET;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u0024 '\u0024' (\\p{Sc})")
.that(set_1.contains('\u0024'))
.isTrue();
// 002B;PLUS SIGN;Sm;0;ES;;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\u002B '\u002B' (\\p{Sm})")
.that(set_1.contains('\u002B'))
.isTrue();
// 005E;CIRCUMFLEX ACCENT;Sk;0;ON;;;;;N;SPACING CIRCUMFLEX;;;;
assertWithMessage("\\p{S} does not contain \\u005E '\u005E' (\\p{Sk})")
.that(set_1.contains('\u005E'))
.isTrue();
// 2196;NORTH WEST ARROW;So;0;ON;;;;;N;UPPER LEFT ARROW;;;;
assertWithMessage("\\p{S} does not contain \\u2196 (\\p{So})")
.that(set_1.contains('\u2196'))
.isTrue();
// FF04;FULLWIDTH DOLLAR SIGN;Sc;0;ET;<wide> 0024;;;;N;;;;;
assertWithMessage("\\p{S} does not contain \\uFF04 (\\p{Sc}")
.that(set_1.contains('\uFF04'))
.isTrue();
} catch (UnicodeProperties.UnsupportedUnicodeVersionException e) {
fail("Version '6.0' not supported: " + e);
}
}
}