// IntCharSetTest.java
/*
* Copyright (C) 2022, Gerwin Klein, Régis Décamps, Steve Rowe
* SPDX-License-Identifier: BSD-3-Clause
*/
package jflex.core.unicode;
import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth.assertWithMessage;
import jflex.chars.Interval;
import org.junit.Test;
public class IntCharSetTest {
@Test
public void add_overlappingIntervals1() {
IntCharSet set = new IntCharSet();
set.add(new Interval('a', 'h'));
set.add(new Interval('o', 'z'));
set.add(new Interval('A', 'Z'));
set.add(new Interval('h', 'o'));
assertThat(set).isEqualTo(IntCharSet.of(new Interval('A', 'Z'), new Interval('a', 'z')));
}
@Test
public void add_overlappingIntervals2() {
IntCharSet set = new IntCharSet();
set.add(new Interval('a', 'h'));
set.add(new Interval('o', 'z'));
set.add(new Interval('A', 'Z'));
set.add(new Interval('i', 'n'));
assertThat(set).isEqualTo(IntCharSet.of(new Interval('A', 'Z'), new Interval('a', 'z')));
}
@Test
public void add_overlappingIntervals3() {
IntCharSet set = new IntCharSet();
set.add(new Interval('a', 'h'));
set.add(new Interval('o', 'z'));
set.add(new Interval('A', 'Z'));
set.add(new Interval('a', 'n'));
assertThat(set).isEqualTo(IntCharSet.of(new Interval('A', 'Z'), new Interval('a', 'z')));
}
@Test
public void add_lastCharacter() {
IntCharSet set = IntCharSet.ofCharacterRange('a', 'k');
assertThat(set).isEqualTo(IntCharSet.ofCharacterRange('a', 'k'));
set.add('l');
assertThat(set).isEqualTo(IntCharSet.ofCharacterRange('a', 'l'));
}
@Test
public void add_firstCharacter() {
IntCharSet set = new IntCharSet();
set.add(new Interval('o', 'z'));
set.add('n');
assertThat(set).isEqualTo(IntCharSet.ofCharacterRange('n', 'z'));
}
@Test
public void add_disjointCharacter() {
IntCharSet set = new IntCharSet();
set.add(new Interval('x', 'z'));
set.add(new Interval('a', 'c'));
set.add('n');
assertThat(set)
.isEqualTo(
IntCharSet.of(
new Interval('a', 'c'), Interval.ofCharacter('n'), new Interval('x', 'z')));
}
@Test
public void copy() {
IntCharSet set = IntCharSet.of(new Interval('a', 'z'));
IntCharSet copy = IntCharSet.copyOf(set);
Interval i = set.getIntervals().get(0);
i.end = 'X';
assertThat(copy).isNotEqualTo(set);
}
@Test
public void getCaseless() throws Exception {
UnicodeProperties unicodeProperties = new UnicodeProperties("4.0");
IntCharSet set = new IntCharSet();
set.add(new Interval('a', 'c'));
set.add(new Interval('h', 'o'));
// From <http://unicode.org/Public/4.0-Update1/UnicodeData-4.0.1.txt>:
//
// 0049;LATIN CAPITAL LETTER I;Lu;0;L;;;;;N;;;;0069;
// 0069;LATIN SMALL LETTER I;Ll;0;L;;;;;N;;;0049;;0049
// 0130;LATIN CAPITAL LETTER I WITH DOT ABOVE;Lu;0;L;0049 0307;;;;N;LATIN CAPITAL LETTER I
// DOT;;;0069;
// 0131;LATIN SMALL LETTER DOTLESS I;Ll;0;L;;;;;N;;;0049;;0049
//
// 006B;LATIN SMALL LETTER K;Ll;0;L;;;;;N;;;004B;;004B
// 212A;KELVIN SIGN;Lu;0;L;004B;;;;N;DEGREES KELVIN;;;006B;
assertWithMessage(
"The caseless version should be ['A'-'C']['H'-'O']['a'-'c']['h'-'o'][304-305][8490]")
.that(set.getCaseless(unicodeProperties))
.isEqualTo(
IntCharSet.of(
new Interval('A', 'C'),
new Interval('H', 'O'),
new Interval('a', 'c'),
new Interval('h', 'o'),
new Interval(0x130, 0x131),
Interval.ofCharacter(0x212A)));
}
@Test
public void testToString() {
IntCharSet set =
IntCharSet.of(
new Interval('A', 'C'),
new Interval('H', 'O'),
new Interval('a', 'c'),
new Interval('h', 'o'),
new Interval('��', '��'), // http://www.fileformat.info/info/unicode/char/130/
Interval.ofCharacter('���') // http://www.fileformat.info/info/unicode/char/212A/
);
assertThat(set.toString()).isEqualTo("{ ['A'-'C']['H'-'O']['a'-'c']['h'-'o'][304-305][8490] }");
}
@Test
public void addIntCharSet() {
IntCharSet set = IntCharSet.ofCharacterRange(0, 10);
set.add(IntCharSet.of(new Interval(11, 20), new Interval(41, 42)));
assertThat(set).isEqualTo(IntCharSet.of(new Interval(0, 20), new Interval(41, 42)));
}
@Test
public void sub() {
IntCharSet a = IntCharSet.of(new Interval(1, 7), new Interval(10, 42));
a.sub(IntCharSet.ofCharacterRange(4, 7));
a.sub(IntCharSet.ofCharacterRange(10, 41));
assertThat(a).isEqualTo(IntCharSet.of(new Interval(1, 3), Interval.ofCharacter(42)));
}
@Test
public void contains() {
IntCharSet a = IntCharSet.of(new Interval(3, 7), new Interval(10, 15));
IntCharSet b = IntCharSet.ofCharacterRange(4, 6);
IntCharSet c = IntCharSet.ofCharacterRange(1, 5);
IntCharSet d = IntCharSet.ofCharacterRange(1, 20);
IntCharSet e = IntCharSet.of(new Interval(4, 6), new Interval(10, 15));
assertThat(a.contains(b)).isTrue();
assertThat(a.contains(e)).isTrue();
assertThat(a.contains(c)).isFalse();
assertThat(a.contains(d)).isFalse();
}
}