// CompoundCharacterTokenizerTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.fontbox.ttf.gsub;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;

import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link CompoundCharacterTokenizer#tokenize(String)}.
 *
 * <p>Every test builds a tokenizer from a set of compound words written as underscore-delimited
 * number sequences (for example {@code "_104_82_"}), tokenizes a text written in the same
 * notation and compares the returned list with the expected tokens.
 */
class CompoundCharacterTokenizerTest
{
    /**
     * Creates a tokenizer for the given compound words.
     *
     * @param compoundWords the compound words handed to the tokenizer; they are collected into a
     * {@link HashSet}, so repeated entries collapse into one
     * @return a tokenizer constructed from the set of compound words
     */
    private static CompoundCharacterTokenizer tokenizerFor(String... compoundWords)
    {
        return new CompoundCharacterTokenizer(new HashSet<>(Arrays.asList(compoundWords)));
    }

    /**
     * Two of the three compound words occur in the middle of the text. The expected result
     * alternates between unmatched runs and the matched words; the unmatched runs that precede a
     * match are expected without a trailing underscore.
     */
    @Test
    void testTokenize_happyPath_2()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_84_93_", "_104_82_", "_104_87_");
        String text = "_84_112_93_104_82_61_96_102_93_104_87_110_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_84_112_93", "_104_82_", "_61_96_102_93", "_104_87_", "_110_"),
                tokens);
    }

    /**
     * A compound word occurs at the very start of the text. The underscore that follows the match
     * is expected both at the end of the matched token and at the start of the remaining token.
     */
    @Test
    void testTokenize_happyPath_3()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_67_112_96_", "_74_112_76_");
        String text = "_67_112_96_103_93_108_93_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_67_112_96_", "_103_93_108_93_"), tokens);
    }

    /**
     * A compound word occurs in the middle of the text, so an unmatched token is expected on
     * either side of it.
     */
    @Test
    void testTokenize_happyPath_4()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_67_112_96_", "_74_112_76_");
        String text = "_94_67_112_96_112_91_103_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_94", "_67_112_96_", "_112_91_103_"), tokens);
    }

    /**
     * The text contains {@code 167_112_}, which ends like the compound word {@code _67_112_} but
     * is not a complete underscore-delimited occurrence of it. The whole text is therefore
     * expected back as a single token.
     */
    @Test
    void testTokenize_happyPath_5()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_67_112_", "_76_112_");
        String text = "_94_167_112_91_103_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Collections.singletonList("_94_167_112_91_103_"), tokens);
    }

    /**
     * Every number of the text is a compound word of its own, so one token per number is
     * expected, each with its leading and trailing underscore.
     */
    @Test
    void testTokenize_happyPath_6()
    {
        // given
        CompoundCharacterTokenizer tokenizer =
                tokenizerFor("_100_", "_101_", "_102_", "_103_", "_104_");
        String text = "_100_101_102_103_104_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_100_", "_101_", "_102_", "_103_", "_104_"), tokens);
    }

    /**
     * The text is completely covered by three adjacent compound words of different lengths; the
     * expected tokens are exactly those words.
     */
    @Test
    void testTokenize_happyPath_7()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_100_101_", "_102_", "_103_104_");
        String text = "_100_101_102_103_104_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_100_101_", "_102_", "_103_104_"), tokens);
    }

    /**
     * Two compound words overlap in the text ({@code _100_101_102_} and {@code _101_102_}). The
     * expected result contains the longer word that starts first, followed by the remaining word.
     */
    @Test
    void testTokenize_happyPath_8()
    {
        // given
        CompoundCharacterTokenizer tokenizer =
                tokenizerFor("_100_101_102_", "_101_102_", "_103_104_");
        String text = "_100_101_102_103_104_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_100_101_102_", "_103_104_"), tokens);
    }

    /**
     * The same compound word is supplied twice. The expected result is the unmatched prefix, the
     * word once, and the unmatched remainder of the text.
     */
    @Test
    void testTokenize_happyPath_9()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_101_102_", "_101_102_");
        String text = "_100_101_102_103_104_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Arrays.asList("_100", "_101_102_", "_103_104_"), tokens);
    }

    /**
     * None of the compound words occurs in the text, so the whole text is expected back as a
     * single token.
     */
    @Test
    void testTokenize_happyPath_10()
    {
        // given
        CompoundCharacterTokenizer tokenizer = tokenizerFor("_201_", "_202_");
        String text = "_100_101_102_103_104_";

        // when
        List<String> tokens = tokenizer.tokenize(text);

        // then
        assertEquals(Collections.singletonList("_100_101_102_103_104_"), tokens);
    }

}