EnglishInflectorTest.java
/*
* Copyright 2011 Atteo.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.atteo.evo.inflector;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import java.io.BufferedWriter;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDate;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
class EnglishInflectorTest {
private static final Path INCORRECT_COUNTABLE_REPORT = Path.of("target/reports/incorrect-countable.md");
private final English inflector = new English();
@Test
void wiktionaryTest() throws Exception {
var all = new AtomicInteger();
var countable = new AtomicInteger();
var correctCountable = new AtomicInteger();
var uncountable = new AtomicInteger();
var correctUncountable = new AtomicInteger();
var pluralNotAttested = new AtomicInteger();
var pluralUnknown = new AtomicInteger();
Files.createDirectories(INCORRECT_COUNTABLE_REPORT.getParent());
try (BufferedWriter incorrectCountable = Files.newBufferedWriter(INCORRECT_COUNTABLE_REPORT, UTF_8)) {
incorrectCountable.append("|Singular|Evo-Inflector plural|Wiktionary plurals|\n");
incorrectCountable.append("|--------|--------------------|------------------|\n");
new WiktionaryCorpus().forEach(wikiNouns -> {
all.getAndIncrement();
var calculatedPlural = inflector.getPlural(wikiNouns.get(0).singular());
Optional<WikiNoun> correctNoun = wikiNouns.stream()
.filter(noun -> noun.plurals().contains(calculatedPlural))
.findFirst();
var correct = correctNoun.isPresent();
var wikiNoun = correctNoun.orElse(wikiNouns.get(0));
if (wikiNoun.isUncountable()) {
uncountable.getAndIncrement();
if (correct) {
correctUncountable.getAndIncrement();
}
return;
}
if (wikiNoun.isPluralNotAttested()) {
pluralNotAttested.getAndIncrement();
return;
}
if (wikiNoun.isPluralUnknown()) {
pluralUnknown.getAndIncrement();
return;
}
countable.getAndIncrement();
if (correct) {
correctCountable.getAndIncrement();
return;
}
try {
var wiktionaryPlurals = wikiNouns.stream()
.flatMap(noun -> noun.plurals().stream())
.collect(Collectors.joining(","));
String uriEncodedSingular = URLEncoder.encode(wikiNoun.singular(), UTF_8);
incorrectCountable.append("|" + wikiNoun.singular() + " | " + calculatedPlural + " | ["
+ wiktionaryPlurals + "](https://en.wiktionary.org/wiki/" + uriEncodedSingular + ") |\n");
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
printSummary(countable, correctCountable, uncountable, correctUncountable, pluralNotAttested, pluralUnknown);
}
private void printSummary(
AtomicInteger countable,
AtomicInteger correctCountable,
AtomicInteger uncountable,
AtomicInteger correctUncountable,
AtomicInteger pluralNotAttested,
AtomicInteger pluralUnknown) {
var all = countable.get() + uncountable.get() + pluralNotAttested.get() + pluralUnknown.get();
System.out.println("");
System.out.println("There are (" + LocalDate.now().toString() + ") " + all
+ " single word nouns in the English Wiktionary of which:");
System.out.println("- " + percent(countable.get(), all) + " are countable nouns,");
System.out.println("- " + percent(uncountable.get(), all) + " are uncountable nouns,");
System.out.println("- for " + percent(pluralUnknown.get(), all) + " nouns plural is unknown,");
System.out.println("- for " + percent(pluralNotAttested.get(), all) + " nouns plural is not attested.");
System.out.println("");
System.out.println("Evo Inflector returns correct answer for: ");
System.out.println("- " + percent(correctCountable.get(), countable.get())
+ " of all countable nouns, see [this report](target/reports/incorrect-countable.md),");
System.out.println(
"- but only for " + percent(correctUncountable.get(), uncountable.get()) + " of uncountable nouns.");
System.out.println("In overall it returns correct answer for "
+ percent(correctCountable.get() + correctUncountable.get(), all) + " of all nouns");
System.out.println("");
}
private String percent(int count, int all) {
var percent = count * 100 / (float) all;
return percent + "% (" + count + ")";
}
@Test
void exampleWordList() {
check(new String[][] {
{"alga", "algae"},
{"nova", "novas"},
{"dogma", "dogmas"},
{"Woman", "Women"},
{"church", "churches"},
{"quick_chateau", "quick_chateaus"},
{"codex", "codices"},
{"index", "indexes"},
{"basis", "bases"},
{"iris", "irises"},
{"phalanx", "phalanxes"},
{"tempo", "tempos"},
{"foot", "feet"},
{"series", "series"},
{"wish", "wishes"},
{"Bacterium", "Bacteria"},
{"medium", "mediums"},
{"Genus", "Genera"},
{"stimulus", "stimuli"},
{"opus", "opuses"},
{"status", "statuses"},
{"Box", "Boxes"},
{"ferry", "ferries"},
{"protozoon", "protozoa"},
{"cherub", "cherubs"},
{"human", "humans"},
{"sugar", "sugar"},
{"virus", "viruses"},
{"gastrostomy", "gastrostomies"},
{"baculum", "bacula"},
{"pancreas", "pancreases"},
{"todo", "todos"},
{"person", "persons"},
{"baculumulum", "baculumula"}, // https://github.com/atteo/evo-inflector/pull/18
{"", ""},
});
}
@Test
void shouldPreserveCapitalLetters() {
check(new String[][] {
{"Milieu", "Milieus"},
{"NightWolf", "NightWolves"},
{"WorldAtlas", "WorldAtlases"},
{"SMS", "SMSes"},
{"bacuLum", "bacuLa"},
{"alO", "alOes"},
{"luO", "luOs"},
{"boY", "boYs"},
{"Foot", "Feet"},
{"Goose", "Geese"},
{"Man", "Men"},
{"Tooth", "Teeth"},
});
}
@Test
void withCount() {
assertThat(inflector.getPlural("cat", 1)).isEqualTo("cat");
assertThat(inflector.getPlural("cat", 2)).isEqualTo("cats");
assertThat(inflector.getPlural("demoness", 1)).isEqualTo("demoness");
assertThat(inflector.getPlural("demoness", 2)).isEqualTo("demonesses");
}
@Test
void staticMethods() {
assertThat(English.plural("sulfimide")).isEqualTo("sulfimides");
assertThat(English.plural("semifluid", 2)).isEqualTo("semifluids");
}
private void check(String[][] list) {
for (String[] pair : list) {
check(pair[0], pair[1]);
}
}
private void check(String singular, String plural) {
assertThat(inflector.getPlural(singular)).isEqualTo(plural);
}
}