WikiNoun.java
package org.atteo.evo.inflector;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * An English noun together with the plural forms described by one Wiktionary
 * {@code {{en-noun}}} template invocation.
 *
 * <p>The template parameters are interpreted one by one: each positional
 * parameter contributes one entry to {@link #plurals()}, and the first
 * interpreted parameter decides the noun's type (countable, uncountable, ...).
 * When the template carries no inflection parameters at all, the noun is
 * treated as countable with the default English plural.
 */
public class WikiNoun {
    /**
     * Matches a whole {@code {{en-noun...}}} invocation. Group 1 captures the
     * parameter portion including every leading {@code |} (for example
     * {@code |-|s}); it is the empty string when the template has no parameters.
     */
    private static final Pattern enNounPattern = Pattern.compile("\\{\\{en-noun((\\|[\\-\\+~!?a-z\\[\\] =]+)*)\\}\\}");

    /** Endings after which the default plural appends "es" instead of "s". */
    private static final Pattern ES_ENDING_PATTERN = Pattern.compile(".*(s|x|z|sh|ch)$");

    private final String singular;
    private final List<String> plurals = new ArrayList<>();
    private final String ennoun;
    private NounType nounType;

    /**
     * Creates a noun from its singular form and the template parameters.
     *
     * @param word the singular form
     * @param ennoun the {@code |}-separated template parameters, in the form
     *        captured by group 1 of the template pattern (e.g. {@code |-|s});
     *        anything before the first {@code |} is ignored
     */
    public WikiNoun(String word, String ennoun) {
        this.singular = word;
        this.ennoun = ennoun;
        calculatePlurals(singular, ennoun);
    }

    /**
     * Fills {@link #plurals} and {@link #nounType} from the template parameters.
     *
     * @param singular the singular form
     * @param ennoun the {@code |}-separated template parameters
     */
    private void calculatePlurals(String singular, String ennoun) {
        String[] split = ennoun.split("\\|");
        // Index 0 is whatever precedes the first '|' (an empty string for the
        // text captured by enNounPattern), so real parameters start at index 1.
        for (int i = 1; i < split.length; i++) {
            if (split[i].startsWith("head=")) {
                // "head=" is a named parameter, not an inflection.
                continue;
            }
            Noun noun = interpretInflection(singular, split[i]);
            if (nounType == null) {
                // first entry is the most common
                nounType = noun.type;
            }
            plurals.add(noun.plural);
        }
        if (plurals.isEmpty()) {
            // No inflection parameters at all: the noun takes the default
            // plural. It must also be classified as countable here; otherwise
            // nounType would stay null and every is...() predicate would
            // return false for the most ordinary kind of noun.
            plurals.add(defaultPlural(singular));
            nounType = NounType.COUNTABLE;
        }
    }

    /** The type and plural form derived from a single template parameter. */
    static class Noun {
        NounType type;
        String plural;

        private Noun(NounType type, String plural) {
            this.type = type;
            this.plural = plural;
        }

        /** Returns a countable noun entry with the given plural. */
        public static Noun countable(String plural) {
            return new Noun(NounType.COUNTABLE, plural);
        }

        /** Returns an uncountable noun entry with the given plural. */
        public static Noun uncountable(String plural) {
            return new Noun(NounType.UNCOUNTABLE, plural);
        }

        /** Returns an entry whose plural is not attested; its plural is the empty string. */
        public static Noun pluralNotAttested() {
            return new Noun(NounType.PLURAL_NOT_ATTESTED, "");
        }

        /** Returns an entry whose plural is unknown or uncertain; its plural is the empty string. */
        public static Noun unknownPlural() {
            return new Noun(NounType.UNKNOWN_PLURAL, "");
        }
    }

    /**
     * Translates one positional template parameter into a type and plural.
     *
     * @param singular the singular form
     * @param inflection a single template parameter, without the {@code |}
     * @return the noun entry described by the parameter; any parameter that is
     *         not one of the recognized markers is taken verbatim as the plural
     */
    private static Noun interpretInflection(String singular, String inflection) {
        switch (inflection) {
            case "-":
                // Uncountable: the singular doubles as the plural entry.
                return Noun.uncountable(singular);
            case "~":
            case "+":
                // Both markers resolve to a countable noun with the default plural.
                return Noun.countable(defaultPlural(singular));
            case "!":
                return Noun.pluralNotAttested();
            case "?":
                // unknown or uncertain plural
                return Noun.unknownPlural();
            case "s":
                return Noun.countable(singular + "s");
            case "es":
                return Noun.countable(singular + "es");
            default:
                // An explicit plural form.
                return Noun.countable(inflection);
        }
    }

    /** Returns the singular form this noun was created with. */
    public String singular() {
        return singular;
    }

    /**
     * Returns the plural forms in template order.
     *
     * <p>The list is never empty. It holds an empty string for each parameter
     * marking the plural as unknown or not attested. A fresh copy is returned
     * on every call, so modifying it does not affect this noun.
     */
    public List<String> plurals() {
        return new ArrayList<>(plurals);
    }

    /**
     * Returns whether the noun is countable. This is also the case when the
     * template carried no inflection parameters.
     */
    public boolean isCountable() {
        return nounType == NounType.COUNTABLE;
    }

    /** Returns whether the first inflection parameter marked the noun as uncountable. */
    public boolean isUncountable() {
        return nounType == NounType.UNCOUNTABLE;
    }

    /** Returns whether the first inflection parameter marked the plural as unknown or uncertain. */
    public boolean isPluralUnknown() {
        return nounType == NounType.UNKNOWN_PLURAL;
    }

    /** Returns whether the first inflection parameter marked the plural as not attested. */
    public boolean isPluralNotAttested() {
        return nounType == NounType.PLURAL_NOT_ATTESTED;
    }

    /** Returns the raw template parameters this noun was created from. */
    public String ennoun() {
        return ennoun;
    }

    /**
     * Builds the default English plural of a word: "es" after s, x, z, sh or
     * ch; "ys" when the word ends in a vowel followed by y; "ies" for any other
     * final y; and "s" otherwise.
     *
     * @param singular the singular form
     * @return the default plural
     */
    private static String defaultPlural(String singular) {
        if (ES_ENDING_PATTERN.matcher(singular).matches()) {
            return singular + "es";
        }
        // Rules are tried from most to least specific; a null result is treated
        // as "rule did not apply". Vowel + y must be tried before the bare y rule.
        String plural = new RegExpRule("([aeiou])y$", "$1ys").getPlural(singular);
        if (plural != null) {
            return plural;
        }
        plural = new RegExpRule("y$", "ies").getPlural(singular);
        if (plural != null) {
            return plural;
        }
        return new RegExpRule("$", "s").getPlural(singular);
    }

    /**
     * Finds every {@code {{en-noun}}} template in the text of a page's revision.
     *
     * @param page the page to scan; its title is used as the singular form
     * @return one noun per template occurrence, in order of appearance; empty
     *         when the text contains no matching template
     */
    public static List<WikiNoun> find(Page page) {
        Matcher matcher = enNounPattern.matcher(page.getRevision().getText());
        List<WikiNoun> nouns = new ArrayList<>();
        while (matcher.find()) {
            nouns.add(new WikiNoun(page.getTitle(), matcher.group(1)));
        }
        return nouns;
    }
}