// TestCharDFAConversion.java

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.test;

import org.antlr.analysis.DFA;
import org.antlr.analysis.DFAOptimizer;
import org.antlr.codegen.CodeGenerator;
import org.antlr.tool.*;
import org.junit.Test;

import java.util.List;

import static org.junit.Assert.*;

/**
 * Tests that build lookahead DFAs for small lexer grammars and compare the
 * text produced by {@link FASerializer} for a given decision, together with
 * the DFA's unreachable alternatives, against hand-written expectations.
 *
 * <p>Every test funnels through {@link #checkDecision}, which prepares the
 * grammar on first use and performs the comparisons.
 */
public class TestCharDFAConversion extends BaseTest {

	/** Public default constructor used by TestRig */
	public TestCharDFAConversion() {
	}

	// R A N G E S  &  S E T S

	/**
	 * A range alt and a single-char alt overlap on 'k': 'k' needs a second
	 * symbol to decide, the remainder of the range predicts alt 1 directly.
	 */
	@Test public void testSimpleRangeVersusChar() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : 'a'..'z' '@' | 'k' '$' ;");
		g.createLookaheadDFAs();
		String expecting =
			".s0-'k'->.s1\n" +
			".s0-{'a'..'j', 'l'..'z'}->:s2=>1\n" +
			".s1-'$'->:s3=>2\n" +
			".s1-'@'->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * A range alt versus a set alt that only partly overlaps it ('9' lies
	 * outside the range and predicts alt 2 on its own).
	 */
	@Test public void testRangeWithDisjointSet() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : 'a'..'z' '@'\n" +
			"  | ('k'|'9'|'p') '$'\n" +
			"  ;\n");
		g.createLookaheadDFAs();
		// must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'}
		String expecting =
			".s0-'9'->:s3=>2\n" +
			".s0-{'a'..'j', 'l'..'o', 'q'..'z'}->:s2=>1\n" +
			".s0-{'k', 'p'}->.s1\n" +
			".s1-'$'->:s3=>2\n" +
			".s1-'@'->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** A set alt whose members collide with both ranges of the other alt. */
	@Test public void testDisjointSetCollidingWithTwoRanges() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : ('a'..'z'|'0'..'9') '@'\n" +
			"  | ('k'|'9'|'p') '$'\n" +
			"  ;\n");
		g.createLookaheadDFAs(false);
		// must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'} and 0..9
		// into 0..8
		String expecting =
			".s0-{'0'..'8', 'a'..'j', 'l'..'o', 'q'..'z'}->:s2=>1\n" +
			".s0-{'9', 'k', 'p'}->.s1\n" +
			".s1-'$'->:s3=>2\n" +
			".s1-'@'->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * Same grammar as {@link #testDisjointSetCollidingWithTwoRanges} with the
	 * two alternatives swapped, so the predicted alt numbers swap as well.
	 */
	@Test public void testDisjointSetCollidingWithTwoRangesCharsFirst() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : ('k'|'9'|'p') '$'\n" +
			"  | ('a'..'z'|'0'..'9') '@'\n" +
			"  ;\n");
		// must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'} and 0..9
		// into 0..8
		String expecting =
			".s0-{'0'..'8', 'a'..'j', 'l'..'o', 'q'..'z'}->:s3=>2\n" +
			".s0-{'9', 'k', 'p'}->.s1\n" +
			".s1-'$'->:s2=>1\n" +
			".s1-'@'->:s3=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * The colliding characters and ranges are spread over five separate
	 * alternatives instead of being grouped into sets.
	 */
	@Test public void testDisjointSetCollidingWithTwoRangesAsSeparateAlts() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : 'a'..'z' '@'\n" +
			"  | 'k' '$'\n" +
			"  | '9' '$'\n" +
			"  | 'p' '$'\n" +
			"  | '0'..'9' '@'\n" +
			"  ;\n");
		// must break up a..z into {'a'..'j', 'l'..'o', 'q'..'z'} and 0..9
		// into 0..8
		String expecting =
			".s0-'0'..'8'->:s8=>5\n" +
			".s0-'9'->.s6\n" +
			".s0-'k'->.s1\n" +
			".s0-'p'->.s4\n" +
			".s0-{'a'..'j', 'l'..'o', 'q'..'z'}->:s2=>1\n" +
			".s1-'$'->:s3=>2\n" +
			".s1-'@'->:s2=>1\n" +
			".s4-'$'->:s5=>4\n" +
			".s4-'@'->:s2=>1\n" +
			".s6-'$'->:s7=>3\n" +
			".s6-'@'->:s8=>5\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * A keyword rule listed before an identifier rule that also matches it.
	 * Both decisions of the grammar are checked; in decision 2 only "if"
	 * followed by EOT predicts alt 1, everything else predicts alt 2.
	 */
	@Test public void testKeywordVersusID() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"IF : 'if' ;\n" + // choose this over ID
			"ID : ('a'..'z')+ ;\n");
		String expecting =
			".s0-'a'..'z'->:s2=>1\n" +
			".s0-<EOT>->:s1=>2\n";
		checkDecision(g, 1, expecting, null);
		expecting =
			".s0-'i'->.s1\n" +
			".s0-{'a'..'h', 'j'..'z'}->:s4=>2\n" +
			".s1-'f'->.s2\n" +
			".s1-<EOT>->:s4=>2\n" +
			".s2-'a'..'z'->:s4=>2\n" +
			".s2-<EOT>->:s3=>1\n";
		checkDecision(g, 2, expecting, null);
	}

	/**
	 * Two rules matching the same text: alt 2 must be unreachable and exactly
	 * one message, a {@code GrammarUnreachableAltsMessage} for [2], is queued.
	 */
	@Test public void testIdenticalRules() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : 'a' ;\n" +
			"B : 'a' ;\n"); // can't reach this
		String expecting =
			".s0-'a'->.s1\n" +
			".s1-<EOT>->:s2=>1\n";

		// install the queue before analysis runs inside checkDecision
		ErrorQueue equeue = new ErrorQueue();
		ErrorManager.setErrorListener(equeue);

		checkDecision(g, 1, expecting, new int[] {2});

		assertEquals("unexpected number of expected problems",
				    1, equeue.size());
		Message msg = equeue.errors.get(0);
		assertTrue("warning must be an unreachable alt",
				    msg instanceof GrammarUnreachableAltsMessage);
		GrammarUnreachableAltsMessage u = (GrammarUnreachableAltsMessage)msg;
		assertEquals("[2]", u.alts.toString());

	}

	/** Two negated-char loops whose excluded characters ('r', 's') are adjacent. */
	@Test public void testAdjacentNotCharLoops() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : (~'r')+ ;\n" +
			"B : (~'s')+ ;\n");
		String expecting =
			".s0-'r'->:s3=>2\n" +
			".s0-'s'->:s2=>1\n" +
			".s0-{'\\u0000'..'q', 't'..'\\uFFFF'}->.s1\n" +
			".s1-'r'->:s3=>2\n" +
			".s1-<EOT>->:s2=>1\n" +
			".s1-{'\\u0000'..'q', 't'..'\\uFFFF'}->.s1\n";
		checkDecision(g, 3, expecting, null);
	}

	/**
	 * Two negated-char loops whose excluded characters ('r', 't') have 's'
	 * between them, so 's' shows up as its own set member.
	 */
	@Test public void testNonAdjacentNotCharLoops() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : (~'r')+ ;\n" +
			"B : (~'t')+ ;\n");
		String expecting =
			".s0-'r'->:s3=>2\n" +
			".s0-'t'->:s2=>1\n" +
			".s0-{'\\u0000'..'q', 's', 'u'..'\\uFFFF'}->.s1\n" +
			".s1-'r'->:s3=>2\n" +
			".s1-<EOT>->:s2=>1\n" +
			".s1-{'\\u0000'..'q', 's', 'u'..'\\uFFFF'}->.s1\n";
		checkDecision(g, 3, expecting, null);
	}

	/**
	 * Checks decision 1 before and after running {@link DFAOptimizer}; after
	 * optimization only the 'x' edge is expected to remain.
	 */
	@Test public void testLoopsWithOptimizedOutExitBranches() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"A : 'x'* ~'x'+ ;\n");
		String expecting =
			".s0-'x'->:s1=>1\n" +
			".s0-{'\\u0000'..'w', 'y'..'\\uFFFF'}->:s2=>2\n";
		checkDecision(g, 1, expecting, null);

		// The optimizer yanks out all exit branches from EBNF blocks
		// This is ok because we've already verified there are no problems
		// with the enter/exit decision
		DFAOptimizer optimizer = new DFAOptimizer(g);
		optimizer.optimize();
		FASerializer serializer = new FASerializer(g);
		DFA dfa = g.getLookaheadDFA(1);
		String result = serializer.serialize(dfa.startState);
		expecting = ".s0-'x'->:s1=>1\n";
		assertEquals(expecting, result);
	}

	// N O N G R E E D Y

	/** Explicitly non-greedy wildcard loop terminated by a two-char sequence. */
	@Test public void testNonGreedy() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"CMT : '/*' ( options {greedy=false;} : . )* '*/' ;");
		String expecting =
			".s0-'*'->.s1\n" +
			".s0-{'\\u0000'..')', '+'..'\\uFFFF'}->:s3=>1\n" +
			".s1-'/'->:s2=>2\n" +
			".s1-{'\\u0000'..'.', '0'..'\\uFFFF'}->:s3=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Explicitly non-greedy wildcard star loop terminated by a newline. */
	@Test public void testNonGreedyWildcardStar() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"SLCMT : '//' ( options {greedy=false;} : . )* '\n' ;");
		String expecting =
			".s0-'\\n'->:s1=>2\n" +
			".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFF'}->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * Wildcard star without a greedy option; the expected DFA is the same as
	 * for the explicitly non-greedy form.
	 */
	@Test public void testNonGreedyByDefaultWildcardStar() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"SLCMT : '//' .* '\n' ;");
		String expecting =
			".s0-'\\n'->:s1=>2\n" +
			".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFF'}->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Explicitly non-greedy wildcard plus loop terminated by a newline. */
	@Test public void testNonGreedyWildcardPlus() throws Exception {
		// same DFA as nongreedy .* but code gen checks number of
		// iterations at runtime
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"SLCMT : '//' ( options {greedy=false;} : . )+ '\n' ;");
		String expecting =
			".s0-'\\n'->:s1=>2\n" +
			".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFF'}->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Wildcard plus without a greedy option; same expected DFA as above. */
	@Test public void testNonGreedyByDefaultWildcardPlus() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"SLCMT : '//' .+ '\n' ;");
		String expecting =
			".s0-'\\n'->:s1=>2\n" +
			".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFF'}->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Parenthesized wildcard plus without a greedy option; same expected DFA. */
	@Test public void testNonGreedyByDefaultWildcardPlusWithParens() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"SLCMT : '//' (.)+ '\n' ;");
		String expecting =
			".s0-'\\n'->:s1=>2\n" +
			".s0-{'\\u0000'..'\\t', '\\u000B'..'\\uFFFF'}->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Non-greedy loop over explicit characters rather than the wildcard. */
	@Test public void testNonWildcardNonGreedy() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"DUH : (options {greedy=false;}:'x'|'y')* 'xy' ;");
		String expecting =
			".s0-'x'->.s1\n" +
			".s0-'y'->:s4=>2\n" +
			".s1-'x'->:s3=>1\n" +
			".s1-'y'->:s2=>3\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * Same rule as {@link #testNonWildcardNonGreedy} but without the greedy
	 * option; the expected DFA exits the loop only on "xy" followed by EOT.
	 */
	@Test public void testNonWildcardEOTMakesItWorkWithoutNonGreedyOption() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"DUH : ('x'|'y')* 'xy' ;");
		String expecting =
			".s0-'x'->.s1\n" +
			".s0-'y'->:s4=>1\n" +
			".s1-'x'->:s4=>1\n" +
			".s1-'y'->.s2\n" +
			".s2-'x'..'y'->:s4=>1\n" +
			".s2-<EOT>->:s3=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/** String-literal rule whose loop contains an escaped-quote alt and a wildcard alt. */
	@Test public void testAltConflictsWithLoopThenExit() throws Exception {
		// \" predicts alt 1, but wildcard then " can predict exit also
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"STRING : '\"' (options {greedy=false;}: '\\\\\"' | .)* '\"' ;\n"
		);
		String expecting =
			".s0-'\"'->:s1=>3\n" +
			".s0-'\\\\'->.s2\n" +
			".s0-{'\\u0000'..'!', '#'..'[', ']'..'\\uFFFF'}->:s4=>2\n" +
			".s2-'\"'->:s3=>1\n" +
			".s2-{'\\u0000'..'!', '#'..'\\uFFFF'}->:s4=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * A non-greedy loop with nothing after it: the DFA predicts the exit
	 * branch immediately, alt 1 must be unreachable, and exactly one
	 * {@code GrammarUnreachableAltsMessage} for [1] is queued.
	 */
	@Test public void testNonGreedyLoopThatNeverLoops() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar t;\n"+
			"DUH : (options {greedy=false;}:'x')+ ;"); // loop never matched
		String expecting =
			":s0=>2\n";

		// install the queue before analysis runs inside checkDecision
		ErrorQueue equeue = new ErrorQueue();
		ErrorManager.setErrorListener(equeue);

		checkDecision(g, 1, expecting, new int[] {1});

		assertEquals("unexpected number of expected problems",
				    1, equeue.size());
		Message msg = equeue.errors.get(0);
		assertTrue("warning must be an unreachable alt",
				   msg instanceof GrammarUnreachableAltsMessage);
		GrammarUnreachableAltsMessage u = (GrammarUnreachableAltsMessage)msg;
		assertEquals("[1]", u.alts.toString());
	}

	/** A rule that references itself inside its own loop, next to an escape rule and a negated set. */
	@Test public void testRecursive() throws Exception {
		// this is cool because the 3rd alt includes !(all other possibilities)
		Grammar g = new Grammar(
			"lexer grammar duh;\n" +
			"SUBTEMPLATE\n" +
			"        :       '{'\n" +
			"                ( SUBTEMPLATE\n" +
			"                | ESC\n" +
			"                | ~('}'|'\\\\'|'{')\n" +
			"                )*\n" +
			"                '}'\n" +
			"        ;\n" +
			"fragment\n" +
			"ESC     :       '\\\\' . ;");
		g.createLookaheadDFAs();
		String expecting =
			".s0-'\\\\'->:s2=>2\n" +
			".s0-'{'->:s1=>1\n" +
			".s0-'}'->:s4=>4\n" +
			".s0-{'\\u0000'..'[', ']'..'z', '|', '~'..'\\uFFFF'}->:s3=>3\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * Variant of {@link #testRecursive} in which the negated set no longer
	 * excludes the backslash, so the ESC alt and the set alt overlap.
	 */
	@Test public void testRecursive2() throws Exception {
		// this is also cool because it resolves \\ to be ESC alt; it's just
		// less efficient of a DFA
		Grammar g = new Grammar(
			"lexer grammar duh;\n" +
			"SUBTEMPLATE\n" +
			"        :       '{'\n" +
			"                ( SUBTEMPLATE\n" +
			"                | ESC\n" +
			"                | ~('}'|'{')\n" +
			"                )*\n" +
			"                '}'\n" +
			"        ;\n" +
			"fragment\n" +
			"ESC     :       '\\\\' . ;");
		g.createLookaheadDFAs();
		String expecting =
			".s0-'\\\\'->.s3\n" +
			".s0-'{'->:s2=>1\n" +
			".s0-'}'->:s1=>4\n" +
			".s0-{'\\u0000'..'[', ']'..'z', '|', '~'..'\\uFFFF'}->:s5=>3\n" +
			".s3-'\\\\'->:s8=>2\n" +
			".s3-'{'->:s7=>2\n" +
			".s3-'}'->.s4\n" +
			".s3-{'\\u0000'..'[', ']'..'z', '|', '~'..'\\uFFFF'}->:s6=>2\n" +
			".s4-'\\u0000'..'\\uFFFF'->:s6=>2\n" +
			".s4-<EOT>->:s5=>3\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Negation applied to a fragment rule that matches a single character. */
	@Test public void testNotFragmentInLexer() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar T;\n"+
			"A : 'a' | ~B {;} ;\n" +
			"fragment B : 'a' ;\n");
		g.createLookaheadDFAs();
		String expecting =
			".s0-'a'->:s1=>1\n" +
			".s0-{'\\u0000'..'`', 'b'..'\\uFFFF'}->:s2=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Negation applied to a fragment rule that matches a two-character set. */
	@Test public void testNotSetFragmentInLexer() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar T;\n"+
			"A : B | ~B {;} ;\n" +
			"fragment B : 'a'|'b' ;\n");
		g.createLookaheadDFAs();
		String expecting =
			".s0-'a'..'b'->:s1=>1\n" +
			".s0-{'\\u0000'..'`', 'c'..'\\uFFFF'}->:s2=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Negation applied to a non-fragment token rule inside a subrule. */
	@Test public void testNotTokenInLexer() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar T;\n"+
			"A : 'x' ('a' | ~B {;}) ;\n" +
			"B : 'a' ;\n");
		g.createLookaheadDFAs();
		String expecting =
			".s0-'a'->:s1=>1\n" +
			".s0-{'\\u0000'..'`', 'b'..'\\uFFFF'}->:s2=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * Negation applied to a fragment made of chars, a range and a reference
	 * to another fragment; the expected set covers 'a'..'f' as a whole.
	 */
	@Test public void testNotComplicatedSetRuleInLexer() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar T;\n"+
			"A : B | ~B {;} ;\n" +
			"fragment B : 'a'|'b'|'c'..'e'|C ;\n" +
			"fragment C : 'f' ;\n"); // analysis has to see through B to C
		String expecting =
			".s0-'a'..'f'->:s1=>1\n" +
			".s0-{'\\u0000'..'`', 'g'..'\\uFFFF'}->:s2=>2\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Negated set that mixes a char literal with a fragment rule reference. */
	@Test public void testNotSetWithRuleInLexer() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar T;\n"+
			"T : ~('a' | B) | 'a';\n" +
			"fragment\n" +
			"B : 'b' ;\n" +
			"C : ~'x'{;} ;"); // force Tokens to not collapse T|C
		String expecting =
			".s0-'b'->:s3=>2\n" +
			".s0-'x'->:s2=>1\n" +
			".s0-{'\\u0000'..'a', 'c'..'w', 'y'..'\\uFFFF'}->.s1\n" +
			".s1-<EOT>->:s2=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** A subrule alternative that calls a rule whose body is a negated char. */
	@Test public void testSetCallsRuleWithNot() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar A;\n" +
			"T : ~'x' ;\n" +
			"S : 'x' (T | 'x') ;\n");
		String expecting =
			".s0-'x'->:s2=>2\n" +
			".s0-{'\\u0000'..'w', 'y'..'\\uFFFF'}->:s1=>1\n";
		checkDecision(g, 1, expecting, null);
	}

	/** Lexer rule with a syntactic predicate; decision 4 (the Tokens rule) is checked. */
	@Test public void testSynPredInLexer() throws Exception {
		Grammar g = new Grammar(
			"lexer grammar T;\n"+
			"LT:  '<' ' '*\n" +
			"  |  ('<' IDENT) => '<' IDENT '>'\n" + // this was causing syntax error
			"  ;\n" +
			"IDENT:    'a'+;\n");
		// basically, Tokens rule should not do set compression test
		String expecting =
			".s0-'<'->:s1=>1\n" +
			".s0-'a'->:s2=>2\n";
		checkDecision(g, 4, expecting, null); // 4 is Tokens rule
	}

	// S U P P O R T

	/** Skeleton for new tests; it carries no {@code @Test} annotation and nothing in this class calls it. */
	private void _template() throws Exception {
		Grammar g = new Grammar(
			"grammar T;\n"+
			"a : A | B;");
		String expecting =
			"\n";
		checkDecision(g, 1, expecting, null);
	}

	/**
	 * Serializes the lookahead DFA of one decision and compares it, together
	 * with the DFA's unreachable alternatives, against the expectations.
	 *
	 * <p>If {@code g} has no code generator yet, a "Java" code generator is
	 * attached, the NFA is built and the lookahead DFAs are created before
	 * the decision is looked up.
	 *
	 * @param g grammar under test
	 * @param decision number of the decision whose DFA is checked; the test
	 *        fails if the grammar has no DFA for it
	 * @param expecting expected {@link FASerializer} output for the DFA's
	 *        start state
	 * @param expectingUnreachableAlts alternatives that must each appear in
	 *        the DFA's unreachable-alt list (extra entries are not rejected),
	 *        or {@code null} to require that the list is null or empty
	 * @throws Exception propagated from grammar construction or analysis
	 */
	protected void checkDecision(Grammar g,
								 int decision,
								 String expecting,
								 int[] expectingUnreachableAlts)
		throws Exception
	{

		// mimic actions of org.antlr.Tool first time for grammar g
		if ( g.getCodeGenerator()==null ) {
			CodeGenerator generator = new CodeGenerator(null, g, "Java");
			g.setCodeGenerator(generator);
			g.buildNFA();
			g.createLookaheadDFAs(false);
		}

		DFA dfa = g.getLookaheadDFA(decision);
		assertNotNull("unknown decision #"+decision, dfa);
		FASerializer serializer = new FASerializer(g);
		String result = serializer.serialize(dfa.startState);
		List<Integer> unreachableAlts = dfa.getUnreachableAlts();

		// first make sure the unreachable alts are as expected
		if ( expectingUnreachableAlts==null ) {
			int unreachableCount = unreachableAlts!=null ? unreachableAlts.size() : 0;
			if ( unreachableCount>0 ) {
				// show the offending alts; the assertion only reports a count
				System.err.println("unreachable alts (should be empty): "+unreachableAlts);
			}
			assertEquals("unreachable alts mismatch", 0, unreachableCount);
		}
		else {
			for (int expectedAlt : expectingUnreachableAlts) {
				// Integer.valueOf replaces the deprecated Integer(int) constructor
				assertTrue("unreachable alts mismatch",
						   unreachableAlts!=null && unreachableAlts.contains(Integer.valueOf(expectedAlt)));
			}
		}
		assertEquals(expecting, result);
	}

}