IntervalSet.java

/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.misc;

import org.antlr.analysis.Label;
import org.antlr.tool.Grammar;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;

/** A set of integers that relies on ranges being common to do
 *  "run-length-encoded" like compression (if you view an IntSet like
 *  a BitSet with runs of 0s and 1s).  Only ranges are recorded so that
 *  a few ints up near value 1000 don't cause massive bitsets, just two
 *  integer intervals.
 *
 *  element values may be negative.  Useful for sets of EPSILON and EOF.
 *
 *  0..9 char range is index pair ['\u0030','\u0039'].
 *  Multiple ranges are encoded with multiple index pairs.  Isolated
 *  elements are encoded with an index pair where both intervals are the same.
 *
 *  The ranges are ordered and disjoint so that 2..6 appears before 101..103.
 */
public class IntervalSet implements IntSet {
	public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE);

	/** The list of sorted, disjoint intervals. */
    protected List<Interval> intervals;

	/** Create a set with no elements */
    public IntervalSet() {
        intervals = new ArrayList<Interval>(2); // most sets are 1 or 2 elements
    }

	public IntervalSet(List<Interval> intervals) {
		this.intervals = intervals;
	}

	/** Create a set with a single element, el. */
    public static IntervalSet of(int a) {
		IntervalSet s = new IntervalSet();
        s.add(a);
        return s;
    }

    /** Create a set with all ints within range [a..b] (inclusive) */
    public static IntervalSet of(int a, int b) {
        IntervalSet s = new IntervalSet();
        s.add(a,b);
        return s;
    }

    /** Add a single element to the set.  An isolated element is stored
     *  as a range el..el.
     */
	@Override
    public void add(int el) {
        add(el,el);
    }

    /** Add interval; i.e., add all integers from a to b to set.
     *  If b&lt;a, do nothing.
     *  Keep list in sorted order (by left range value).
     *  If overlap, combine ranges.  For example,
     *  If this is {1..5, 10..20}, adding 6..7 yields
     *  {1..5, 6..7, 10..20}.  Adding 4..8 yields {1..8, 10..20}.
     */
    public void add(int a, int b) {
        add(Interval.create(a,b));
    }

	// copy on write so we can cache a..a intervals and sets of that
	protected void add(Interval addition) {
		//System.out.println("add "+addition+" to "+intervals.toString());
		if ( addition.b<addition.a ) {
			return;
		}
		// find position in list
		// Use iterators as we modify list in place
		for (ListIterator<Interval> iter = intervals.listIterator(); iter.hasNext();) {
			Interval r = iter.next();
			if ( addition.equals(r) ) {
				return;
			}
			if ( addition.adjacent(r) || !addition.disjoint(r) ) {
				// next to each other, make a single larger interval
				Interval bigger = addition.union(r);
				iter.set(bigger);
				// make sure we didn't just create an interval that
				// should be merged with next interval in list
				while ( iter.hasNext() ) {
					Interval next = iter.next();
					if ( !bigger.adjacent(next) && bigger.disjoint(next) ) {
						break;
					}

					// if we bump up against or overlap next, merge
					iter.remove();   // remove this one
					iter.previous(); // move backwards to what we just set
					iter.set(bigger.union(next)); // set to 3 merged ones
					iter.next(); // first call to next after previous duplicates the result
				}
				return;
			}
			if ( addition.startsBeforeDisjoint(r) ) {
				// insert before r
				iter.previous();
				iter.add(addition);
				return;
			}
			// if disjoint and after r, a future iteration will handle it
		}
		// ok, must be after last interval (and disjoint from last interval)
		// just add it
		intervals.add(addition);
	}

	/*
	protected void add(Interval addition) {
        //System.out.println("add "+addition+" to "+intervals.toString());
        if ( addition.b<addition.a ) {
            return;
        }
        // find position in list
        //for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
		int n = intervals.size();
		for (int i=0; i<n; i++) {
			Interval r = (Interval)intervals.get(i);
            if ( addition.equals(r) ) {
                return;
            }
            if ( addition.adjacent(r) || !addition.disjoint(r) ) {
                // next to each other, make a single larger interval
                Interval bigger = addition.union(r);
				intervals.set(i, bigger);
                // make sure we didn't just create an interval that
                // should be merged with next interval in list
				if ( (i+1)<n ) {
					i++;
					Interval next = (Interval)intervals.get(i);
                    if ( bigger.adjacent(next)||!bigger.disjoint(next) ) {
                        // if we bump up against or overlap next, merge
						intervals.remove(i); // remove next one
						i--;
						intervals.set(i, bigger.union(next)); // set to 3 merged ones
                    }
                }
                return;
            }
            if ( addition.startsBeforeDisjoint(r) ) {
                // insert before r
				intervals.add(i, addition);
                return;
            }
            // if disjoint and after r, a future iteration will handle it
        }
        // ok, must be after last interval (and disjoint from last interval)
        // just add it
        intervals.add(addition);
    }
*/

	@Override
	public void addAll(IntSet set) {
		if ( set==null ) {
			return;
		}
        if ( !(set instanceof IntervalSet) ) {
            throw new IllegalArgumentException("can't add non IntSet ("+
											   set.getClass().getName()+
											   ") to IntervalSet");
        }
        IntervalSet other = (IntervalSet)set;
        // walk set and add each interval
		int n = other.intervals.size();
		for (int i = 0; i < n; i++) {
			Interval I = other.intervals.get(i);
			this.add(I.a,I.b);
		}
    }

    public IntervalSet complement(int minElement, int maxElement) {
        return this.complement(IntervalSet.of(minElement,maxElement));
    }

    /** Given the set of possible values (rather than, say UNICODE or MAXINT),
     *  return a new set containing all elements in vocabulary, but not in
     *  this.  The computation is (vocabulary - this).
     *
     *  'this' is assumed to be either a subset or equal to vocabulary.
     */
	@Override
    public IntervalSet complement(IntSet vocabulary) {
        if ( vocabulary==null ) {
            return null; // nothing in common with null set
        }
		if ( !(vocabulary instanceof IntervalSet ) ) {
			throw new IllegalArgumentException("can't complement with non IntervalSet ("+
											   vocabulary.getClass().getName()+")");
		}
		IntervalSet vocabularyIS = ((IntervalSet)vocabulary);
		int maxElement = vocabularyIS.getMaxElement();

		IntervalSet compl = new IntervalSet();
		int n = intervals.size();
		if ( n ==0 ) {
			return compl;
		}
		Interval first = intervals.get(0);
		// add a range from 0 to first.a constrained to vocab
		if ( first.a > 0 ) {
			IntervalSet s = IntervalSet.of(0, first.a-1);
			IntervalSet a = s.and(vocabularyIS);
			compl.addAll(a);
		}
		for (int i=1; i<n; i++) { // from 2nd interval .. nth
			Interval previous = intervals.get(i-1);
			Interval current = intervals.get(i);
			IntervalSet s = IntervalSet.of(previous.b+1, current.a-1);
			IntervalSet a = s.and(vocabularyIS);
			compl.addAll(a);
		}
		Interval last = intervals.get(n -1);
		// add a range from last.b to maxElement constrained to vocab
		if ( last.b < maxElement ) {
			IntervalSet s = IntervalSet.of(last.b+1, maxElement);
			IntervalSet a = s.and(vocabularyIS);
			compl.addAll(a);
		}
		return compl;
    }

	/** Compute this-other via this&amp;~other.
	 *  Return a new set containing all elements in this but not in other.
	 *  other is assumed to be a subset of this;
     *  anything that is in other but not in this will be ignored.
	 */
	@Override
	public IntervalSet subtract(IntSet other) {
		// assume the whole unicode range here for the complement
		// because it doesn't matter.  Anything beyond the max of this' set
		// will be ignored since we are doing this & ~other.  The intersection
		// will be empty.  The only problem would be when this' set max value
		// goes beyond MAX_CHAR_VALUE, but hopefully the constant MAX_CHAR_VALUE
		// will prevent this.
		return this.and(((IntervalSet)other).complement(COMPLETE_SET));
	}

	/** return a new set containing all elements in this but not in other.
     *  Intervals may have to be broken up when ranges in this overlap
     *  with ranges in other.  other is assumed to be a subset of this;
     *  anything that is in other but not in this will be ignored.
	 *
	 *  Keep around, but 10-20-2005, I decided to make complement work w/o
	 *  subtract and so then subtract can simply be a&~b
	 *
    public IntSet subtract(IntSet other) {
        if ( other==null || !(other instanceof IntervalSet) ) {
            return null; // nothing in common with null set
        }

        IntervalSet diff = new IntervalSet();

        // iterate down both interval lists
        ListIterator thisIter = this.intervals.listIterator();
        ListIterator otherIter = ((IntervalSet)other).intervals.listIterator();
        Interval mine=null;
        Interval theirs=null;
        if ( thisIter.hasNext() ) {
            mine = (Interval)thisIter.next();
        }
        if ( otherIter.hasNext() ) {
            theirs = (Interval)otherIter.next();
        }
        while ( mine!=null ) {
            //System.out.println("mine="+mine+", theirs="+theirs);
            // CASE 1: nothing in theirs removes a chunk from mine
            if ( theirs==null || mine.disjoint(theirs) ) {
                // SUBCASE 1a: finished traversing theirs; keep adding mine now
                if ( theirs==null ) {
                    // add everything in mine to difference since theirs done
                    diff.add(mine);
                    mine = null;
                    if ( thisIter.hasNext() ) {
                        mine = (Interval)thisIter.next();
                    }
                }
                else {
                    // SUBCASE 1b: mine is completely to the left of theirs
                    // so we can add to difference; move mine, but not theirs
                    if ( mine.startsBeforeDisjoint(theirs) ) {
                        diff.add(mine);
                        mine = null;
                        if ( thisIter.hasNext() ) {
                            mine = (Interval)thisIter.next();
                        }
                    }
                    // SUBCASE 1c: theirs is completely to the left of mine
                    else {
                        // keep looking in theirs
                        theirs = null;
                        if ( otherIter.hasNext() ) {
                            theirs = (Interval)otherIter.next();
                        }
                    }
                }
            }
            else {
                // CASE 2: theirs breaks mine into two chunks
                if ( mine.properlyContains(theirs) ) {
                    // must add two intervals: stuff to left and stuff to right
                    diff.add(mine.a, theirs.a-1);
                    // don't actually add stuff to right yet as next 'theirs'
                    // might overlap with it
                    // The stuff to the right might overlap with next "theirs".
                    // so it is considered next
                    Interval right = new Interval(theirs.b+1, mine.b);
                    mine = right;
                    // move theirs forward
                    theirs = null;
                    if ( otherIter.hasNext() ) {
                        theirs = (Interval)otherIter.next();
                    }
                }

                // CASE 3: theirs covers mine; nothing to add to diff
                else if ( theirs.properlyContains(mine) ) {
                    // nothing to add, theirs forces removal totally of mine
                    // just move mine looking for an overlapping interval
                    mine = null;
                    if ( thisIter.hasNext() ) {
                        mine = (Interval)thisIter.next();
                    }
                }

                // CASE 4: non proper overlap
                else {
                    // overlap, but not properly contained
                    diff.add(mine.differenceNotProperlyContained(theirs));
                    // update iterators
                    boolean moveTheirs = true;
                    if ( mine.startsBeforeNonDisjoint(theirs) ||
                         theirs.b > mine.b )
                    {
                        // uh oh, right of theirs extends past right of mine
                        // therefore could overlap with next of mine so don't
                        // move theirs iterator yet
                        moveTheirs = false;
                    }
                    // always move mine
                    mine = null;
                    if ( thisIter.hasNext() ) {
                        mine = (Interval)thisIter.next();
                    }
                    if ( moveTheirs ) {
                        theirs = null;
                        if ( otherIter.hasNext() ) {
                            theirs = (Interval)otherIter.next();
                        }
                    }
                }
            }
        }
        return diff;
    }
	 */

    /** TODO: implement this! */
	@Override
	public IntSet or(IntSet a) {
		IntervalSet o = new IntervalSet();
		o.addAll(this);
		o.addAll(a);
		//throw new NoSuchMethodError();
		return o;
	}

    /** Return a new set with the intersection of this set with other.  Because
     *  the intervals are sorted, we can use an iterator for each list and
     *  just walk them together.  This is roughly O(min(n,m)) for interval
     *  list lengths n and m.
     */
	@Override
	public IntervalSet and(IntSet other) {
		if ( other==null ) { //|| !(other instanceof IntervalSet) ) {
			return null; // nothing in common with null set
		}

		List<Interval> myIntervals = this.intervals;
		List<Interval> theirIntervals = ((IntervalSet)other).intervals;
		IntervalSet intersection = null;
		int mySize = myIntervals.size();
		int theirSize = theirIntervals.size();
		int i = 0;
		int j = 0;
		// iterate down both interval lists looking for nondisjoint intervals
		while ( i<mySize && j<theirSize ) {
			Interval mine = myIntervals.get(i);
			Interval theirs = theirIntervals.get(j);
			//System.out.println("mine="+mine+" and theirs="+theirs);
			if ( mine.startsBeforeDisjoint(theirs) ) {
				// move this iterator looking for interval that might overlap
				i++;
			}
			else if ( theirs.startsBeforeDisjoint(mine) ) {
				// move other iterator looking for interval that might overlap
				j++;
			}
			else if ( mine.properlyContains(theirs) ) {
				// overlap, add intersection, get next theirs
				if ( intersection==null ) {
					intersection = new IntervalSet();
				}
				intersection.add(mine.intersection(theirs));
				j++;
			}
			else if ( theirs.properlyContains(mine) ) {
				// overlap, add intersection, get next mine
				if ( intersection==null ) {
					intersection = new IntervalSet();
				}
				intersection.add(mine.intersection(theirs));
				i++;
			}
			else if ( !mine.disjoint(theirs) ) {
				// overlap, add intersection
				if ( intersection==null ) {
					intersection = new IntervalSet();
				}
				intersection.add(mine.intersection(theirs));
				// Move the iterator of lower range [a..b], but not
				// the upper range as it may contain elements that will collide
				// with the next iterator. So, if mine=[0..115] and
				// theirs=[115..200], then intersection is 115 and move mine
				// but not theirs as theirs may collide with the next range
				// in thisIter.
				// move both iterators to next ranges
				if ( mine.startsAfterNonDisjoint(theirs) ) {
					j++;
				}
				else if ( theirs.startsAfterNonDisjoint(mine) ) {
					i++;
				}
			}
		}
		if ( intersection==null ) {
			return new IntervalSet();
		}
		return intersection;
	}

    /** Is el in any range of this set? */
	@Override
    public boolean member(int el) {
		int n = intervals.size();
		for (int i = 0; i < n; i++) {
			Interval I = intervals.get(i);
			int a = I.a;
			int b = I.b;
			if ( el<a ) {
				break; // list is sorted and el is before this interval; not here
			}
			if ( el>=a && el<=b ) {
				return true; // found in this interval
			}
		}
		return false;
/*
		for (ListIterator iter = intervals.listIterator(); iter.hasNext();) {
            Interval I = (Interval) iter.next();
            if ( el<I.a ) {
                break; // list is sorted and el is before this interval; not here
            }
            if ( el>=I.a && el<=I.b ) {
                return true; // found in this interval
            }
        }
        return false;
        */
    }

    /** return true if this set has no members */
	@Override
    public boolean isNil() {
        return intervals==null || intervals.isEmpty();
    }

    /** If this set is a single integer, return it otherwise Label.INVALID */
	@Override
    public int getSingleElement() {
        if ( intervals!=null && intervals.size()==1 ) {
            Interval I = intervals.get(0);
            if ( I.a == I.b ) {
                return I.a;
            }
        }
        return Label.INVALID;
    }

	public int getMaxElement() {
		if ( isNil() ) {
			return Label.INVALID;
		}
		Interval last = intervals.get(intervals.size()-1);
		return last.b;
	}

	/** Return minimum element &gt;= 0 */
	public int getMinElement() {
		if ( isNil() ) {
			return Label.INVALID;
		}
		int n = intervals.size();
		for (int i = 0; i < n; i++) {
			Interval I = intervals.get(i);
			int a = I.a;
			int b = I.b;
			for (int v=a; v<=b; v++) {
				if ( v>=0 ) return v;
			}
		}
		return Label.INVALID;
	}

    /** Return a list of Interval objects. */
    public List<Interval> getIntervals() {
        return intervals;
    }

    /** Are two IntervalSets equal?  Because all intervals are sorted
     *  and disjoint, equals is a simple linear walk over both lists
     *  to make sure they are the same.  Interval.equals() is used
     *  by the List.equals() method to check the ranges.
     */
	@Override
    public boolean equals(Object obj) {
        if ( !(obj instanceof IntervalSet) ) {
            return false;
        }
        IntervalSet other = (IntervalSet)obj;
        return this.intervals.equals(other.intervals);
    }

	@Override
    public String toString() {
        return toString(null);
    }

	@Override
    public String toString(Grammar g) {
        StringBuilder buf = new StringBuilder();
		if ( this.intervals==null || this.intervals.isEmpty() ) {
			return "{}";
		}
        if ( this.intervals.size()>1 ) {
            buf.append("{");
        }
        Iterator<Interval> iter = this.intervals.iterator();
        while (iter.hasNext()) {
            Interval I = iter.next();
            int a = I.a;
            int b = I.b;
            if ( a==b ) {
                if ( g!=null ) {
                    buf.append(g.getTokenDisplayName(a));
                }
                else {
                    buf.append(a);
                }
            }
            else {
                if ( g!=null ) {
                    buf.append(g.getTokenDisplayName(a)).append("..").append(g.getTokenDisplayName(b));
                }
                else {
                    buf.append(a).append("..").append(b);
                }
            }
            if ( iter.hasNext() ) {
                buf.append(", ");
            }
        }
        if ( this.intervals.size()>1 ) {
            buf.append("}");
        }
        return buf.toString();
    }

	@Override
    public int size() {
		int n = 0;
		int numIntervals = intervals.size();
		if ( numIntervals==1 ) {
			Interval firstInterval = this.intervals.get(0);
			return firstInterval.b-firstInterval.a+1;
		}
		for (int i = 0; i < numIntervals; i++) {
			Interval I = intervals.get(i);
			n += (I.b-I.a+1);
		}
		return n;
    }

	@Override
    public List<Integer> toList() {
		List<Integer> values = new ArrayList<Integer>();
		int n = intervals.size();
		for (int i = 0; i < n; i++) {
			Interval I = intervals.get(i);
			int a = I.a;
			int b = I.b;
			for (int v=a; v<=b; v++) {
				values.add(Utils.integer(v));
			}
		}
		return values;
    }

	/** Get the ith element of ordered set.  Used only by RandomPhrase so
	 *  don't bother to implement if you're not doing that for a new
	 *  ANTLR code gen target.
	 */
	public int get(int i) {
		int n = intervals.size();
		int index = 0;
		for (int j = 0; j < n; j++) {
			Interval I = intervals.get(j);
			int a = I.a;
			int b = I.b;
			for (int v=a; v<=b; v++) {
				if ( index==i ) {
					return v;
				}
				index++;
			}
		}
		return -1;
	}

	public int[] toArray() {
		int[] values = new int[size()];
		int n = intervals.size();
		int j = 0;
		for (int i = 0; i < n; i++) {
			Interval I = intervals.get(i);
			int a = I.a;
			int b = I.b;
			for (int v=a; v<=b; v++) {
				values[j] = v;
				j++;
			}
		}
		return values;
	}

	public org.antlr.runtime.BitSet toRuntimeBitSet() {
		org.antlr.runtime.BitSet s =
			new org.antlr.runtime.BitSet(getMaxElement()+1);
		int n = intervals.size();
		for (int i = 0; i < n; i++) {
			Interval I = intervals.get(i);
			int a = I.a;
			int b = I.b;
			for (int v=a; v<=b; v++) {
				s.add(v);
			}
		}
		return s;
	}

	@Override
	public void remove(int el) {
        throw new NoSuchMethodError("IntervalSet.remove() unimplemented");
    }

	/*
	protected void finalize() throws Throwable {
		super.finalize();
		System.out.println("size "+intervals.size()+" "+size());
	}
	*/
}