CNFConverter.java
/*-
* #%L
* JSQLParser library
* %%
* Copyright (C) 2004 - 2019 JSQLParser
* %%
* Dual licensed under GNU LGPL 2.1 or Apache License 2.0
* #L%
*/
package net.sf.jsqlparser.util.cnfexpression;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Stack;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.NotExpression;
/**
* This class handles the conversion from a normal expression tree into the CNF form.
* <p>
* Here is the definition of CNF form: https://en.wikipedia.org/wiki/Conjunctive_normal_form
* <p>
* Basically it will follow these steps:
* <p>
* To help understanding, I will generate an example: Here is the original tree: OR / \ OR NOT / \ |
* NOT H AND | / \ NOT G OR | / \ F H NOT | OR / \ AND L / \ ( ) ( ) | | J K
* <p>
* 1. rebuild the tree by replacing the "and" and "or" operators (which are binary) into their
* counterparts node that could hold multiple elements. Also, leave out the parenthesis node between
* the conditional operators to make the tree uniform.
* <p>
* After the transform, the result should be like this: OR(M) / \ OR(M) NOT / \ | NOT H AND(M) | / \
* NOT G OR(M) | / \ F H NOT | OR(M) / \ AND(M) L / \ J K
* <p>
* 2. push the not operators into the bottom of the expression. That means the not operator will be
* the root of the expression tree where no "and" or "or" exists. Be sure use the De Morgan's law
* and double not law.
* <p>
* How to use De Morgan law: For example, here is the original expression tree: NOT | AND(M) / \ G H
* <p>
* After we use the De Morgan law, the result should be like this: OR(M) / \ NOT NOT | | G H
* <p>
* After the transform, the result should be like this: OR(M) / \ OR(M) OR(M) / \ / \ F H NOT AND(M)
* | / \ G NOT OR(M) | / \ H AND(M) L / \ J K
* <p>
* 3. gather all the adjacent "and" or "or" operator together. After doing that, the expression tree
* will be presented as: all the and expression will be in either odd or even levels, this will be
* the same for the or operator.
* <p>
* After the transform, the expression tree should be like this: OR(M) / / \ \ F H NOT AND(M) | / \
* G NOT OR(M) | / \ H AND(M) L / \ J K
* <p>
* 4. push the and operator upwards until the root is an and operator and all the children are or
* operators with multiple components. At this time we get the result: an expression in CNF form.
* How do we push and up? Use distribution law!
* <p>
* For example, here is the way to push the and up and merge them. OR / \ AND L / \ J K
* <p>
* In the normal form, it could be: (J AND K) OR L. If we apply the distribution law, we will get
* the result like this: (J OR L) AND (K OR L), the tree form of this should be like: AND / \ OR OR
* / \ / \ J L K L
* <p>
* So after we push the AND at the deepest level up and merge it with the existing add, we get this
* result. OR(M) / / \ \ F H NOT AND(M) | / | \ G NOT OR(M) OR(M) | / \ / \ H J L K L
* <p>
* Now let us push the and up and we will get the result like this: AND(M) / | \ OR(M) OR(M) OR(M) /
* / \ \ / / | \ \ / / | \ \ F H NOT NOT F H NOT J L F H NOT K L | | | | G H G G
* <p>
* 5. The last step, convert the Multiple Expression back to the binary form. Note the final tree
* shall be left-inclined.
* <p>
* The final expression tree shall be like this: AND / \ AND ( ) / \ | ( ) ( ) part1 | | OR part2 /
* \ OR NOT / \ | OR NOT H / \ | F H G
* <p>
* part1: OR / \ OR L / \ OR K / \ OR NOT / \ | F H G
* <p>
* part2: OR / \ OR L / \ OR J / \ OR NOT / \ | F H G
*
* @author messfish
*/
public class CNFConverter {
private Expression root;
// the variable that stores the newly generated root.
private Expression dummy;
// this variable mainly serves as the dummy root of the true root.
// generally it will be a multi and operator with root as the child.
private Expression temp1;
private Expression temp2;
private Expression child;
// these two variable mainly serves as nodes that traverse through
// the expression tree to change the structure of expression tree.
// notice temp1 will be settled as the root and temp2 will be
// settled as the dummy root.
private boolean isUsed = false;
public static Expression convertToCNF(Expression expr) {
CNFConverter cnf = new CNFConverter();
return cnf.convert(expr);
}
/**
* this method takes an expression tree and converts that into a CNF form. Notice the 5 steps
* shown above will turn into 5 different methods. For the sake of testing, I set them public.
* return the converted expression.
*
* @param express the original expression tree.
*/
private Expression convert(Expression express)
throws IllegalStateException {
if (isUsed) {
throw new IllegalStateException("The class could only be used once!");
} else {
isUsed = true;
}
reorder(express);
pushNotDown();
/*
* notice for the gather() function, we do not change the variable that points to the root
* by pointing to others. Also, we do not change those temp variables. So there is no need
* to set those variables back to their modified state.
*/
gather();
pushAndUp();
changeBack();
return root;
}
/**
* this is the first step that rebuild the expression tree. Use the standard specified in the
* above class. Traverse the original tree recursively and rebuild the tree from that.
*
* @param express the original expression tree.
*/
private void reorder(Expression express) {
root = CloneHelper.modify(express);
List<Expression> list = new ArrayList<Expression>();
list.add(root);
dummy = new MultiAndExpression(list);
}
/**
* This method is used to deal with pushing not operators down. Since it needs an extra
* parameter, I will create a new method to handle this.
*/
private void pushNotDown() {
/* set the two temp parameters to their staring point. */
temp1 = root;
temp2 = dummy;
/*
* I set it to zero since if the modification happens at the root, the parent will have the
* correct pointer to the children.
*/
pushNot(0);
/* do not forget to set the operators back! */
root = ((MultiAndExpression) dummy).getChild(0);
temp1 = root;
temp2 = dummy;
}
/**
* This method is the helper function to push not operators down. traverse the tree thoroughly,
* when we meet the not operator. We only need to consider these three operators:
* MultiAndOperator, MultiOrOperator, NotOperator. Handle them in a seperate way. when we finish
* the traverse, the expression tree will have all the not operators pushed as downwards as they
* could. In the method, I use two global variables: temp1 and temp2 to traverse the expression
* tree. Notice that temp2 will always be the parent of temp1.
*
* @param index the index of the children appeared in parents array.
*/
private void pushNot(int index) {
/*
* what really matters is the three logical operators: and, or, not. so we only deal with
* these three operators.
*/
if (temp1 instanceof MultiAndExpression) {
MultiAndExpression and = (MultiAndExpression) temp1;
for (int i = 0; i < and.size(); i++) {
temp2 = and;
temp1 = and.getChild(i);
pushNot(i);
}
} else if (temp1 instanceof MultiOrExpression) {
MultiOrExpression or = (MultiOrExpression) temp1;
for (int i = 0; i < or.size(); i++) {
temp2 = or;
temp1 = or.getChild(i);
pushNot(i);
}
} else if (temp1 instanceof NotExpression) {
handleNot(index);
}
}
/**
* This function mainly deals with pushing not operators down. check the child. If it is not a
* logic operator(and or or). stop at that point. Else use De Morgan law to push not downwards.
*
* @param index the index of the children appeared in parents array.
*/
private void handleNot(int index) {
child = ((NotExpression) temp1).getExpression();
int nums = 1; // takes down the number of not operators.
while (child instanceof NotExpression) {
child = ((NotExpression) child).getExpression();
nums++;
}
/*
* if the number of not operators are even. we could get rid of all the not operators. set
* the child to the parent.
*/
if (nums % 2 == 0) {
((MultipleExpression) temp2).setChild(index, child);
temp1 = child;
pushNot(-1);
} else {
/*
* otherwise there will be one not left to push. if the child is not these two types of
* operators. that means we reach the leaves of the logical part. set a new not operator
* whose child is the current one and connect that operator with the parent and return.
*/
if (!(child instanceof MultiAndExpression)
&& !(child instanceof MultiOrExpression)) {
// if (child instanceof LikeExpression) {
// ((LikeExpression) child).setNot();
// } else if (child instanceof BinaryExpression) {
// ((BinaryExpression) child).setNot();
// } else {
child = new NotExpression(child);
// }
((MultipleExpression) temp2).setChild(index, child);
// return;
} else if (child instanceof MultiAndExpression) {
MultiAndExpression and = (MultiAndExpression) child;
List<Expression> list = new ArrayList<Expression>();
for (int i = 0; i < and.size(); i++) {
/* push not to every element in the operator. */
NotExpression not = new NotExpression(and.getChild(i));
list.add(not);
}
/* the De Morgan law shows we need to change and to or. */
temp1 = new MultiOrExpression(list);
((MultipleExpression) temp2).setChild(index, temp1);
pushNot(-1);
} else if (child instanceof MultiOrExpression) {
MultiOrExpression or = (MultiOrExpression) child;
List<Expression> list = new ArrayList<Expression>();
for (int i = 0; i < or.size(); i++) {
/* push not to every element in the operator. */
NotExpression not = new NotExpression(or.getChild(i));
list.add(not);
}
/* the De Morgan law shows we need to change or to and. */
temp1 = new MultiAndExpression(list);
((MultipleExpression) temp2).setChild(index, temp1);
pushNot(-1);
}
}
}
/**
* This method serves as dealing with the third step. It is used to put all the adjacent same
* multi operators together. BFS the tree and do it node by node. In the end we will get the
* tree where all the same multi operators store in the same odd level of the tree or in the
* same even level of the tree.
*/
@SuppressWarnings({"PMD.CyclomaticComplexity"})
private void gather() {
Queue<Expression> queue = new LinkedList<Expression>();
queue.offer(temp1);
while (!queue.isEmpty()) {
Expression express = queue.poll();
/*
* at this level, we only deal with "multi and" and "multi or" operators, so we only
* consider these two operators. that means we do nothing if the operator is not those
* two.
*/
if (express instanceof MultiAndExpression) {
MultiAndExpression and = (MultiAndExpression) express;
while (true) {
int index = 0;
Expression get = null;
for (; index < and.size(); index++) {
get = and.getChild(index);
if (get instanceof MultiAndExpression) {
break;
}
}
/*
* if the index is the size of the multi operator, that means this is already
* valid. jump out of the loop.
*/
if (index == and.size()) {
break;
} else {
/*
* if not, remove the child out and push the child of that child in the
* operator, starting from the index where the child is removed.
*/
and.removeChild(index);
MultipleExpression order = (MultipleExpression) get;
for (int i = 0; i < order.size(); i++) {
and.addChild(index, order.getChild(i));
index++;
}
}
}
/* Do the standard BFS now since all children are not and operators. */
for (int i = 0; i < and.size(); i++) {
queue.offer(and.getChild(i));
}
} else if (express instanceof MultiOrExpression) {
/* for the multi or operator, the logic is the similar. */
MultiOrExpression or = (MultiOrExpression) express;
while (true) {
int index = 0;
Expression get = null;
for (; index < or.size(); index++) {
get = or.getChild(index);
if (get instanceof MultiOrExpression) {
break;
}
}
/*
* if the index is the size of the multi operator, that means this is already
* valid. jump out of the loop.
*/
if (index == or.size()) {
break;
} else {
/*
* if not, remove the child out and push the child of that child in the
* operator, starting from the index where the child is removed.
*/
or.removeChild(index);
MultipleExpression order = (MultipleExpression) get;
for (int i = 0; i < order.size(); i++) {
or.addChild(index, order.getChild(i));
index++;
}
}
}
/* Do the standard BFS now since all children are not or operators. */
for (int i = 0; i < or.size(); i++) {
queue.offer(or.getChild(i));
}
}
}
}
/**
* First, BFS the tree and gather all the or operators and their parents into a stack. Next, pop
* them out and push the and operators under the or operators upwards(if there are). Do this
* level by level, which means during each level we will call the gather() method to make the
* tree uniform. When we move out of the stack. The expression tree shall be in CNF form.
*/
private void pushAndUp() {
Queue<Mule> queue = new LinkedList<Mule>();
Stack<Mule> stack = new Stack<Mule>();
Mule root = new Mule(temp2, temp1, 0);
queue.offer(root);
int level = 1;
/*
* do the BFS and store valid mule into the stack. Notice the first parameter is parent and
* the second parameter is children.
*/
while (!queue.isEmpty()) {
int size = queue.size();
for (int i = 0; i < size; i++) {
Mule mule = queue.poll();
Expression parent = mule.parent;
Expression child = mule.child;
if (parent instanceof MultiAndExpression
&& child instanceof MultiOrExpression) {
stack.push(mule);
}
/* Note the child may not be an instance of multiple expression!. */
if (child instanceof MultipleExpression) {
MultipleExpression multi = (MultipleExpression) child;
for (int j = 0; j < multi.size(); j++) {
Expression get = multi.getChild(j);
if (get instanceof MultipleExpression) {
Mule added = new Mule(child, get, level);
queue.offer(added);
}
}
}
}
level++;
}
/* use another function to handle pushing and up. */
pushAnd(stack);
/* do not forget to set the operators back! */
this.root = ((MultiAndExpression) dummy).getChild(0);
temp1 = this.root;
temp2 = dummy;
/*
* at last, remember to gather again since there are no gather() method called if there are
* some movements on the root.
*/
gather();
}
/**
* This helper function is used to deal with pushing and up: generally, pop the top element out
* of the stack, use BFS to traverse the tree and push and up. It will case the expression tree
* to have the and as the new root and multiple or as the children. Push them on the queue and
* repeat the same process until the newly generated or operator does not have any and operators
* in it(which means no elements will be added into the queue). when one level is finished,
* regroup the tree. Do this until the stack is empty, the result will be the expression in CNF
* form.
*
* @param stack the stack stores a list of combined data.
*/
@SuppressWarnings({"PMD.CyclomaticComplexity"})
private void pushAnd(Stack<Mule> stack) {
int level = 0;
if (!stack.isEmpty()) {
level = stack.peek().level;
}
while (!stack.isEmpty()) {
Mule mule = stack.pop();
/* we finish a level, uniform the tree by calling gather. */
if (level != mule.level) {
gather();
level = mule.level;
}
Queue<Mule> queue = new LinkedList<Mule>();
/*
* this time we do not need to take down the level of the tree, so simply set a 0 to the
* last parameter.
*/
Mule combined = new Mule(mule.parent, mule.child, 0);
queue.offer(combined);
while (!queue.isEmpty()) {
Mule get = queue.poll();
Expression parent = get.parent;
Expression child = get.child;
/*
* based on the code above, the stack only have the expression which they are multi
* operators. so safely convert them.
*/
MultipleExpression children = (MultipleExpression) child;
int index = 0;
MultiAndExpression and = null;
/* find the children that the child is an multi and operator. */
for (; index < children.size(); index++) {
if (children.getChild(index) instanceof MultiAndExpression) {
and = (MultiAndExpression) children.getChild(index);
break;
}
}
if (index == children.size() || and == null) {
continue;
}
children.removeChild(index);
MultipleExpression parents = (MultipleExpression) parent;
List<Expression> list = new ArrayList<Expression>();
MultiAndExpression newand = new MultiAndExpression(list);
parents.setChild(parents.getIndex(children), newand);
for (int i = 0; i < and.size(); i++) {
Expression temp = CloneHelper.shallowCopy(children);
MultipleExpression mtemp = (MultipleExpression) temp;
mtemp.addChild(mtemp.size(), and.getChild(i));
newand.addChild(i, mtemp);
queue.offer(new Mule(newand, mtemp, 0));
}
}
}
}
/**
* This is the final step of the CNF conversion: now we have the Expression tree that has one
* multiple and expression with a list of multiple or expression as the child. So we need to
* convert the multiple expression back to the binary counterparts. Note the converted tree is
* left inclined. Also I attach a parenthesis node before the or expression that is attached to
* the and expression to make the generated result resembles the CNF form.
*/
private void changeBack() {
if (!(root instanceof MultiAndExpression)) {
return;
}
MultipleExpression temp = (MultipleExpression) root;
for (int i = 0; i < temp.size(); i++) {
temp.setChild(i, CloneHelper.changeBack(true, temp.getChild(i)));
}
root = CloneHelper.changeBack(false, temp);
}
private class Mule {
private Expression parent;
private Expression child;
private int level;
private Mule(Expression parent, Expression child, int level) {
this.parent = parent;
this.child = child;
this.level = level;
}
}
}