/src/spirv-tools/source/opt/scalar_analysis_simplification.cpp

Source
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>

#include "source/opt/scalar_analysis.h"

// Simplifies scalar analysis DAGs.
//
// 1. Given a node passed to SimplifyExpression we first simplify the graph by
// calling SimplifyPolynomial. This groups like nodes following basic arithmetic
// rules, so multiple adds of the same load instruction could be grouped into a
// single multiply of that instruction. SimplifyPolynomial will traverse the DAG
// and build up an accumulator buffer for each class of instruction it finds.
// For example take the loop:
// for (i=0, i<N; i++) { i+B+23+4+B+C; }
// In this example the expression "i+B+23+4+B+C" has four classes of
// instruction, induction variable i, the two value unknowns B and C, and the
// constants. The accumulator buffer is then used to rebuild the graph using
// the accumulation of each type. This example would then be folded into
// i+2*B+C+27.
//
// This new graph contains a single add node (or if only one type found then
// just that node) with each of the like terms (or multiplication node) as a
// child.
//
// 2. FoldRecurrentAddExpressions is then called on this new DAG. This will take
// RecurrentAddExpressions which are with respect to the same loop and fold them
// into a single new RecurrentAddExpression with respect to that same loop. An
// expression can have multiple RecurrentAddExpression's with respect to
// different loops in the case of nested loops. These expressions cannot be
// folded further. For example:
//
// for (i=0; i<N;i++) for(j=0,k=1; j<N;++j,++k)
//
// The 'j' and 'k' are RecurrentAddExpression with respect to the second loop
// and 'i' to the first. If 'j' and 'k' are used in an expression together then
// they will be folded into a new RecurrentAddExpression with respect to the
// second loop in that expression.
//
//
// 3. If the DAG now only contains a single RecurrentAddExpression we can now
// perform a final optimization SimplifyRecurrentAddExpression. This will
// transform the entire DAG into a RecurrentAddExpression. Additions to the
// RecurrentAddExpression are added to the offset field and multiplications to
// the coefficient.
//

namespace spvtools {
namespace opt {

// Implementation of the functions which are used to simplify the graph. Graphs
// of unknowns, multiplies, additions, and constants can be turned into a linear
// add node with each term as a child. For instance a large graph built from, X
// + X*2 + Y - Y*3 + 4 - 1, would become a single add expression with the
// children X*3, -Y*2, and the constant 3. Graphs containing a recurrent
// expression will be simplified to represent the entire graph around a single
// recurrent expression. So for an induction variable (i=0, i++) if you add 1 to
// i in an expression we can rewrite the graph of that expression to be a single
// recurrent expression of (i=1,i++).
class SENodeSimplifyImpl {
 public:
  SENodeSimplifyImpl(ScalarEvolutionAnalysis* analysis,
                     SENode* node_to_simplify)
      : analysis_(*analysis),
        node_(node_to_simplify),
        constant_accumulator_(0) {}

  // Return the result of the simplification.
  SENode* Simplify();

 private:
  // Recursively descend through the graph to build up the accumulator objects
  // which are used to flatten the graph. |child| is the node currently being
  // traversed and the |negation| flag is used to signify that this operation
  // was preceded by a unary negative operation and as such the result should be
  // negated.
  void GatherAccumulatorsFromChildNodes(SENode* new_node, SENode* child,
                                        bool negation);

  // Given a |multiply| node add to the accumulators for the term type within
  // the |multiply| expression. Will return true if the accumulators could be
  // calculated successfully. If the |multiply| is in any form other than
  // unknown*constant then we return false. |negation| signifies that the
  // operation was preceded by a unary negative.
  bool AccumulatorsFromMultiply(SENode* multiply, bool negation);

  SERecurrentNode* UpdateCoefficient(SERecurrentNode* recurrent,
                                     int64_t coefficient_update) const;

  // If the graph contains a recurrent expression, ie, an expression with the
  // loop iterations as a term in the expression, then the whole expression
  // can be rewritten to be a recurrent expression.
  SENode* SimplifyRecurrentAddExpression(SERecurrentNode* node);

  // Simplify the whole graph by linking like terms together in a single flat
  // add node. So X*2 + Y -Y + 3 +6 would become X*2 + 9. Where X and Y are a
  // ValueUnknown node (i.e, a load) or a recurrent expression.
  SENode* SimplifyPolynomial();

  // Each recurrent expression is an expression with respect to a specific loop.
  // If we have two different recurrent terms with respect to the same loop in a
  // single expression then we can fold those terms into a single new term.
  // For instance:
  //
  // induction i = 0, i++
  // temp = i*10
  // array[i+temp]
  //
  // We can fold the i + temp into a single expression. Rec(0,1) + Rec(0,10) can
  // become Rec(0,11).
  SENode* FoldRecurrentAddExpressions(SENode*);

  // We can eliminate recurrent expressions which have a coefficient of zero by
  // replacing them with their offset value. We are able to do this because a
  // recurrent expression represents the equation coefficient*iterations +
  // offset.
  SENode* EliminateZeroCoefficientRecurrents(SENode* node);

  // A reference the analysis which requested the simplification.
  ScalarEvolutionAnalysis& analysis_;

  // The node being simplified.
  SENode* node_;

  // An accumulator of the net result of all the constant operations performed
  // in a graph.
  int64_t constant_accumulator_;

  // An accumulator for each of the non constant terms in the graph.
  std::map<SENode*, int64_t> accumulators_;
};

// From a |multiply| build up the accumulator objects.
bool SENodeSimplifyImpl::AccumulatorsFromMultiply(SENode* multiply,
                                                  bool negation) {
  if (multiply->GetChildren().size() != 2 ||
      multiply->GetType() != SENode::Multiply)
    return false;

  SENode* operand_1 = multiply->GetChild(0);
  SENode* operand_2 = multiply->GetChild(1);

  SENode* value_unknown = nullptr;
  SENode* constant = nullptr;

  // Work out which operand is the unknown value.
  if (operand_1->GetType() == SENode::ValueUnknown ||
      operand_1->GetType() == SENode::RecurrentAddExpr)
    value_unknown = operand_1;
  else if (operand_2->GetType() == SENode::ValueUnknown ||
           operand_2->GetType() == SENode::RecurrentAddExpr)
    value_unknown = operand_2;

  // Work out which operand is the constant coefficient.
  if (operand_1->GetType() == SENode::Constant)
    constant = operand_1;
  else if (operand_2->GetType() == SENode::Constant)
    constant = operand_2;

  // If the expression is not a variable multiplied by a constant coefficient,
  // exit out.
  if (!(value_unknown && constant)) {
    return false;
  }

  int64_t sign = negation ? -1 : 1;

  auto iterator = accumulators_.find(value_unknown);
  int64_t new_value = constant->AsSEConstantNode()->FoldToSingleValue() * sign;
  // Add the result of the multiplication to the accumulators.
  if (iterator != accumulators_.end()) {
    (*iterator).second += new_value;
  } else {
    accumulators_.insert({value_unknown, new_value});
  }

  return true;
}

SENode* SENodeSimplifyImpl::Simplify() {
  // We only handle graphs with an addition, multiplication, or negation, at the
  // root.
  if (node_->GetType() != SENode::Add && node_->GetType() != SENode::Multiply &&
      node_->GetType() != SENode::Negative)
    return node_;

  SENode* simplified_polynomial = SimplifyPolynomial();

  SERecurrentNode* recurrent_expr = nullptr;
  node_ = simplified_polynomial;

  // Fold recurrent expressions which are with respect to the same loop into a
  // single recurrent expression.
  simplified_polynomial = FoldRecurrentAddExpressions(simplified_polynomial);

  simplified_polynomial =
      EliminateZeroCoefficientRecurrents(simplified_polynomial);

  // Traverse the immediate children of the new node to find the recurrent
  // expression. If there is more than one there is nothing further we can do.
  for (SENode* child : simplified_polynomial->GetChildren()) {
    if (child->GetType() == SENode::RecurrentAddExpr) {
      recurrent_expr = child->AsSERecurrentNode();
    }
  }

  // We need to count the number of unique recurrent expressions in the DAG to
  // ensure there is only one.
  for (auto child_iterator = simplified_polynomial->graph_begin();
       child_iterator != simplified_polynomial->graph_end(); ++child_iterator) {
    if (child_iterator->GetType() == SENode::RecurrentAddExpr &&
        recurrent_expr != child_iterator->AsSERecurrentNode()) {
      return simplified_polynomial;
    }
  }

  if (recurrent_expr) {
    return SimplifyRecurrentAddExpression(recurrent_expr);
  }

  return simplified_polynomial;
}

// Traverse the graph to build up the accumulator objects.
void SENodeSimplifyImpl::GatherAccumulatorsFromChildNodes(SENode* new_node,
                                                          SENode* child,
                                                          bool negation) {
  int32_t sign = negation ? -1 : 1;

  if (child->GetType() == SENode::Constant) {
    // Collect all the constants and add them together.
    constant_accumulator_ +=
        child->AsSEConstantNode()->FoldToSingleValue() * sign;

  } else if (child->GetType() == SENode::ValueUnknown ||
             child->GetType() == SENode::RecurrentAddExpr) {
    // To rebuild the graph of X+X+X*2 into 4*X we count the occurrences of X
    // and create a new node of count*X after. X can either be a ValueUnknown or
    // a RecurrentAddExpr. The count for each X is stored in the accumulators_
    // map.

    auto iterator = accumulators_.find(child);
    // If we've encountered this term before add to the accumulator for it.
    if (iterator == accumulators_.end())
      accumulators_.insert({child, sign});
    else
      iterator->second += sign;

  } else if (child->GetType() == SENode::Multiply) {
    if (!AccumulatorsFromMultiply(child, negation)) {
      new_node->AddChild(child);
    }

  } else if (child->GetType() == SENode::Add) {
    for (SENode* next_child : *child) {
      GatherAccumulatorsFromChildNodes(new_node, next_child, negation);
    }

  } else if (child->GetType() == SENode::Negative) {
    SENode* negated_node = child->GetChild(0);
    GatherAccumulatorsFromChildNodes(new_node, negated_node, !negation);
  } else {
    // If we can't work out how to fold the expression just add it back into
    // the graph.
    new_node->AddChild(child);
  }
}

SERecurrentNode* SENodeSimplifyImpl::UpdateCoefficient(
    SERecurrentNode* recurrent, int64_t coefficient_update) const {
  std::unique_ptr<SERecurrentNode> new_recurrent_node{new SERecurrentNode(
      recurrent->GetParentAnalysis(), recurrent->GetLoop())};

  SENode* new_coefficient = analysis_.CreateMultiplyNode(
      recurrent->GetCoefficient(),
      analysis_.CreateConstant(coefficient_update));

  // See if the node can be simplified.
  SENode* simplified = analysis_.SimplifyExpression(new_coefficient);
  if (simplified->GetType() != SENode::CanNotCompute)
    new_coefficient = simplified;

  if (coefficient_update < 0) {
    new_recurrent_node->AddOffset(
        analysis_.CreateNegation(recurrent->GetOffset()));
  } else {
    new_recurrent_node->AddOffset(recurrent->GetOffset());
  }

  new_recurrent_node->AddCoefficient(new_coefficient);

  return analysis_.GetCachedOrAdd(std::move(new_recurrent_node))
      ->AsSERecurrentNode();
}

// Simplify all the terms in the polynomial function.
SENode* SENodeSimplifyImpl::SimplifyPolynomial() {
  std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};

  // Traverse the graph and gather the accumulators from it.
  GatherAccumulatorsFromChildNodes(new_add.get(), node_, false);

  // Fold all the constants into a single constant node.
  if (constant_accumulator_ != 0) {
    new_add->AddChild(analysis_.CreateConstant(constant_accumulator_));
  }

  for (auto& pair : accumulators_) {
    SENode* term = pair.first;
    int64_t count = pair.second;

    // We can eliminate the term completely.
    if (count == 0) continue;

    if (count == 1) {
      new_add->AddChild(term);
    } else if (count == -1 && term->GetType() != SENode::RecurrentAddExpr) {
      // If the count is -1 we can just add a negative version of that node,
      // unless it is a recurrent expression as we would rather the negative
      // goes on the recurrent expressions children. This makes it easier to
      // work with in other places.
      new_add->AddChild(analysis_.CreateNegation(term));
    } else {
      // Output value unknown terms as count*term and output recurrent
      // expression terms as rec(offset, coefficient + count) offset and
      // coefficient are the same as in the original expression.
      if (term->GetType() == SENode::ValueUnknown) {
        SENode* count_as_constant = analysis_.CreateConstant(count);
        new_add->AddChild(
            analysis_.CreateMultiplyNode(count_as_constant, term));
      } else {
        assert(term->GetType() == SENode::RecurrentAddExpr &&
               "We only handle value unknowns or recurrent expressions");

        // Create a new recurrent expression by adding the count to the
        // coefficient of the old one.
        new_add->AddChild(UpdateCoefficient(term->AsSERecurrentNode(), count));
      }
    }
  }

  // If there is only one term in the addition left just return that term.
  if (new_add->GetChildren().size() == 1) {
    return new_add->GetChild(0);
  }

  // If there are no terms left in the addition just return 0.
  if (new_add->GetChildren().size() == 0) {
    return analysis_.CreateConstant(0);
  }

  return analysis_.GetCachedOrAdd(std::move(new_add));
}

SENode* SENodeSimplifyImpl::FoldRecurrentAddExpressions(SENode* root) {
  std::unique_ptr<SEAddNode> new_node{new SEAddNode(&analysis_)};

  // A mapping of loops to the list of recurrent expressions which are with
  // respect to those loops.
  std::map<const Loop*, std::vector<std::pair<SERecurrentNode*, bool>>>
      loops_to_recurrent{};

  bool has_multiple_same_loop_recurrent_terms = false;

  for (SENode* child : *root) {
    bool negation = false;

    if (child->GetType() == SENode::Negative) {
      child = child->GetChild(0);
      negation = true;
    }

    if (child->GetType() == SENode::RecurrentAddExpr) {
      const Loop* loop = child->AsSERecurrentNode()->GetLoop();

      SERecurrentNode* rec = child->AsSERecurrentNode();
      if (loops_to_recurrent.find(loop) == loops_to_recurrent.end()) {
        loops_to_recurrent[loop] = {std::make_pair(rec, negation)};
      } else {
        loops_to_recurrent[loop].push_back(std::make_pair(rec, negation));
        has_multiple_same_loop_recurrent_terms = true;
      }
    } else {
      new_node->AddChild(child);
    }
  }

  if (!has_multiple_same_loop_recurrent_terms) return root;

  for (auto pair : loops_to_recurrent) {
    std::vector<std::pair<SERecurrentNode*, bool>>& recurrent_expressions =
        pair.second;
    const Loop* loop = pair.first;

    std::unique_ptr<SENode> new_coefficient{new SEAddNode(&analysis_)};
    std::unique_ptr<SENode> new_offset{new SEAddNode(&analysis_)};

    for (auto node_pair : recurrent_expressions) {
      SERecurrentNode* node = node_pair.first;
      bool negative = node_pair.second;

      if (!negative) {
        new_coefficient->AddChild(node->GetCoefficient());
        new_offset->AddChild(node->GetOffset());
      } else {
        new_coefficient->AddChild(
            analysis_.CreateNegation(node->GetCoefficient()));
        new_offset->AddChild(analysis_.CreateNegation(node->GetOffset()));
      }
    }

    std::unique_ptr<SERecurrentNode> new_recurrent{
        new SERecurrentNode(&analysis_, loop)};

    SENode* new_coefficient_simplified =
        analysis_.SimplifyExpression(new_coefficient.get());

    SENode* new_offset_simplified =
        analysis_.SimplifyExpression(new_offset.get());

    if (new_coefficient_simplified->GetType() == SENode::Constant &&
        new_coefficient_simplified->AsSEConstantNode()->FoldToSingleValue() ==
            0) {
      return new_offset_simplified;
    }

    new_recurrent->AddCoefficient(new_coefficient_simplified);
    new_recurrent->AddOffset(new_offset_simplified);

    new_node->AddChild(analysis_.GetCachedOrAdd(std::move(new_recurrent)));
  }

  // If we only have one child in the add just return that.
  if (new_node->GetChildren().size() == 1) {
    return new_node->GetChild(0);
  }

  return analysis_.GetCachedOrAdd(std::move(new_node));
}

SENode* SENodeSimplifyImpl::EliminateZeroCoefficientRecurrents(SENode* node) {
  if (node->GetType() != SENode::Add) return node;

  bool has_change = false;

  std::vector<SENode*> new_children{};
  for (SENode* child : *node) {
    if (child->GetType() == SENode::RecurrentAddExpr) {
      SENode* coefficient = child->AsSERecurrentNode()->GetCoefficient();
      // If coefficient is zero then we can eliminate the recurrent expression
      // entirely and just return the offset as the recurrent expression is
      // representing the equation coefficient*iterations + offset.
      if (coefficient->GetType() == SENode::Constant &&
          coefficient->AsSEConstantNode()->FoldToSingleValue() == 0) {
        new_children.push_back(child->AsSERecurrentNode()->GetOffset());
        has_change = true;
      } else {
        new_children.push_back(child);
      }
    } else {
      new_children.push_back(child);
    }
  }

  if (!has_change) return node;

  std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};

  for (SENode* child : new_children) {
    new_add->AddChild(child);
  }

  return analysis_.GetCachedOrAdd(std::move(new_add));
}

SENode* SENodeSimplifyImpl::SimplifyRecurrentAddExpression(
    SERecurrentNode* recurrent_expr) {
  const std::vector<SENode*>& children = node_->GetChildren();

  std::unique_ptr<SERecurrentNode> recurrent_node{new SERecurrentNode(
      recurrent_expr->GetParentAnalysis(), recurrent_expr->GetLoop())};

  // Create and simplify the new offset node.
  std::unique_ptr<SENode> new_offset{
      new SEAddNode(recurrent_expr->GetParentAnalysis())};
  new_offset->AddChild(recurrent_expr->GetOffset());

  for (SENode* child : children) {
    if (child->GetType() != SENode::RecurrentAddExpr) {
      new_offset->AddChild(child);
    }
  }

  // Simplify the new offset.
  SENode* simplified_child = analysis_.SimplifyExpression(new_offset.get());

  // If the child can be simplified, add the simplified form otherwise, add it
  // via the usual caching mechanism.
  if (simplified_child->GetType() != SENode::CanNotCompute) {
    recurrent_node->AddOffset(simplified_child);
  } else {
    recurrent_expr->AddOffset(analysis_.GetCachedOrAdd(std::move(new_offset)));
  }

  recurrent_node->AddCoefficient(recurrent_expr->GetCoefficient());

  return analysis_.GetCachedOrAdd(std::move(recurrent_node));
}

/*
 * Scalar Analysis simplification public methods.
 */

SENode* ScalarEvolutionAnalysis::SimplifyExpression(SENode* node) {
  SENodeSimplifyImpl impl{this, node};

  return impl.Simplify();
}

}  // namespace opt
}  // namespace spvtools

Coverage Report

Created: 2025-11-16 06:14

Line	Count	Source
1		// Copyright (c) 2018 Google LLC.
2		//
3		// Licensed under the Apache License, Version 2.0 (the "License");
4		// you may not use this file except in compliance with the License.
5		// You may obtain a copy of the License at
6		//
7		// http://www.apache.org/licenses/LICENSE-2.0
8		//
9		// Unless required by applicable law or agreed to in writing, software
10		// distributed under the License is distributed on an "AS IS" BASIS,
11		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12		// See the License for the specific language governing permissions and
13		// limitations under the License.
14
15		#include <functional>
16		#include <map>
17		#include <memory>
18		#include <set>
19		#include <utility>
20		#include <vector>
21
22		#include "source/opt/scalar_analysis.h"
23
24		// Simplifies scalar analysis DAGs.
25		//
26		// 1. Given a node passed to SimplifyExpression we first simplify the graph by
27		// calling SimplifyPolynomial. This groups like nodes following basic arithmetic
28		// rules, so multiple adds of the same load instruction could be grouped into a
29		// single multiply of that instruction. SimplifyPolynomial will traverse the DAG
30		// and build up an accumulator buffer for each class of instruction it finds.
31		// For example take the loop:
32		// for (i=0, i<N; i++) { i+B+23+4+B+C; }
33		// In this example the expression "i+B+23+4+B+C" has four classes of
34		// instruction, induction variable i, the two value unknowns B and C, and the
35		// constants. The accumulator buffer is then used to rebuild the graph using
36		// the accumulation of each type. This example would then be folded into
37		// i+2*B+C+27.
38		//
39		// This new graph contains a single add node (or if only one type found then
40		// just that node) with each of the like terms (or multiplication node) as a
41		// child.
42		//
43		// 2. FoldRecurrentAddExpressions is then called on this new DAG. This will take
44		// RecurrentAddExpressions which are with respect to the same loop and fold them
45		// into a single new RecurrentAddExpression with respect to that same loop. An
46		// expression can have multiple RecurrentAddExpression's with respect to
47		// different loops in the case of nested loops. These expressions cannot be
48		// folded further. For example:
49		//
50		// for (i=0; i<N;i++) for(j=0,k=1; j<N;++j,++k)
51		//
52		// The 'j' and 'k' are RecurrentAddExpression with respect to the second loop
53		// and 'i' to the first. If 'j' and 'k' are used in an expression together then
54		// they will be folded into a new RecurrentAddExpression with respect to the
55		// second loop in that expression.
56		//
57		//
58		// 3. If the DAG now only contains a single RecurrentAddExpression we can now
59		// perform a final optimization SimplifyRecurrentAddExpression. This will
60		// transform the entire DAG into a RecurrentAddExpression. Additions to the
61		// RecurrentAddExpression are added to the offset field and multiplications to
62		// the coefficient.
63		//
64
65		namespace spvtools {
66		namespace opt {
67
68		// Implementation of the functions which are used to simplify the graph. Graphs
69		// of unknowns, multiplies, additions, and constants can be turned into a linear
70		// add node with each term as a child. For instance a large graph built from, X
71		// + X2 + Y - Y3 + 4 - 1, would become a single add expression with the
72		// children X3, -Y2, and the constant 3. Graphs containing a recurrent
73		// expression will be simplified to represent the entire graph around a single
74		// recurrent expression. So for an induction variable (i=0, i++) if you add 1 to
75		// i in an expression we can rewrite the graph of that expression to be a single
76		// recurrent expression of (i=1,i++).
77		class SENodeSimplifyImpl {
78		public:
79		SENodeSimplifyImpl(ScalarEvolutionAnalysis* analysis,
80		SENode* node_to_simplify)
81	0	: analysis_(*analysis),
82	0	node_(node_to_simplify),
83	0	constant_accumulator_(0) {}
84
85		// Return the result of the simplification.
86		SENode* Simplify();
87
88		private:
89		// Recursively descend through the graph to build up the accumulator objects
90		// which are used to flatten the graph. \|child\| is the node currently being
91		// traversed and the \|negation\| flag is used to signify that this operation
92		// was preceded by a unary negative operation and as such the result should be
93		// negated.
94		void GatherAccumulatorsFromChildNodes(SENode* new_node, SENode* child,
95		bool negation);
96
97		// Given a \|multiply\| node add to the accumulators for the term type within
98		// the \|multiply\| expression. Will return true if the accumulators could be
99		// calculated successfully. If the \|multiply\| is in any form other than
100		// unknown*constant then we return false. \|negation\| signifies that the
101		// operation was preceded by a unary negative.
102		bool AccumulatorsFromMultiply(SENode* multiply, bool negation);
103
104		SERecurrentNode* UpdateCoefficient(SERecurrentNode* recurrent,
105		int64_t coefficient_update) const;
106
107		// If the graph contains a recurrent expression, ie, an expression with the
108		// loop iterations as a term in the expression, then the whole expression
109		// can be rewritten to be a recurrent expression.
110		SENode* SimplifyRecurrentAddExpression(SERecurrentNode* node);
111
112		// Simplify the whole graph by linking like terms together in a single flat
113		// add node. So X2 + Y -Y + 3 +6 would become X2 + 9. Where X and Y are a
114		// ValueUnknown node (i.e, a load) or a recurrent expression.
115		SENode* SimplifyPolynomial();
116
117		// Each recurrent expression is an expression with respect to a specific loop.
118		// If we have two different recurrent terms with respect to the same loop in a
119		// single expression then we can fold those terms into a single new term.
120		// For instance:
121		//
122		// induction i = 0, i++
123		// temp = i*10
124		// array[i+temp]
125		//
126		// We can fold the i + temp into a single expression. Rec(0,1) + Rec(0,10) can
127		// become Rec(0,11).
128		SENode* FoldRecurrentAddExpressions(SENode*);
129
130		// We can eliminate recurrent expressions which have a coefficient of zero by
131		// replacing them with their offset value. We are able to do this because a
132		// recurrent expression represents the equation coefficient*iterations +
133		// offset.
134		SENode* EliminateZeroCoefficientRecurrents(SENode* node);
135
136		// A reference the analysis which requested the simplification.
137		ScalarEvolutionAnalysis& analysis_;
138
139		// The node being simplified.
140		SENode* node_;
141
142		// An accumulator of the net result of all the constant operations performed
143		// in a graph.
144		int64_t constant_accumulator_;
145
146		// An accumulator for each of the non constant terms in the graph.
147		std::map<SENode*, int64_t> accumulators_;
148		};
149
150		// From a \|multiply\| build up the accumulator objects.
151		bool SENodeSimplifyImpl::AccumulatorsFromMultiply(SENode* multiply,
152	0	bool negation) {
153	0	if (multiply->GetChildren().size() != 2 \|\|
154	0	multiply->GetType() != SENode::Multiply)
155	0	return false;
156
157	0	SENode* operand_1 = multiply->GetChild(0);
158	0	SENode* operand_2 = multiply->GetChild(1);
159
160	0	SENode* value_unknown = nullptr;
161	0	SENode* constant = nullptr;
162
163		// Work out which operand is the unknown value.
164	0	if (operand_1->GetType() == SENode::ValueUnknown \|\|
165	0	operand_1->GetType() == SENode::RecurrentAddExpr)
166	0	value_unknown = operand_1;
167	0	else if (operand_2->GetType() == SENode::ValueUnknown \|\|
168	0	operand_2->GetType() == SENode::RecurrentAddExpr)
169	0	value_unknown = operand_2;
170
171		// Work out which operand is the constant coefficient.
172	0	if (operand_1->GetType() == SENode::Constant)
173	0	constant = operand_1;
174	0	else if (operand_2->GetType() == SENode::Constant)
175	0	constant = operand_2;
176
177		// If the expression is not a variable multiplied by a constant coefficient,
178		// exit out.
179	0	if (!(value_unknown && constant)) {
180	0	return false;
181	0	}
182
183	0	int64_t sign = negation ? -1 : 1;
184
185	0	auto iterator = accumulators_.find(value_unknown);
186	0	int64_t new_value = constant->AsSEConstantNode()->FoldToSingleValue() * sign;
187		// Add the result of the multiplication to the accumulators.
188	0	if (iterator != accumulators_.end()) {
189	0	(*iterator).second += new_value;
190	0	} else {
191	0	accumulators_.insert({value_unknown, new_value});
192	0	}
193
194	0	return true;
195	0	}
196
197	0	SENode* SENodeSimplifyImpl::Simplify() {
198		// We only handle graphs with an addition, multiplication, or negation, at the
199		// root.
200	0	if (node_->GetType() != SENode::Add && node_->GetType() != SENode::Multiply &&
201	0	node_->GetType() != SENode::Negative)
202	0	return node_;
203
204	0	SENode* simplified_polynomial = SimplifyPolynomial();
205
206	0	SERecurrentNode* recurrent_expr = nullptr;
207	0	node_ = simplified_polynomial;
208
209		// Fold recurrent expressions which are with respect to the same loop into a
210		// single recurrent expression.
211	0	simplified_polynomial = FoldRecurrentAddExpressions(simplified_polynomial);
212
213	0	simplified_polynomial =
214	0	EliminateZeroCoefficientRecurrents(simplified_polynomial);
215
216		// Traverse the immediate children of the new node to find the recurrent
217		// expression. If there is more than one there is nothing further we can do.
218	0	for (SENode* child : simplified_polynomial->GetChildren()) {
219	0	if (child->GetType() == SENode::RecurrentAddExpr) {
220	0	recurrent_expr = child->AsSERecurrentNode();
221	0	}
222	0	}
223
224		// We need to count the number of unique recurrent expressions in the DAG to
225		// ensure there is only one.
226	0	for (auto child_iterator = simplified_polynomial->graph_begin();
227	0	child_iterator != simplified_polynomial->graph_end(); ++child_iterator) {
228	0	if (child_iterator->GetType() == SENode::RecurrentAddExpr &&
229	0	recurrent_expr != child_iterator->AsSERecurrentNode()) {
230	0	return simplified_polynomial;
231	0	}
232	0	}
233
234	0	if (recurrent_expr) {
235	0	return SimplifyRecurrentAddExpression(recurrent_expr);
236	0	}
237
238	0	return simplified_polynomial;
239	0	}
240
241		// Traverse the graph to build up the accumulator objects.
242		void SENodeSimplifyImpl::GatherAccumulatorsFromChildNodes(SENode* new_node,
243		SENode* child,
244	0	bool negation) {
245	0	int32_t sign = negation ? -1 : 1;
246
247	0	if (child->GetType() == SENode::Constant) {
248		// Collect all the constants and add them together.
249	0	constant_accumulator_ +=
250	0	child->AsSEConstantNode()->FoldToSingleValue() * sign;
251
252	0	} else if (child->GetType() == SENode::ValueUnknown \|\|
253	0	child->GetType() == SENode::RecurrentAddExpr) {
254		// To rebuild the graph of X+X+X2 into 4X we count the occurrences of X
255		// and create a new node of count*X after. X can either be a ValueUnknown or
256		// a RecurrentAddExpr. The count for each X is stored in the accumulators_
257		// map.
258
259	0	auto iterator = accumulators_.find(child);
260		// If we've encountered this term before add to the accumulator for it.
261	0	if (iterator == accumulators_.end())
262	0	accumulators_.insert({child, sign});
263	0	else
264	0	iterator->second += sign;
265
266	0	} else if (child->GetType() == SENode::Multiply) {
267	0	if (!AccumulatorsFromMultiply(child, negation)) {
268	0	new_node->AddChild(child);
269	0	}
270
271	0	} else if (child->GetType() == SENode::Add) {
272	0	for (SENode* next_child : *child) {
273	0	GatherAccumulatorsFromChildNodes(new_node, next_child, negation);
274	0	}
275
276	0	} else if (child->GetType() == SENode::Negative) {
277	0	SENode* negated_node = child->GetChild(0);
278	0	GatherAccumulatorsFromChildNodes(new_node, negated_node, !negation);
279	0	} else {
280		// If we can't work out how to fold the expression just add it back into
281		// the graph.
282	0	new_node->AddChild(child);
283	0	}
284	0	}
285
286		SERecurrentNode* SENodeSimplifyImpl::UpdateCoefficient(
287	0	SERecurrentNode* recurrent, int64_t coefficient_update) const {
288	0	std::unique_ptr<SERecurrentNode> new_recurrent_node{new SERecurrentNode(
289	0	recurrent->GetParentAnalysis(), recurrent->GetLoop())};
290
291	0	SENode* new_coefficient = analysis_.CreateMultiplyNode(
292	0	recurrent->GetCoefficient(),
293	0	analysis_.CreateConstant(coefficient_update));
294
295		// See if the node can be simplified.
296	0	SENode* simplified = analysis_.SimplifyExpression(new_coefficient);
297	0	if (simplified->GetType() != SENode::CanNotCompute)
298	0	new_coefficient = simplified;
299
300	0	if (coefficient_update < 0) {
301	0	new_recurrent_node->AddOffset(
302	0	analysis_.CreateNegation(recurrent->GetOffset()));
303	0	} else {
304	0	new_recurrent_node->AddOffset(recurrent->GetOffset());
305	0	}
306
307	0	new_recurrent_node->AddCoefficient(new_coefficient);
308
309	0	return analysis_.GetCachedOrAdd(std::move(new_recurrent_node))
310	0	->AsSERecurrentNode();
311	0	}
312
313		// Simplify all the terms in the polynomial function.
314	0	SENode* SENodeSimplifyImpl::SimplifyPolynomial() {
315	0	std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
316
317		// Traverse the graph and gather the accumulators from it.
318	0	GatherAccumulatorsFromChildNodes(new_add.get(), node_, false);
319
320		// Fold all the constants into a single constant node.
321	0	if (constant_accumulator_ != 0) {
322	0	new_add->AddChild(analysis_.CreateConstant(constant_accumulator_));
323	0	}
324
325	0	for (auto& pair : accumulators_) {
326	0	SENode* term = pair.first;
327	0	int64_t count = pair.second;
328
329		// We can eliminate the term completely.
330	0	if (count == 0) continue;
331
332	0	if (count == 1) {
333	0	new_add->AddChild(term);
334	0	} else if (count == -1 && term->GetType() != SENode::RecurrentAddExpr) {
335		// If the count is -1 we can just add a negative version of that node,
336		// unless it is a recurrent expression as we would rather the negative
337		// goes on the recurrent expressions children. This makes it easier to
338		// work with in other places.
339	0	new_add->AddChild(analysis_.CreateNegation(term));
340	0	} else {
341		// Output value unknown terms as count*term and output recurrent
342		// expression terms as rec(offset, coefficient + count) offset and
343		// coefficient are the same as in the original expression.
344	0	if (term->GetType() == SENode::ValueUnknown) {
345	0	SENode* count_as_constant = analysis_.CreateConstant(count);
346	0	new_add->AddChild(
347	0	analysis_.CreateMultiplyNode(count_as_constant, term));
348	0	} else {
349	0	assert(term->GetType() == SENode::RecurrentAddExpr &&
350	0	"We only handle value unknowns or recurrent expressions");
351
352		// Create a new recurrent expression by adding the count to the
353		// coefficient of the old one.
354	0	new_add->AddChild(UpdateCoefficient(term->AsSERecurrentNode(), count));
355	0	}
356	0	}
357	0	}
358
359		// If there is only one term in the addition left just return that term.
360	0	if (new_add->GetChildren().size() == 1) {
361	0	return new_add->GetChild(0);
362	0	}
363
364		// If there are no terms left in the addition just return 0.
365	0	if (new_add->GetChildren().size() == 0) {
366	0	return analysis_.CreateConstant(0);
367	0	}
368
369	0	return analysis_.GetCachedOrAdd(std::move(new_add));
370	0	}
371
372	0	SENode* SENodeSimplifyImpl::FoldRecurrentAddExpressions(SENode* root) {
373	0	std::unique_ptr<SEAddNode> new_node{new SEAddNode(&analysis_)};
374
375		// A mapping of loops to the list of recurrent expressions which are with
376		// respect to those loops.
377	0	std::map<const Loop, std::vector<std::pair<SERecurrentNode, bool>>>
378	0	loops_to_recurrent{};
379
380	0	bool has_multiple_same_loop_recurrent_terms = false;
381
382	0	for (SENode* child : *root) {
383	0	bool negation = false;
384
385	0	if (child->GetType() == SENode::Negative) {
386	0	child = child->GetChild(0);
387	0	negation = true;
388	0	}
389
390	0	if (child->GetType() == SENode::RecurrentAddExpr) {
391	0	const Loop* loop = child->AsSERecurrentNode()->GetLoop();
392
393	0	SERecurrentNode* rec = child->AsSERecurrentNode();
394	0	if (loops_to_recurrent.find(loop) == loops_to_recurrent.end()) {
395	0	loops_to_recurrent[loop] = {std::make_pair(rec, negation)};
396	0	} else {
397	0	loops_to_recurrent[loop].push_back(std::make_pair(rec, negation));
398	0	has_multiple_same_loop_recurrent_terms = true;
399	0	}
400	0	} else {
401	0	new_node->AddChild(child);
402	0	}
403	0	}
404
405	0	if (!has_multiple_same_loop_recurrent_terms) return root;
406
407	0	for (auto pair : loops_to_recurrent) {
408	0	std::vector<std::pair<SERecurrentNode*, bool>>& recurrent_expressions =
409	0	pair.second;
410	0	const Loop* loop = pair.first;
411
412	0	std::unique_ptr<SENode> new_coefficient{new SEAddNode(&analysis_)};
413	0	std::unique_ptr<SENode> new_offset{new SEAddNode(&analysis_)};
414
415	0	for (auto node_pair : recurrent_expressions) {
416	0	SERecurrentNode* node = node_pair.first;
417	0	bool negative = node_pair.second;
418
419	0	if (!negative) {
420	0	new_coefficient->AddChild(node->GetCoefficient());
421	0	new_offset->AddChild(node->GetOffset());
422	0	} else {
423	0	new_coefficient->AddChild(
424	0	analysis_.CreateNegation(node->GetCoefficient()));
425	0	new_offset->AddChild(analysis_.CreateNegation(node->GetOffset()));
426	0	}
427	0	}
428
429	0	std::unique_ptr<SERecurrentNode> new_recurrent{
430	0	new SERecurrentNode(&analysis_, loop)};
431
432	0	SENode* new_coefficient_simplified =
433	0	analysis_.SimplifyExpression(new_coefficient.get());
434
435	0	SENode* new_offset_simplified =
436	0	analysis_.SimplifyExpression(new_offset.get());
437
438	0	if (new_coefficient_simplified->GetType() == SENode::Constant &&
439	0	new_coefficient_simplified->AsSEConstantNode()->FoldToSingleValue() ==
440	0	0) {
441	0	return new_offset_simplified;
442	0	}
443
444	0	new_recurrent->AddCoefficient(new_coefficient_simplified);
445	0	new_recurrent->AddOffset(new_offset_simplified);
446
447	0	new_node->AddChild(analysis_.GetCachedOrAdd(std::move(new_recurrent)));
448	0	}
449
450		// If we only have one child in the add just return that.
451	0	if (new_node->GetChildren().size() == 1) {
452	0	return new_node->GetChild(0);
453	0	}
454
455	0	return analysis_.GetCachedOrAdd(std::move(new_node));
456	0	}
457
458	0	SENode* SENodeSimplifyImpl::EliminateZeroCoefficientRecurrents(SENode* node) {
459	0	if (node->GetType() != SENode::Add) return node;
460
461	0	bool has_change = false;
462
463	0	std::vector<SENode*> new_children{};
464	0	for (SENode* child : *node) {
465	0	if (child->GetType() == SENode::RecurrentAddExpr) {
466	0	SENode* coefficient = child->AsSERecurrentNode()->GetCoefficient();
467		// If coefficient is zero then we can eliminate the recurrent expression
468		// entirely and just return the offset as the recurrent expression is
469		// representing the equation coefficient*iterations + offset.
470	0	if (coefficient->GetType() == SENode::Constant &&
471	0	coefficient->AsSEConstantNode()->FoldToSingleValue() == 0) {
472	0	new_children.push_back(child->AsSERecurrentNode()->GetOffset());
473	0	has_change = true;
474	0	} else {
475	0	new_children.push_back(child);
476	0	}
477	0	} else {
478	0	new_children.push_back(child);
479	0	}
480	0	}
481
482	0	if (!has_change) return node;
483
484	0	std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
485
486	0	for (SENode* child : new_children) {
487	0	new_add->AddChild(child);
488	0	}
489
490	0	return analysis_.GetCachedOrAdd(std::move(new_add));
491	0	}
492
493		SENode* SENodeSimplifyImpl::SimplifyRecurrentAddExpression(
494	0	SERecurrentNode* recurrent_expr) {
495	0	const std::vector<SENode*>& children = node_->GetChildren();
496
497	0	std::unique_ptr<SERecurrentNode> recurrent_node{new SERecurrentNode(
498	0	recurrent_expr->GetParentAnalysis(), recurrent_expr->GetLoop())};
499
500		// Create and simplify the new offset node.
501	0	std::unique_ptr<SENode> new_offset{
502	0	new SEAddNode(recurrent_expr->GetParentAnalysis())};
503	0	new_offset->AddChild(recurrent_expr->GetOffset());
504
505	0	for (SENode* child : children) {
506	0	if (child->GetType() != SENode::RecurrentAddExpr) {
507	0	new_offset->AddChild(child);
508	0	}
509	0	}
510
511		// Simplify the new offset.
512	0	SENode* simplified_child = analysis_.SimplifyExpression(new_offset.get());
513
514		// If the child can be simplified, add the simplified form otherwise, add it
515		// via the usual caching mechanism.
516	0	if (simplified_child->GetType() != SENode::CanNotCompute) {
517	0	recurrent_node->AddOffset(simplified_child);
518	0	} else {
519	0	recurrent_expr->AddOffset(analysis_.GetCachedOrAdd(std::move(new_offset)));
520	0	}
521
522	0	recurrent_node->AddCoefficient(recurrent_expr->GetCoefficient());
523
524	0	return analysis_.GetCachedOrAdd(std::move(recurrent_node));
525	0	}
526
527		/*
528		* Scalar Analysis simplification public methods.
529		*/
530
531	0	SENode* ScalarEvolutionAnalysis::SimplifyExpression(SENode* node) {
532	0	SENodeSimplifyImpl impl{this, node};
533
534	0	return impl.Simplify();
535	0	}
536
537		} // namespace opt
538		} // namespace spvtools