Coverage Report

Created: 2025-11-16 06:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/spirv-tools/source/opt/scalar_analysis_simplification.cpp
Line
Count
Source
1
// Copyright (c) 2018 Google LLC.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
#include <functional>
16
#include <map>
17
#include <memory>
18
#include <set>
19
#include <utility>
20
#include <vector>
21
22
#include "source/opt/scalar_analysis.h"
23
24
// Simplifies scalar analysis DAGs.
25
//
26
// 1. Given a node passed to SimplifyExpression we first simplify the graph by
27
// calling SimplifyPolynomial. This groups like nodes following basic arithmetic
28
// rules, so multiple adds of the same load instruction could be grouped into a
29
// single multiply of that instruction. SimplifyPolynomial will traverse the DAG
30
// and build up an accumulator buffer for each class of instruction it finds.
31
// For example take the loop:
32
// for (i=0, i<N; i++) { i+B+23+4+B+C; }
33
// In this example the expression "i+B+23+4+B+C" has four classes of
34
// instruction, induction variable i, the two value unknowns B and C, and the
35
// constants. The accumulator buffer is then used to rebuild the graph using
36
// the accumulation of each type. This example would then be folded into
37
// i+2*B+C+27.
38
//
39
// This new graph contains a single add node (or if only one type found then
40
// just that node) with each of the like terms (or multiplication node) as a
41
// child.
42
//
43
// 2. FoldRecurrentAddExpressions is then called on this new DAG. This will take
44
// RecurrentAddExpressions which are with respect to the same loop and fold them
45
// into a single new RecurrentAddExpression with respect to that same loop. An
46
// expression can have multiple RecurrentAddExpression's with respect to
47
// different loops in the case of nested loops. These expressions cannot be
48
// folded further. For example:
49
//
50
// for (i=0; i<N;i++) for(j=0,k=1; j<N;++j,++k)
51
//
52
// The 'j' and 'k' are RecurrentAddExpression with respect to the second loop
53
// and 'i' to the first. If 'j' and 'k' are used in an expression together then
54
// they will be folded into a new RecurrentAddExpression with respect to the
55
// second loop in that expression.
56
//
57
//
58
// 3. If the DAG now only contains a single RecurrentAddExpression we can now
59
// perform a final optimization SimplifyRecurrentAddExpression. This will
60
// transform the entire DAG into a RecurrentAddExpression. Additions to the
61
// RecurrentAddExpression are added to the offset field and multiplications to
62
// the coefficient.
63
//
64
65
namespace spvtools {
66
namespace opt {
67
68
// Implementation of the functions which are used to simplify the graph. Graphs
69
// of unknowns, multiplies, additions, and constants can be turned into a linear
70
// add node with each term as a child. For instance a large graph built from, X
71
// + X*2 + Y - Y*3 + 4 - 1, would become a single add expression with the
72
// children X*3, -Y*2, and the constant 3. Graphs containing a recurrent
73
// expression will be simplified to represent the entire graph around a single
74
// recurrent expression. So for an induction variable (i=0, i++) if you add 1 to
75
// i in an expression we can rewrite the graph of that expression to be a single
76
// recurrent expression of (i=1,i++).
77
class SENodeSimplifyImpl {
78
 public:
79
  SENodeSimplifyImpl(ScalarEvolutionAnalysis* analysis,
80
                     SENode* node_to_simplify)
81
0
      : analysis_(*analysis),
82
0
        node_(node_to_simplify),
83
0
        constant_accumulator_(0) {}
84
85
  // Return the result of the simplification.
86
  SENode* Simplify();
87
88
 private:
89
  // Recursively descend through the graph to build up the accumulator objects
90
  // which are used to flatten the graph. |child| is the node currently being
91
  // traversed and the |negation| flag is used to signify that this operation
92
  // was preceded by a unary negative operation and as such the result should be
93
  // negated.
94
  void GatherAccumulatorsFromChildNodes(SENode* new_node, SENode* child,
95
                                        bool negation);
96
97
  // Given a |multiply| node add to the accumulators for the term type within
98
  // the |multiply| expression. Will return true if the accumulators could be
99
  // calculated successfully. If the |multiply| is in any form other than
100
  // unknown*constant then we return false. |negation| signifies that the
101
  // operation was preceded by a unary negative.
102
  bool AccumulatorsFromMultiply(SENode* multiply, bool negation);
103
104
  SERecurrentNode* UpdateCoefficient(SERecurrentNode* recurrent,
105
                                     int64_t coefficient_update) const;
106
107
  // If the graph contains a recurrent expression, ie, an expression with the
108
  // loop iterations as a term in the expression, then the whole expression
109
  // can be rewritten to be a recurrent expression.
110
  SENode* SimplifyRecurrentAddExpression(SERecurrentNode* node);
111
112
  // Simplify the whole graph by linking like terms together in a single flat
113
  // add node. So X*2 + Y -Y + 3 +6 would become X*2 + 9. Where X and Y are a
114
  // ValueUnknown node (i.e, a load) or a recurrent expression.
115
  SENode* SimplifyPolynomial();
116
117
  // Each recurrent expression is an expression with respect to a specific loop.
118
  // If we have two different recurrent terms with respect to the same loop in a
119
  // single expression then we can fold those terms into a single new term.
120
  // For instance:
121
  //
122
  // induction i = 0, i++
123
  // temp = i*10
124
  // array[i+temp]
125
  //
126
  // We can fold the i + temp into a single expression. Rec(0,1) + Rec(0,10) can
127
  // become Rec(0,11).
128
  SENode* FoldRecurrentAddExpressions(SENode*);
129
130
  // We can eliminate recurrent expressions which have a coefficient of zero by
131
  // replacing them with their offset value. We are able to do this because a
132
  // recurrent expression represents the equation coefficient*iterations +
133
  // offset.
134
  SENode* EliminateZeroCoefficientRecurrents(SENode* node);
135
136
  // A reference the analysis which requested the simplification.
137
  ScalarEvolutionAnalysis& analysis_;
138
139
  // The node being simplified.
140
  SENode* node_;
141
142
  // An accumulator of the net result of all the constant operations performed
143
  // in a graph.
144
  int64_t constant_accumulator_;
145
146
  // An accumulator for each of the non constant terms in the graph.
147
  std::map<SENode*, int64_t> accumulators_;
148
};
149
150
// From a |multiply| build up the accumulator objects.
151
bool SENodeSimplifyImpl::AccumulatorsFromMultiply(SENode* multiply,
152
0
                                                  bool negation) {
153
0
  if (multiply->GetChildren().size() != 2 ||
154
0
      multiply->GetType() != SENode::Multiply)
155
0
    return false;
156
157
0
  SENode* operand_1 = multiply->GetChild(0);
158
0
  SENode* operand_2 = multiply->GetChild(1);
159
160
0
  SENode* value_unknown = nullptr;
161
0
  SENode* constant = nullptr;
162
163
  // Work out which operand is the unknown value.
164
0
  if (operand_1->GetType() == SENode::ValueUnknown ||
165
0
      operand_1->GetType() == SENode::RecurrentAddExpr)
166
0
    value_unknown = operand_1;
167
0
  else if (operand_2->GetType() == SENode::ValueUnknown ||
168
0
           operand_2->GetType() == SENode::RecurrentAddExpr)
169
0
    value_unknown = operand_2;
170
171
  // Work out which operand is the constant coefficient.
172
0
  if (operand_1->GetType() == SENode::Constant)
173
0
    constant = operand_1;
174
0
  else if (operand_2->GetType() == SENode::Constant)
175
0
    constant = operand_2;
176
177
  // If the expression is not a variable multiplied by a constant coefficient,
178
  // exit out.
179
0
  if (!(value_unknown && constant)) {
180
0
    return false;
181
0
  }
182
183
0
  int64_t sign = negation ? -1 : 1;
184
185
0
  auto iterator = accumulators_.find(value_unknown);
186
0
  int64_t new_value = constant->AsSEConstantNode()->FoldToSingleValue() * sign;
187
  // Add the result of the multiplication to the accumulators.
188
0
  if (iterator != accumulators_.end()) {
189
0
    (*iterator).second += new_value;
190
0
  } else {
191
0
    accumulators_.insert({value_unknown, new_value});
192
0
  }
193
194
0
  return true;
195
0
}
196
197
0
SENode* SENodeSimplifyImpl::Simplify() {
198
  // We only handle graphs with an addition, multiplication, or negation, at the
199
  // root.
200
0
  if (node_->GetType() != SENode::Add && node_->GetType() != SENode::Multiply &&
201
0
      node_->GetType() != SENode::Negative)
202
0
    return node_;
203
204
0
  SENode* simplified_polynomial = SimplifyPolynomial();
205
206
0
  SERecurrentNode* recurrent_expr = nullptr;
207
0
  node_ = simplified_polynomial;
208
209
  // Fold recurrent expressions which are with respect to the same loop into a
210
  // single recurrent expression.
211
0
  simplified_polynomial = FoldRecurrentAddExpressions(simplified_polynomial);
212
213
0
  simplified_polynomial =
214
0
      EliminateZeroCoefficientRecurrents(simplified_polynomial);
215
216
  // Traverse the immediate children of the new node to find the recurrent
217
  // expression. If there is more than one there is nothing further we can do.
218
0
  for (SENode* child : simplified_polynomial->GetChildren()) {
219
0
    if (child->GetType() == SENode::RecurrentAddExpr) {
220
0
      recurrent_expr = child->AsSERecurrentNode();
221
0
    }
222
0
  }
223
224
  // We need to count the number of unique recurrent expressions in the DAG to
225
  // ensure there is only one.
226
0
  for (auto child_iterator = simplified_polynomial->graph_begin();
227
0
       child_iterator != simplified_polynomial->graph_end(); ++child_iterator) {
228
0
    if (child_iterator->GetType() == SENode::RecurrentAddExpr &&
229
0
        recurrent_expr != child_iterator->AsSERecurrentNode()) {
230
0
      return simplified_polynomial;
231
0
    }
232
0
  }
233
234
0
  if (recurrent_expr) {
235
0
    return SimplifyRecurrentAddExpression(recurrent_expr);
236
0
  }
237
238
0
  return simplified_polynomial;
239
0
}
240
241
// Traverse the graph to build up the accumulator objects.
242
void SENodeSimplifyImpl::GatherAccumulatorsFromChildNodes(SENode* new_node,
243
                                                          SENode* child,
244
0
                                                          bool negation) {
245
0
  int32_t sign = negation ? -1 : 1;
246
247
0
  if (child->GetType() == SENode::Constant) {
248
    // Collect all the constants and add them together.
249
0
    constant_accumulator_ +=
250
0
        child->AsSEConstantNode()->FoldToSingleValue() * sign;
251
252
0
  } else if (child->GetType() == SENode::ValueUnknown ||
253
0
             child->GetType() == SENode::RecurrentAddExpr) {
254
    // To rebuild the graph of X+X+X*2 into 4*X we count the occurrences of X
255
    // and create a new node of count*X after. X can either be a ValueUnknown or
256
    // a RecurrentAddExpr. The count for each X is stored in the accumulators_
257
    // map.
258
259
0
    auto iterator = accumulators_.find(child);
260
    // If we've encountered this term before add to the accumulator for it.
261
0
    if (iterator == accumulators_.end())
262
0
      accumulators_.insert({child, sign});
263
0
    else
264
0
      iterator->second += sign;
265
266
0
  } else if (child->GetType() == SENode::Multiply) {
267
0
    if (!AccumulatorsFromMultiply(child, negation)) {
268
0
      new_node->AddChild(child);
269
0
    }
270
271
0
  } else if (child->GetType() == SENode::Add) {
272
0
    for (SENode* next_child : *child) {
273
0
      GatherAccumulatorsFromChildNodes(new_node, next_child, negation);
274
0
    }
275
276
0
  } else if (child->GetType() == SENode::Negative) {
277
0
    SENode* negated_node = child->GetChild(0);
278
0
    GatherAccumulatorsFromChildNodes(new_node, negated_node, !negation);
279
0
  } else {
280
    // If we can't work out how to fold the expression just add it back into
281
    // the graph.
282
0
    new_node->AddChild(child);
283
0
  }
284
0
}
285
286
SERecurrentNode* SENodeSimplifyImpl::UpdateCoefficient(
287
0
    SERecurrentNode* recurrent, int64_t coefficient_update) const {
288
0
  std::unique_ptr<SERecurrentNode> new_recurrent_node{new SERecurrentNode(
289
0
      recurrent->GetParentAnalysis(), recurrent->GetLoop())};
290
291
0
  SENode* new_coefficient = analysis_.CreateMultiplyNode(
292
0
      recurrent->GetCoefficient(),
293
0
      analysis_.CreateConstant(coefficient_update));
294
295
  // See if the node can be simplified.
296
0
  SENode* simplified = analysis_.SimplifyExpression(new_coefficient);
297
0
  if (simplified->GetType() != SENode::CanNotCompute)
298
0
    new_coefficient = simplified;
299
300
0
  if (coefficient_update < 0) {
301
0
    new_recurrent_node->AddOffset(
302
0
        analysis_.CreateNegation(recurrent->GetOffset()));
303
0
  } else {
304
0
    new_recurrent_node->AddOffset(recurrent->GetOffset());
305
0
  }
306
307
0
  new_recurrent_node->AddCoefficient(new_coefficient);
308
309
0
  return analysis_.GetCachedOrAdd(std::move(new_recurrent_node))
310
0
      ->AsSERecurrentNode();
311
0
}
312
313
// Simplify all the terms in the polynomial function.
314
0
SENode* SENodeSimplifyImpl::SimplifyPolynomial() {
315
0
  std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
316
317
  // Traverse the graph and gather the accumulators from it.
318
0
  GatherAccumulatorsFromChildNodes(new_add.get(), node_, false);
319
320
  // Fold all the constants into a single constant node.
321
0
  if (constant_accumulator_ != 0) {
322
0
    new_add->AddChild(analysis_.CreateConstant(constant_accumulator_));
323
0
  }
324
325
0
  for (auto& pair : accumulators_) {
326
0
    SENode* term = pair.first;
327
0
    int64_t count = pair.second;
328
329
    // We can eliminate the term completely.
330
0
    if (count == 0) continue;
331
332
0
    if (count == 1) {
333
0
      new_add->AddChild(term);
334
0
    } else if (count == -1 && term->GetType() != SENode::RecurrentAddExpr) {
335
      // If the count is -1 we can just add a negative version of that node,
336
      // unless it is a recurrent expression as we would rather the negative
337
      // goes on the recurrent expressions children. This makes it easier to
338
      // work with in other places.
339
0
      new_add->AddChild(analysis_.CreateNegation(term));
340
0
    } else {
341
      // Output value unknown terms as count*term and output recurrent
342
      // expression terms as rec(offset, coefficient + count) offset and
343
      // coefficient are the same as in the original expression.
344
0
      if (term->GetType() == SENode::ValueUnknown) {
345
0
        SENode* count_as_constant = analysis_.CreateConstant(count);
346
0
        new_add->AddChild(
347
0
            analysis_.CreateMultiplyNode(count_as_constant, term));
348
0
      } else {
349
0
        assert(term->GetType() == SENode::RecurrentAddExpr &&
350
0
               "We only handle value unknowns or recurrent expressions");
351
352
        // Create a new recurrent expression by adding the count to the
353
        // coefficient of the old one.
354
0
        new_add->AddChild(UpdateCoefficient(term->AsSERecurrentNode(), count));
355
0
      }
356
0
    }
357
0
  }
358
359
  // If there is only one term in the addition left just return that term.
360
0
  if (new_add->GetChildren().size() == 1) {
361
0
    return new_add->GetChild(0);
362
0
  }
363
364
  // If there are no terms left in the addition just return 0.
365
0
  if (new_add->GetChildren().size() == 0) {
366
0
    return analysis_.CreateConstant(0);
367
0
  }
368
369
0
  return analysis_.GetCachedOrAdd(std::move(new_add));
370
0
}
371
372
0
SENode* SENodeSimplifyImpl::FoldRecurrentAddExpressions(SENode* root) {
373
0
  std::unique_ptr<SEAddNode> new_node{new SEAddNode(&analysis_)};
374
375
  // A mapping of loops to the list of recurrent expressions which are with
376
  // respect to those loops.
377
0
  std::map<const Loop*, std::vector<std::pair<SERecurrentNode*, bool>>>
378
0
      loops_to_recurrent{};
379
380
0
  bool has_multiple_same_loop_recurrent_terms = false;
381
382
0
  for (SENode* child : *root) {
383
0
    bool negation = false;
384
385
0
    if (child->GetType() == SENode::Negative) {
386
0
      child = child->GetChild(0);
387
0
      negation = true;
388
0
    }
389
390
0
    if (child->GetType() == SENode::RecurrentAddExpr) {
391
0
      const Loop* loop = child->AsSERecurrentNode()->GetLoop();
392
393
0
      SERecurrentNode* rec = child->AsSERecurrentNode();
394
0
      if (loops_to_recurrent.find(loop) == loops_to_recurrent.end()) {
395
0
        loops_to_recurrent[loop] = {std::make_pair(rec, negation)};
396
0
      } else {
397
0
        loops_to_recurrent[loop].push_back(std::make_pair(rec, negation));
398
0
        has_multiple_same_loop_recurrent_terms = true;
399
0
      }
400
0
    } else {
401
0
      new_node->AddChild(child);
402
0
    }
403
0
  }
404
405
0
  if (!has_multiple_same_loop_recurrent_terms) return root;
406
407
0
  for (auto pair : loops_to_recurrent) {
408
0
    std::vector<std::pair<SERecurrentNode*, bool>>& recurrent_expressions =
409
0
        pair.second;
410
0
    const Loop* loop = pair.first;
411
412
0
    std::unique_ptr<SENode> new_coefficient{new SEAddNode(&analysis_)};
413
0
    std::unique_ptr<SENode> new_offset{new SEAddNode(&analysis_)};
414
415
0
    for (auto node_pair : recurrent_expressions) {
416
0
      SERecurrentNode* node = node_pair.first;
417
0
      bool negative = node_pair.second;
418
419
0
      if (!negative) {
420
0
        new_coefficient->AddChild(node->GetCoefficient());
421
0
        new_offset->AddChild(node->GetOffset());
422
0
      } else {
423
0
        new_coefficient->AddChild(
424
0
            analysis_.CreateNegation(node->GetCoefficient()));
425
0
        new_offset->AddChild(analysis_.CreateNegation(node->GetOffset()));
426
0
      }
427
0
    }
428
429
0
    std::unique_ptr<SERecurrentNode> new_recurrent{
430
0
        new SERecurrentNode(&analysis_, loop)};
431
432
0
    SENode* new_coefficient_simplified =
433
0
        analysis_.SimplifyExpression(new_coefficient.get());
434
435
0
    SENode* new_offset_simplified =
436
0
        analysis_.SimplifyExpression(new_offset.get());
437
438
0
    if (new_coefficient_simplified->GetType() == SENode::Constant &&
439
0
        new_coefficient_simplified->AsSEConstantNode()->FoldToSingleValue() ==
440
0
            0) {
441
0
      return new_offset_simplified;
442
0
    }
443
444
0
    new_recurrent->AddCoefficient(new_coefficient_simplified);
445
0
    new_recurrent->AddOffset(new_offset_simplified);
446
447
0
    new_node->AddChild(analysis_.GetCachedOrAdd(std::move(new_recurrent)));
448
0
  }
449
450
  // If we only have one child in the add just return that.
451
0
  if (new_node->GetChildren().size() == 1) {
452
0
    return new_node->GetChild(0);
453
0
  }
454
455
0
  return analysis_.GetCachedOrAdd(std::move(new_node));
456
0
}
457
458
0
SENode* SENodeSimplifyImpl::EliminateZeroCoefficientRecurrents(SENode* node) {
459
0
  if (node->GetType() != SENode::Add) return node;
460
461
0
  bool has_change = false;
462
463
0
  std::vector<SENode*> new_children{};
464
0
  for (SENode* child : *node) {
465
0
    if (child->GetType() == SENode::RecurrentAddExpr) {
466
0
      SENode* coefficient = child->AsSERecurrentNode()->GetCoefficient();
467
      // If coefficient is zero then we can eliminate the recurrent expression
468
      // entirely and just return the offset as the recurrent expression is
469
      // representing the equation coefficient*iterations + offset.
470
0
      if (coefficient->GetType() == SENode::Constant &&
471
0
          coefficient->AsSEConstantNode()->FoldToSingleValue() == 0) {
472
0
        new_children.push_back(child->AsSERecurrentNode()->GetOffset());
473
0
        has_change = true;
474
0
      } else {
475
0
        new_children.push_back(child);
476
0
      }
477
0
    } else {
478
0
      new_children.push_back(child);
479
0
    }
480
0
  }
481
482
0
  if (!has_change) return node;
483
484
0
  std::unique_ptr<SENode> new_add{new SEAddNode(node_->GetParentAnalysis())};
485
486
0
  for (SENode* child : new_children) {
487
0
    new_add->AddChild(child);
488
0
  }
489
490
0
  return analysis_.GetCachedOrAdd(std::move(new_add));
491
0
}
492
493
SENode* SENodeSimplifyImpl::SimplifyRecurrentAddExpression(
494
0
    SERecurrentNode* recurrent_expr) {
495
0
  const std::vector<SENode*>& children = node_->GetChildren();
496
497
0
  std::unique_ptr<SERecurrentNode> recurrent_node{new SERecurrentNode(
498
0
      recurrent_expr->GetParentAnalysis(), recurrent_expr->GetLoop())};
499
500
  // Create and simplify the new offset node.
501
0
  std::unique_ptr<SENode> new_offset{
502
0
      new SEAddNode(recurrent_expr->GetParentAnalysis())};
503
0
  new_offset->AddChild(recurrent_expr->GetOffset());
504
505
0
  for (SENode* child : children) {
506
0
    if (child->GetType() != SENode::RecurrentAddExpr) {
507
0
      new_offset->AddChild(child);
508
0
    }
509
0
  }
510
511
  // Simplify the new offset.
512
0
  SENode* simplified_child = analysis_.SimplifyExpression(new_offset.get());
513
514
  // If the child can be simplified, add the simplified form otherwise, add it
515
  // via the usual caching mechanism.
516
0
  if (simplified_child->GetType() != SENode::CanNotCompute) {
517
0
    recurrent_node->AddOffset(simplified_child);
518
0
  } else {
519
0
    recurrent_expr->AddOffset(analysis_.GetCachedOrAdd(std::move(new_offset)));
520
0
  }
521
522
0
  recurrent_node->AddCoefficient(recurrent_expr->GetCoefficient());
523
524
0
  return analysis_.GetCachedOrAdd(std::move(recurrent_node));
525
0
}
526
527
/*
528
 * Scalar Analysis simplification public methods.
529
 */
530
531
0
SENode* ScalarEvolutionAnalysis::SimplifyExpression(SENode* node) {
532
0
  SENodeSimplifyImpl impl{this, node};
533
534
0
  return impl.Simplify();
535
0
}
536
537
}  // namespace opt
538
}  // namespace spvtools