/src/spirv-tools/source/opt/loop_fission.cpp

Source (jump to first uncovered line)
// Copyright (c) 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "source/opt/loop_fission.h"

#include <set>

#include "source/opt/register_pressure.h"

// Implement loop fission with an optional parameter to split only
// if the register pressure in a given loop meets a certain criteria. This is
// controlled via the constructors of LoopFissionPass.
//
// 1 - Build a list of loops to be split, these are top level loops (loops
// without child loops themselves) which meet the register pressure criteria, as
// determined by the ShouldSplitLoop method of LoopFissionPass.
//
// 2 - For each loop in the list, group each instruction into a set of related
// instructions by traversing each instructions users and operands recursively.
// We stop if we encounter an instruction we have seen before or an instruction
// which we don't consider relevant (i.e OpLoopMerge). We then group these
// groups into two different sets, one for the first loop and one for the
// second.
//
// 3 - We then run CanPerformSplit to check that it would be legal to split a
// loop using those two sets. We check that we haven't altered the relative
// order load/stores appear in the binary and that we aren't breaking any
// dependency between load/stores by splitting them into two loops. We also
// check that none of the OpBranch instructions are dependent on a load as we
// leave control flow structure intact and move only instructions in the body so
// we want to avoid any loads with side affects or aliasing.
//
// 4 - We then split the loop by calling SplitLoop. This function clones the
// loop and attaches it to the preheader and connects the new loops merge block
// to the current loop header block. We then use the two sets built in step 2 to
// remove instructions from each loop. If an instruction appears in the first
// set it is removed from the second loop and vice versa.
//
// 5 - If the multiple split passes flag is set we check if each of the loops
// still meet the register pressure criteria. If they do then we add them to the
// list of loops to be split (created in step one) to allow for loops to be
// split multiple times.
//

namespace spvtools {
namespace opt {

class LoopFissionImpl {
 public:
  LoopFissionImpl(IRContext* context, Loop* loop)
      : context_(context), loop_(loop), load_used_in_condition_(false) {}

  // Group each instruction in the loop into sets of instructions related by
  // their usedef chains. An instruction which uses another will appear in the
  // same set. Then merge those sets into just two sets. Returns false if there
  // was one or less sets created.
  bool GroupInstructionsByUseDef();

  // Check if the sets built by GroupInstructionsByUseDef violate any data
  // dependence rules.
  bool CanPerformSplit();

  // Split the loop and return a pointer to the new loop.
  Loop* SplitLoop();

  // Checks if |inst| is safe to move. We can only move instructions which don't
  // have any side effects and OpLoads and OpStores.
  bool MovableInstruction(const Instruction& inst) const;

 private:
  // Traverse the def use chain of |inst| and add the users and uses of |inst|
  // which are in the same loop to the |returned_set|.
  void TraverseUseDef(Instruction* inst, std::set<Instruction*>* returned_set,
                      bool ignore_phi_users = false, bool report_loads = false);

  // We group the instructions in the block into two different groups, the
  // instructions to be kept in the original loop and the ones to be cloned into
  // the new loop. As the cloned loop is attached to the preheader it will be
  // the first loop and the second loop will be the original.
  std::set<Instruction*> cloned_loop_instructions_;
  std::set<Instruction*> original_loop_instructions_;

  // We need a set of all the instructions to be seen so we can break any
  // recursion and also so we can ignore certain instructions by preemptively
  // adding them to this set.
  std::set<Instruction*> seen_instructions_;

  // A map of instructions to their relative position in the function.
  std::map<Instruction*, size_t> instruction_order_;

  IRContext* context_;

  Loop* loop_;

  // This is set to true by TraverseUseDef when traversing the instructions
  // related to the loop condition and any if conditions should any of those
  // instructions be a load.
  bool load_used_in_condition_;
};

bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const {
  return inst.opcode() == spv::Op::OpLoad ||
         inst.opcode() == spv::Op::OpStore ||
         inst.opcode() == spv::Op::OpSelectionMerge ||
         inst.opcode() == spv::Op::OpPhi || inst.IsOpcodeCodeMotionSafe();
}

void LoopFissionImpl::TraverseUseDef(Instruction* inst,
                                     std::set<Instruction*>* returned_set,
                                     bool ignore_phi_users, bool report_loads) {
  assert(returned_set && "Set to be returned cannot be null.");

  analysis::DefUseManager* def_use = context_->get_def_use_mgr();
  std::set<Instruction*>& inst_set = *returned_set;

  // We create this functor to traverse the use def chain to build the
  // grouping of related instructions. The lambda captures the std::function
  // to allow it to recurse.
  std::function<void(Instruction*)> traverser_functor;
  traverser_functor = [this, def_use, &inst_set, &traverser_functor,
                       ignore_phi_users, report_loads](Instruction* user) {
    // If we've seen the instruction before or it is not inside the loop end the
    // traversal.
    if (!user || seen_instructions_.count(user) != 0 ||
        !context_->get_instr_block(user) ||
        !loop_->IsInsideLoop(context_->get_instr_block(user))) {
      return;
    }

    // Don't include labels or loop merge instructions in the instruction sets.
    // Including them would mean we group instructions related only by using the
    // same labels (i.e phis). We already preempt the inclusion of
    // OpSelectionMerge by adding related instructions to the seen_instructions_
    // set.
    if (user->opcode() == spv::Op::OpLoopMerge ||
        user->opcode() == spv::Op::OpLabel)
      return;

    // If the |report_loads| flag is set, set the class field
    // load_used_in_condition_ to false. This is used to check that none of the
    // condition checks in the loop rely on loads.
    if (user->opcode() == spv::Op::OpLoad && report_loads) {
      load_used_in_condition_ = true;
    }

    // Add the instruction to the set of instructions already seen, this breaks
    // recursion and allows us to ignore certain instructions.
    seen_instructions_.insert(user);

    inst_set.insert(user);

    // Wrapper functor to traverse the operands of each instruction.
    auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) {
      traverser_functor(def_use->GetDef(*id));
    };
    user->ForEachInOperand(traverse_operand);

    // For the first traversal we want to ignore the users of the phi.
    if (ignore_phi_users && user->opcode() == spv::Op::OpPhi) return;

    // Traverse each user with this lambda.
    def_use->ForEachUser(user, traverser_functor);

    // Wrapper functor for the use traversal.
    auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) {
      traverser_functor(use);
    };
    def_use->ForEachUse(user, traverse_use);

  };

  // We start the traversal of the use def graph by invoking the above
  // lambda with the |inst| parameter.
  traverser_functor(inst);
}

bool LoopFissionImpl::GroupInstructionsByUseDef() {
  std::vector<std::set<Instruction*>> sets{};

  // We want to ignore all the instructions stemming from the loop condition
  // instruction.
  BasicBlock* condition_block = loop_->FindConditionBlock();

  if (!condition_block) return false;
  Instruction* condition = &*condition_block->tail();

  // We iterate over the blocks via iterating over all the blocks in the
  // function, we do this so we are iterating in the same order which the blocks
  // appear in the binary.
  Function& function = *loop_->GetHeaderBlock()->GetParent();

  // Create a temporary set to ignore certain groups of instructions within the
  // loop. We don't want any instructions related to control flow to be removed
  // from either loop only instructions within the control flow bodies.
  std::set<Instruction*> instructions_to_ignore{};
  TraverseUseDef(condition, &instructions_to_ignore, true, true);

  // Traverse control flow instructions to ensure they are added to the
  // seen_instructions_ set and will be ignored when it it called with actual
  // sets.
  for (BasicBlock& block : function) {
    if (!loop_->IsInsideLoop(block.id())) continue;

    for (Instruction& inst : block) {
      // Ignore all instructions related to control flow.
      if (inst.opcode() == spv::Op::OpSelectionMerge || inst.IsBranch()) {
        TraverseUseDef(&inst, &instructions_to_ignore, true, true);
      }
    }
  }

  // Traverse the instructions and generate the sets, automatically ignoring any
  // instructions in instructions_to_ignore.
  for (BasicBlock& block : function) {
    if (!loop_->IsInsideLoop(block.id()) ||
        loop_->GetHeaderBlock()->id() == block.id())
      continue;

    for (Instruction& inst : block) {
      // Record the order that each load/store is seen.
      if (inst.opcode() == spv::Op::OpLoad ||
          inst.opcode() == spv::Op::OpStore) {
        instruction_order_[&inst] = instruction_order_.size();
      }

      // Ignore instructions already seen in a traversal.
      if (seen_instructions_.count(&inst) != 0) {
        continue;
      }

      // Build the set.
      std::set<Instruction*> inst_set{};
      TraverseUseDef(&inst, &inst_set);
      if (!inst_set.empty()) sets.push_back(std::move(inst_set));
    }
  }

  // If we have one or zero sets return false to indicate that due to
  // insufficient instructions we couldn't split the loop into two groups and
  // thus the loop can't be split any further.
  if (sets.size() < 2) {
    return false;
  }

  // Merge the loop sets into two different sets. In CanPerformSplit we will
  // validate that we don't break the relative ordering of loads/stores by doing
  // this.
  for (size_t index = 0; index < sets.size() / 2; ++index) {
    cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end());
  }
  for (size_t index = sets.size() / 2; index < sets.size(); ++index) {
    original_loop_instructions_.insert(sets[index].begin(), sets[index].end());
  }

  return true;
}

bool LoopFissionImpl::CanPerformSplit() {
  // Return false if any of the condition instructions in the loop depend on a
  // load.
  if (load_used_in_condition_) {
    return false;
  }

  // Build a list of all parent loops of this loop. Loop dependence analysis
  // needs this structure.
  std::vector<const Loop*> loops;
  Loop* parent_loop = loop_;
  while (parent_loop) {
    loops.push_back(parent_loop);
    parent_loop = parent_loop->GetParent();
  }

  LoopDependenceAnalysis analysis{context_, loops};

  // A list of all the stores in the cloned loop.
  std::vector<Instruction*> set_one_stores{};

  // A list of all the loads in the cloned loop.
  std::vector<Instruction*> set_one_loads{};

  // Populate the above lists.
  for (Instruction* inst : cloned_loop_instructions_) {
    if (inst->opcode() == spv::Op::OpStore) {
      set_one_stores.push_back(inst);
    } else if (inst->opcode() == spv::Op::OpLoad) {
      set_one_loads.push_back(inst);
    }

    // If we find any instruction which we can't move (such as a barrier),
    // return false.
    if (!MovableInstruction(*inst)) return false;
  }

  // We need to calculate the depth of the loop to create the loop dependency
  // distance vectors.
  const size_t loop_depth = loop_->GetDepth();

  // Check the dependencies between loads in the cloned loop and stores in the
  // original and vice versa.
  for (Instruction* inst : original_loop_instructions_) {
    // If we find any instruction which we can't move (such as a barrier),
    // return false.
    if (!MovableInstruction(*inst)) return false;

    // Look at the dependency between the loads in the original and stores in
    // the cloned loops.
    if (inst->opcode() == spv::Op::OpLoad) {
      for (Instruction* store : set_one_stores) {
        DistanceVector vec{loop_depth};

        // If the store actually should appear after the load, return false.
        // This means the store has been placed in the wrong grouping.
        if (instruction_order_[store] > instruction_order_[inst]) {
          return false;
        }
        // If not independent check the distance vector.
        if (!analysis.GetDependence(store, inst, &vec)) {
          for (DistanceEntry& entry : vec.GetEntries()) {
            // A distance greater than zero means that the store in the cloned
            // loop has a dependency on the load in the original loop.
            if (entry.distance > 0) return false;
          }
        }
      }
    } else if (inst->opcode() == spv::Op::OpStore) {
      for (Instruction* load : set_one_loads) {
        DistanceVector vec{loop_depth};

        // If the load actually should appear after the store, return false.
        if (instruction_order_[load] > instruction_order_[inst]) {
          return false;
        }

        // If not independent check the distance vector.
        if (!analysis.GetDependence(inst, load, &vec)) {
          for (DistanceEntry& entry : vec.GetEntries()) {
            // A distance less than zero means the load in the cloned loop is
            // dependent on the store instruction in the original loop.
            if (entry.distance < 0) return false;
          }
        }
      }
    }
  }
  return true;
}

Loop* LoopFissionImpl::SplitLoop() {
  // Clone the loop.
  LoopUtils util{context_, loop_};
  LoopUtils::LoopCloningResult clone_results;
  Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results);

  // Update the OpLoopMerge in the cloned loop.
  cloned_loop->UpdateLoopMergeInst();

  // Add the loop_ to the module.
  // TODO(1841): Handle failure to create pre-header.
  Function::iterator it =
      util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id());
  util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(),
                                     clone_results.cloned_bb_.end(), ++it);
  loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock());

  std::vector<Instruction*> instructions_to_kill{};

  // Kill all the instructions which should appear in the cloned loop but not in
  // the original loop.
  for (uint32_t id : loop_->GetBlocks()) {
    BasicBlock* block = context_->cfg()->block(id);

    for (Instruction& inst : *block) {
      // If the instruction appears in the cloned loop instruction group, kill
      // it.
      if (cloned_loop_instructions_.count(&inst) == 1 &&
          original_loop_instructions_.count(&inst) == 0) {
        instructions_to_kill.push_back(&inst);
        if (inst.opcode() == spv::Op::OpPhi) {
          context_->ReplaceAllUsesWith(
              inst.result_id(), clone_results.value_map_[inst.result_id()]);
        }
      }
    }
  }

  // Kill all instructions which should appear in the original loop and not in
  // the cloned loop.
  for (uint32_t id : cloned_loop->GetBlocks()) {
    BasicBlock* block = context_->cfg()->block(id);
    for (Instruction& inst : *block) {
      Instruction* old_inst = clone_results.ptr_map_[&inst];
      // If the instruction belongs to the original loop instruction group, kill
      // it.
      if (cloned_loop_instructions_.count(old_inst) == 0 &&
          original_loop_instructions_.count(old_inst) == 1) {
        instructions_to_kill.push_back(&inst);
      }
    }
  }

  for (Instruction* i : instructions_to_kill) {
    context_->KillInst(i);
  }

  return cloned_loop;
}

LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split,
                                 bool split_multiple_times)
    : split_multiple_times_(split_multiple_times) {
  // Split if the number of registers in the loop exceeds
  // |register_threshold_to_split|.
  split_criteria_ =
      [register_threshold_to_split](
          const RegisterLiveness::RegionRegisterLiveness& liveness) {
        return liveness.used_registers_ > register_threshold_to_split;
      };
}

LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) {
  // Split by default.
  split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) {
    return true;
  };
}

bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) {
  LivenessAnalysis* analysis = c->GetLivenessAnalysis();

  RegisterLiveness::RegionRegisterLiveness liveness{};

  Function* function = loop.GetHeaderBlock()->GetParent();
  analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness);

  return split_criteria_(liveness);
}

Pass::Status LoopFissionPass::Process() {
  bool changed = false;

  for (Function& f : *context()->module()) {
    // We collect all the inner most loops in the function and run the loop
    // splitting util on each. The reason we do this is to allow us to iterate
    // over each, as creating new loops will invalidate the loop iterator.
    std::vector<Loop*> inner_most_loops{};
    LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f);
    for (Loop& loop : loop_descriptor) {
      if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) {
        inner_most_loops.push_back(&loop);
      }
    }

    // List of new loops which meet the criteria to be split again.
    std::vector<Loop*> new_loops_to_split{};

    while (!inner_most_loops.empty()) {
      for (Loop* loop : inner_most_loops) {
        LoopFissionImpl impl{context(), loop};

        // Group the instructions in the loop into two different sets of related
        // instructions. If we can't group the instructions into the two sets
        // then we can't split the loop any further.
        if (!impl.GroupInstructionsByUseDef()) {
          continue;
        }

        if (impl.CanPerformSplit()) {
          Loop* second_loop = impl.SplitLoop();
          changed = true;
          context()->InvalidateAnalysesExceptFor(
              IRContext::kAnalysisLoopAnalysis);

          // If the newly created loop meets the criteria to be split, split it
          // again.
          if (ShouldSplitLoop(*second_loop, context()))
            new_loops_to_split.push_back(second_loop);

          // If the original loop (now split) still meets the criteria to be
          // split, split it again.
          if (ShouldSplitLoop(*loop, context()))
            new_loops_to_split.push_back(loop);
        }
      }

      // If the split multiple times flag has been set add the new loops which
      // meet the splitting criteria into the list of loops to be split on the
      // next iteration.
      if (split_multiple_times_) {
        inner_most_loops = std::move(new_loops_to_split);
      } else {
        break;
      }
    }
  }

  return changed ? Pass::Status::SuccessWithChange
                 : Pass::Status::SuccessWithoutChange;
}

}  // namespace opt
}  // namespace spvtools

Coverage Report

Created: 2024-09-11 07:09

Line	Count	Source (jump to first uncovered line)
1		// Copyright (c) 2018 Google LLC.
2		//
3		// Licensed under the Apache License, Version 2.0 (the "License");
4		// you may not use this file except in compliance with the License.
5		// You may obtain a copy of the License at
6		//
7		// http://www.apache.org/licenses/LICENSE-2.0
8		//
9		// Unless required by applicable law or agreed to in writing, software
10		// distributed under the License is distributed on an "AS IS" BASIS,
11		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12		// See the License for the specific language governing permissions and
13		// limitations under the License.
14
15		#include "source/opt/loop_fission.h"
16
17		#include <set>
18
19		#include "source/opt/register_pressure.h"
20
21		// Implement loop fission with an optional parameter to split only
22		// if the register pressure in a given loop meets a certain criteria. This is
23		// controlled via the constructors of LoopFissionPass.
24		//
25		// 1 - Build a list of loops to be split, these are top level loops (loops
26		// without child loops themselves) which meet the register pressure criteria, as
27		// determined by the ShouldSplitLoop method of LoopFissionPass.
28		//
29		// 2 - For each loop in the list, group each instruction into a set of related
30		// instructions by traversing each instructions users and operands recursively.
31		// We stop if we encounter an instruction we have seen before or an instruction
32		// which we don't consider relevant (i.e OpLoopMerge). We then group these
33		// groups into two different sets, one for the first loop and one for the
34		// second.
35		//
36		// 3 - We then run CanPerformSplit to check that it would be legal to split a
37		// loop using those two sets. We check that we haven't altered the relative
38		// order load/stores appear in the binary and that we aren't breaking any
39		// dependency between load/stores by splitting them into two loops. We also
40		// check that none of the OpBranch instructions are dependent on a load as we
41		// leave control flow structure intact and move only instructions in the body so
42		// we want to avoid any loads with side affects or aliasing.
43		//
44		// 4 - We then split the loop by calling SplitLoop. This function clones the
45		// loop and attaches it to the preheader and connects the new loops merge block
46		// to the current loop header block. We then use the two sets built in step 2 to
47		// remove instructions from each loop. If an instruction appears in the first
48		// set it is removed from the second loop and vice versa.
49		//
50		// 5 - If the multiple split passes flag is set we check if each of the loops
51		// still meet the register pressure criteria. If they do then we add them to the
52		// list of loops to be split (created in step one) to allow for loops to be
53		// split multiple times.
54		//
55
56		namespace spvtools {
57		namespace opt {
58
59		class LoopFissionImpl {
60		public:
61		LoopFissionImpl(IRContext* context, Loop* loop)
62	0	: context_(context), loop_(loop), load_used_in_condition_(false) {}
63
64		// Group each instruction in the loop into sets of instructions related by
65		// their usedef chains. An instruction which uses another will appear in the
66		// same set. Then merge those sets into just two sets. Returns false if there
67		// was one or less sets created.
68		bool GroupInstructionsByUseDef();
69
70		// Check if the sets built by GroupInstructionsByUseDef violate any data
71		// dependence rules.
72		bool CanPerformSplit();
73
74		// Split the loop and return a pointer to the new loop.
75		Loop* SplitLoop();
76
77		// Checks if \|inst\| is safe to move. We can only move instructions which don't
78		// have any side effects and OpLoads and OpStores.
79		bool MovableInstruction(const Instruction& inst) const;
80
81		private:
82		// Traverse the def use chain of \|inst\| and add the users and uses of \|inst\|
83		// which are in the same loop to the \|returned_set\|.
84		void TraverseUseDef(Instruction* inst, std::set<Instruction> returned_set,
85		bool ignore_phi_users = false, bool report_loads = false);
86
87		// We group the instructions in the block into two different groups, the
88		// instructions to be kept in the original loop and the ones to be cloned into
89		// the new loop. As the cloned loop is attached to the preheader it will be
90		// the first loop and the second loop will be the original.
91		std::set<Instruction*> cloned_loop_instructions_;
92		std::set<Instruction*> original_loop_instructions_;
93
94		// We need a set of all the instructions to be seen so we can break any
95		// recursion and also so we can ignore certain instructions by preemptively
96		// adding them to this set.
97		std::set<Instruction*> seen_instructions_;
98
99		// A map of instructions to their relative position in the function.
100		std::map<Instruction*, size_t> instruction_order_;
101
102		IRContext* context_;
103
104		Loop* loop_;
105
106		// This is set to true by TraverseUseDef when traversing the instructions
107		// related to the loop condition and any if conditions should any of those
108		// instructions be a load.
109		bool load_used_in_condition_;
110		};
111
112	0	bool LoopFissionImpl::MovableInstruction(const Instruction& inst) const {
113	0	return inst.opcode() == spv::Op::OpLoad \|\|
114	0	inst.opcode() == spv::Op::OpStore \|\|
115	0	inst.opcode() == spv::Op::OpSelectionMerge \|\|
116	0	inst.opcode() == spv::Op::OpPhi \|\| inst.IsOpcodeCodeMotionSafe();
117	0	}
118
119		void LoopFissionImpl::TraverseUseDef(Instruction* inst,
120		std::set<Instruction> returned_set,
121	0	bool ignore_phi_users, bool report_loads) {
122	0	assert(returned_set && "Set to be returned cannot be null.");
123
124	0	analysis::DefUseManager* def_use = context_->get_def_use_mgr();
125	0	std::set<Instruction>& inst_set = returned_set;
126
127		// We create this functor to traverse the use def chain to build the
128		// grouping of related instructions. The lambda captures the std::function
129		// to allow it to recurse.
130	0	std::function<void(Instruction*)> traverser_functor;
131	0	traverser_functor = [this, def_use, &inst_set, &traverser_functor,
132	0	ignore_phi_users, report_loads](Instruction* user) {
133		// If we've seen the instruction before or it is not inside the loop end the
134		// traversal.
135	0	if (!user \|\| seen_instructions_.count(user) != 0 \|\|
136	0	!context_->get_instr_block(user) \|\|
137	0	!loop_->IsInsideLoop(context_->get_instr_block(user))) {
138	0	return;
139	0	}
140
141		// Don't include labels or loop merge instructions in the instruction sets.
142		// Including them would mean we group instructions related only by using the
143		// same labels (i.e phis). We already preempt the inclusion of
144		// OpSelectionMerge by adding related instructions to the seen_instructions_
145		// set.
146	0	if (user->opcode() == spv::Op::OpLoopMerge \|\|
147	0	user->opcode() == spv::Op::OpLabel)
148	0	return;
149
150		// If the \|report_loads\| flag is set, set the class field
151		// load_used_in_condition_ to false. This is used to check that none of the
152		// condition checks in the loop rely on loads.
153	0	if (user->opcode() == spv::Op::OpLoad && report_loads) {
154	0	load_used_in_condition_ = true;
155	0	}
156
157		// Add the instruction to the set of instructions already seen, this breaks
158		// recursion and allows us to ignore certain instructions.
159	0	seen_instructions_.insert(user);
160
161	0	inst_set.insert(user);
162
163		// Wrapper functor to traverse the operands of each instruction.
164	0	auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) {
165	0	traverser_functor(def_use->GetDef(*id));
166	0	};
167	0	user->ForEachInOperand(traverse_operand);
168
169		// For the first traversal we want to ignore the users of the phi.
170	0	if (ignore_phi_users && user->opcode() == spv::Op::OpPhi) return;
171
172		// Traverse each user with this lambda.
173	0	def_use->ForEachUser(user, traverser_functor);
174
175		// Wrapper functor for the use traversal.
176	0	auto traverse_use = [&traverser_functor](Instruction* use, uint32_t) {
177	0	traverser_functor(use);
178	0	};
179	0	def_use->ForEachUse(user, traverse_use);
180
181	0	};
182
183		// We start the traversal of the use def graph by invoking the above
184		// lambda with the \|inst\| parameter.
185	0	traverser_functor(inst);
186	0	}
187
188	0	bool LoopFissionImpl::GroupInstructionsByUseDef() {
189	0	std::vector<std::set<Instruction*>> sets{};
190
191		// We want to ignore all the instructions stemming from the loop condition
192		// instruction.
193	0	BasicBlock* condition_block = loop_->FindConditionBlock();
194
195	0	if (!condition_block) return false;
196	0	Instruction* condition = &*condition_block->tail();
197
198		// We iterate over the blocks via iterating over all the blocks in the
199		// function, we do this so we are iterating in the same order which the blocks
200		// appear in the binary.
201	0	Function& function = *loop_->GetHeaderBlock()->GetParent();
202
203		// Create a temporary set to ignore certain groups of instructions within the
204		// loop. We don't want any instructions related to control flow to be removed
205		// from either loop only instructions within the control flow bodies.
206	0	std::set<Instruction*> instructions_to_ignore{};
207	0	TraverseUseDef(condition, &instructions_to_ignore, true, true);
208
209		// Traverse control flow instructions to ensure they are added to the
210		// seen_instructions_ set and will be ignored when it it called with actual
211		// sets.
212	0	for (BasicBlock& block : function) {
213	0	if (!loop_->IsInsideLoop(block.id())) continue;
214
215	0	for (Instruction& inst : block) {
216		// Ignore all instructions related to control flow.
217	0	if (inst.opcode() == spv::Op::OpSelectionMerge \|\| inst.IsBranch()) {
218	0	TraverseUseDef(&inst, &instructions_to_ignore, true, true);
219	0	}
220	0	}
221	0	}
222
223		// Traverse the instructions and generate the sets, automatically ignoring any
224		// instructions in instructions_to_ignore.
225	0	for (BasicBlock& block : function) {
226	0	if (!loop_->IsInsideLoop(block.id()) \|\|
227	0	loop_->GetHeaderBlock()->id() == block.id())
228	0	continue;
229
230	0	for (Instruction& inst : block) {
231		// Record the order that each load/store is seen.
232	0	if (inst.opcode() == spv::Op::OpLoad \|\|
233	0	inst.opcode() == spv::Op::OpStore) {
234	0	instruction_order_[&inst] = instruction_order_.size();
235	0	}
236
237		// Ignore instructions already seen in a traversal.
238	0	if (seen_instructions_.count(&inst) != 0) {
239	0	continue;
240	0	}
241
242		// Build the set.
243	0	std::set<Instruction*> inst_set{};
244	0	TraverseUseDef(&inst, &inst_set);
245	0	if (!inst_set.empty()) sets.push_back(std::move(inst_set));
246	0	}
247	0	}
248
249		// If we have one or zero sets return false to indicate that due to
250		// insufficient instructions we couldn't split the loop into two groups and
251		// thus the loop can't be split any further.
252	0	if (sets.size() < 2) {
253	0	return false;
254	0	}
255
256		// Merge the loop sets into two different sets. In CanPerformSplit we will
257		// validate that we don't break the relative ordering of loads/stores by doing
258		// this.
259	0	for (size_t index = 0; index < sets.size() / 2; ++index) {
260	0	cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end());
261	0	}
262	0	for (size_t index = sets.size() / 2; index < sets.size(); ++index) {
263	0	original_loop_instructions_.insert(sets[index].begin(), sets[index].end());
264	0	}
265
266	0	return true;
267	0	}
268
269	0	bool LoopFissionImpl::CanPerformSplit() {
270		// Return false if any of the condition instructions in the loop depend on a
271		// load.
272	0	if (load_used_in_condition_) {
273	0	return false;
274	0	}
275
276		// Build a list of all parent loops of this loop. Loop dependence analysis
277		// needs this structure.
278	0	std::vector<const Loop*> loops;
279	0	Loop* parent_loop = loop_;
280	0	while (parent_loop) {
281	0	loops.push_back(parent_loop);
282	0	parent_loop = parent_loop->GetParent();
283	0	}
284
285	0	LoopDependenceAnalysis analysis{context_, loops};
286
287		// A list of all the stores in the cloned loop.
288	0	std::vector<Instruction*> set_one_stores{};
289
290		// A list of all the loads in the cloned loop.
291	0	std::vector<Instruction*> set_one_loads{};
292
293		// Populate the above lists.
294	0	for (Instruction* inst : cloned_loop_instructions_) {
295	0	if (inst->opcode() == spv::Op::OpStore) {
296	0	set_one_stores.push_back(inst);
297	0	} else if (inst->opcode() == spv::Op::OpLoad) {
298	0	set_one_loads.push_back(inst);
299	0	}
300
301		// If we find any instruction which we can't move (such as a barrier),
302		// return false.
303	0	if (!MovableInstruction(*inst)) return false;
304	0	}
305
306		// We need to calculate the depth of the loop to create the loop dependency
307		// distance vectors.
308	0	const size_t loop_depth = loop_->GetDepth();
309
310		// Check the dependencies between loads in the cloned loop and stores in the
311		// original and vice versa.
312	0	for (Instruction* inst : original_loop_instructions_) {
313		// If we find any instruction which we can't move (such as a barrier),
314		// return false.
315	0	if (!MovableInstruction(*inst)) return false;
316
317		// Look at the dependency between the loads in the original and stores in
318		// the cloned loops.
319	0	if (inst->opcode() == spv::Op::OpLoad) {
320	0	for (Instruction* store : set_one_stores) {
321	0	DistanceVector vec{loop_depth};
322
323		// If the store actually should appear after the load, return false.
324		// This means the store has been placed in the wrong grouping.
325	0	if (instruction_order_[store] > instruction_order_[inst]) {
326	0	return false;
327	0	}
328		// If not independent check the distance vector.
329	0	if (!analysis.GetDependence(store, inst, &vec)) {
330	0	for (DistanceEntry& entry : vec.GetEntries()) {
331		// A distance greater than zero means that the store in the cloned
332		// loop has a dependency on the load in the original loop.
333	0	if (entry.distance > 0) return false;
334	0	}
335	0	}
336	0	}
337	0	} else if (inst->opcode() == spv::Op::OpStore) {
338	0	for (Instruction* load : set_one_loads) {
339	0	DistanceVector vec{loop_depth};
340
341		// If the load actually should appear after the store, return false.
342	0	if (instruction_order_[load] > instruction_order_[inst]) {
343	0	return false;
344	0	}
345
346		// If not independent check the distance vector.
347	0	if (!analysis.GetDependence(inst, load, &vec)) {
348	0	for (DistanceEntry& entry : vec.GetEntries()) {
349		// A distance less than zero means the load in the cloned loop is
350		// dependent on the store instruction in the original loop.
351	0	if (entry.distance < 0) return false;
352	0	}
353	0	}
354	0	}
355	0	}
356	0	}
357	0	return true;
358	0	}
359
360	0	Loop* LoopFissionImpl::SplitLoop() {
361		// Clone the loop.
362	0	LoopUtils util{context_, loop_};
363	0	LoopUtils::LoopCloningResult clone_results;
364	0	Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results);
365
366		// Update the OpLoopMerge in the cloned loop.
367	0	cloned_loop->UpdateLoopMergeInst();
368
369		// Add the loop_ to the module.
370		// TODO(1841): Handle failure to create pre-header.
371	0	Function::iterator it =
372	0	util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id());
373	0	util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(),
374	0	clone_results.cloned_bb_.end(), ++it);
375	0	loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock());
376
377	0	std::vector<Instruction*> instructions_to_kill{};
378
379		// Kill all the instructions which should appear in the cloned loop but not in
380		// the original loop.
381	0	for (uint32_t id : loop_->GetBlocks()) {
382	0	BasicBlock* block = context_->cfg()->block(id);
383
384	0	for (Instruction& inst : *block) {
385		// If the instruction appears in the cloned loop instruction group, kill
386		// it.
387	0	if (cloned_loop_instructions_.count(&inst) == 1 &&
388	0	original_loop_instructions_.count(&inst) == 0) {
389	0	instructions_to_kill.push_back(&inst);
390	0	if (inst.opcode() == spv::Op::OpPhi) {
391	0	context_->ReplaceAllUsesWith(
392	0	inst.result_id(), clone_results.value_map_[inst.result_id()]);
393	0	}
394	0	}
395	0	}
396	0	}
397
398		// Kill all instructions which should appear in the original loop and not in
399		// the cloned loop.
400	0	for (uint32_t id : cloned_loop->GetBlocks()) {
401	0	BasicBlock* block = context_->cfg()->block(id);
402	0	for (Instruction& inst : *block) {
403	0	Instruction* old_inst = clone_results.ptr_map_[&inst];
404		// If the instruction belongs to the original loop instruction group, kill
405		// it.
406	0	if (cloned_loop_instructions_.count(old_inst) == 0 &&
407	0	original_loop_instructions_.count(old_inst) == 1) {
408	0	instructions_to_kill.push_back(&inst);
409	0	}
410	0	}
411	0	}
412
413	0	for (Instruction* i : instructions_to_kill) {
414	0	context_->KillInst(i);
415	0	}
416
417	0	return cloned_loop;
418	0	}
419
420		LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split,
421		bool split_multiple_times)
422	0	: split_multiple_times_(split_multiple_times) {
423		// Split if the number of registers in the loop exceeds
424		// \|register_threshold_to_split\|.
425	0	split_criteria_ =
426	0	[register_threshold_to_split](
427	0	const RegisterLiveness::RegionRegisterLiveness& liveness) {
428	0	return liveness.used_registers_ > register_threshold_to_split;
429	0	};
430	0	}
431
432	0	LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) {
433		// Split by default.
434	0	split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) {
435	0	return true;
436	0	};
437	0	}
438
439	0	bool LoopFissionPass::ShouldSplitLoop(const Loop& loop, IRContext* c) {
440	0	LivenessAnalysis* analysis = c->GetLivenessAnalysis();
441
442	0	RegisterLiveness::RegionRegisterLiveness liveness{};
443
444	0	Function* function = loop.GetHeaderBlock()->GetParent();
445	0	analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness);
446
447	0	return split_criteria_(liveness);
448	0	}
449
450	0	Pass::Status LoopFissionPass::Process() {
451	0	bool changed = false;
452
453	0	for (Function& f : *context()->module()) {
454		// We collect all the inner most loops in the function and run the loop
455		// splitting util on each. The reason we do this is to allow us to iterate
456		// over each, as creating new loops will invalidate the loop iterator.
457	0	std::vector<Loop*> inner_most_loops{};
458	0	LoopDescriptor& loop_descriptor = *context()->GetLoopDescriptor(&f);
459	0	for (Loop& loop : loop_descriptor) {
460	0	if (!loop.HasChildren() && ShouldSplitLoop(loop, context())) {
461	0	inner_most_loops.push_back(&loop);
462	0	}
463	0	}
464
465		// List of new loops which meet the criteria to be split again.
466	0	std::vector<Loop*> new_loops_to_split{};
467
468	0	while (!inner_most_loops.empty()) {
469	0	for (Loop* loop : inner_most_loops) {
470	0	LoopFissionImpl impl{context(), loop};
471
472		// Group the instructions in the loop into two different sets of related
473		// instructions. If we can't group the instructions into the two sets
474		// then we can't split the loop any further.
475	0	if (!impl.GroupInstructionsByUseDef()) {
476	0	continue;
477	0	}
478
479	0	if (impl.CanPerformSplit()) {
480	0	Loop* second_loop = impl.SplitLoop();
481	0	changed = true;
482	0	context()->InvalidateAnalysesExceptFor(
483	0	IRContext::kAnalysisLoopAnalysis);
484
485		// If the newly created loop meets the criteria to be split, split it
486		// again.
487	0	if (ShouldSplitLoop(*second_loop, context()))
488	0	new_loops_to_split.push_back(second_loop);
489
490		// If the original loop (now split) still meets the criteria to be
491		// split, split it again.
492	0	if (ShouldSplitLoop(*loop, context()))
493	0	new_loops_to_split.push_back(loop);
494	0	}
495	0	}
496
497		// If the split multiple times flag has been set add the new loops which
498		// meet the splitting criteria into the list of loops to be split on the
499		// next iteration.
500	0	if (split_multiple_times_) {
501	0	inner_most_loops = std::move(new_loops_to_split);
502	0	} else {
503	0	break;
504	0	}
505	0	}
506	0	}
507
508	0	return changed ? Pass::Status::SuccessWithChange
509	0	: Pass::Status::SuccessWithoutChange;
510	0	}
511
512		} // namespace opt
513		} // namespace spvtools