aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen McGroarty <stephen@codeplay.com>2018-04-23 21:01:12 +0100
committerDiego Novillo <dnovillo@google.com>2018-05-01 15:15:10 -0400
commit9a5dd6fe88882005ea0433857e688581608bb210 (patch)
treea7598954e6c35b24d6548f1b2a818302743a0b81
parent9ba0879ddf00cc2462e581772cd6869e2a2dc984 (diff)
downloadspirv-tools-9a5dd6fe88882005ea0433857e688581608bb210.tar.gz
Support loop fission.
Adds support for splitting loops whose register pressure exceeds a user-provided level. This pass will split a loop into two or more loops, provided that the loop is a top-level loop and that splitting it is legal. Control flow is left intact for dead code elimination to remove. This pass is enabled with the --loop-fission flag to spirv-opt.
-rw-r--r--Android.mk1
-rw-r--r--include/spirv-tools/optimizer.hpp5
-rw-r--r--source/opt/CMakeLists.txt2
-rw-r--r--source/opt/instruction.cpp4
-rw-r--r--source/opt/loop_descriptor.h1
-rw-r--r--source/opt/loop_fission.cpp508
-rw-r--r--source/opt/loop_fission.h78
-rw-r--r--source/opt/loop_utils.cpp86
-rw-r--r--source/opt/loop_utils.h9
-rw-r--r--source/opt/optimizer.cpp5
-rw-r--r--source/opt/passes.h1
-rw-r--r--test/opt/loop_optimizations/CMakeLists.txt8
-rw-r--r--test/opt/loop_optimizations/loop_fission.cpp3492
-rw-r--r--tools/opt/opt.cpp23
14 files changed, 4217 insertions, 6 deletions
diff --git a/Android.mk b/Android.mk
index 812209e3..c450fd0e 100644
--- a/Android.mk
+++ b/Android.mk
@@ -107,6 +107,7 @@ SPVTOOLS_OPT_SRC_FILES := \
source/opt/loop_dependence.cpp \
source/opt/loop_dependence_helpers.cpp \
source/opt/loop_descriptor.cpp \
+ source/opt/loop_fission.cpp \
source/opt/loop_peeling.cpp \
source/opt/loop_unroller.cpp \
source/opt/loop_unswitch_pass.cpp \
diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp
index f177849a..4e2bc265 100644
--- a/include/spirv-tools/optimizer.hpp
+++ b/include/spirv-tools/optimizer.hpp
@@ -483,6 +483,11 @@ Optimizer::PassToken CreateLocalRedundancyEliminationPass();
// the loops preheader.
Optimizer::PassToken CreateLoopInvariantCodeMotionPass();
+// Creates a loop fission pass.
+// This pass will split all top level loops whose register pressure exceeds the
+// given |threshold|.
+Optimizer::PassToken CreateLoopFissionPass(size_t threshold);
+
// Creates a loop peeling pass.
// This pass will look for conditions inside a loop that are true or false only
// for the N first or last iteration. For loop with such condition, those N
diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt
index 278bc0f6..1ad75642 100644
--- a/source/opt/CMakeLists.txt
+++ b/source/opt/CMakeLists.txt
@@ -60,6 +60,7 @@ add_library(SPIRV-Tools-opt
log.h
loop_dependence.h
loop_descriptor.h
+ loop_fission.h
loop_peeling.h
loop_unroller.h
loop_utils.h
@@ -143,6 +144,7 @@ add_library(SPIRV-Tools-opt
loop_dependence.cpp
loop_dependence_helpers.cpp
loop_descriptor.cpp
+ loop_fission.cpp
loop_peeling.cpp
loop_utils.cpp
loop_unroller.cpp
diff --git a/source/opt/instruction.cpp b/source/opt/instruction.cpp
index 88553ad8..03e9059c 100644
--- a/source/opt/instruction.cpp
+++ b/source/opt/instruction.cpp
@@ -590,6 +590,10 @@ bool Instruction::IsOpcodeCodeMotionSafe() const {
case SpvOpBitwiseXor:
case SpvOpBitwiseAnd:
case SpvOpNot:
+ case SpvOpAccessChain:
+ case SpvOpInBoundsAccessChain:
+ case SpvOpPtrAccessChain:
+ case SpvOpInBoundsPtrAccessChain:
return true;
default:
return false;
diff --git a/source/opt/loop_descriptor.h b/source/opt/loop_descriptor.h
index 210ec893..ebd67381 100644
--- a/source/opt/loop_descriptor.h
+++ b/source/opt/loop_descriptor.h
@@ -153,6 +153,7 @@ class Loop {
inline size_t NumImmediateChildren() const { return nested_loops_.size(); }
+ inline bool HasChildren() const { return !nested_loops_.empty(); }  // True when nested_loops_ holds at least one loop.
// Adds |nested| as a nested loop of this loop. Automatically register |this|
// as the parent of |nested|.
inline void AddNestedLoop(Loop* nested) {
diff --git a/source/opt/loop_fission.cpp b/source/opt/loop_fission.cpp
new file mode 100644
index 00000000..10d52207
--- /dev/null
+++ b/source/opt/loop_fission.cpp
@@ -0,0 +1,508 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "opt/loop_fission.h"
+#include "opt/register_pressure.h"
+
+// Implement loop fission with an optional parameter to split only
+// if the register pressure in a given loop meets a certain criteria. This is
+// controlled via the constructors of LoopFissionPass.
+//
+// 1 - Build a list of loops to be split, these are top level loops (loops
+// without child loops themselves) which meet the register pressure criteria, as
+// determined by the ShouldSplitLoop method of LoopFissionPass.
+//
+// 2 - For each loop in the list, group each instruction into a set of related
+// instructions by traversing each instruction's users and operands recursively.
+// We stop if we encounter an instruction we have seen before or an instruction
+// which we don't consider relevant (e.g. OpLoopMerge). We then group these
+// groups into two different sets, one for the first loop and one for the
+// second.
+//
+// 3 - We then run CanPerformSplit to check that it would be legal to split a
+// loop using those two sets. We check that we haven't altered the relative
+// order load/stores appear in the binary and that we aren't breaking any
+// dependency between load/stores by splitting them into two loops. We also
+// check that none of the OpBranch instructions are dependent on a load as we
+// leave control flow structure intact and move only instructions in the body so
+// we want to avoid any loads with side effects or aliasing.
+//
+// 4 - We then split the loop by calling SplitLoop. This function clones the
+// loop and attaches it to the preheader and connects the new loops merge block
+// to the current loop header block. We then use the two sets built in step 2 to
+// remove instructions from each loop. If an instruction appears in the first
+// set it is removed from the second loop and vice versa.
+//
+// 5 - If the multiple split passes flag is set we check if each of the loops
+// still meet the register pressure criteria. If they do then we add them to the
+// list of loops to be split (created in step one) to allow for loops to be
+// split multiple times.
+//
+
+namespace spvtools {
+namespace opt {
+
+class LoopFissionImpl {
+ public:
+ LoopFissionImpl(ir::IRContext* context, ir::Loop* loop)
+ : context_(context), loop_(loop), load_used_in_condition_(false) {}
+
+ // Group each instruction in the loop into sets of instructions related by
+ // their use-def chains. An instruction which uses another will appear in the
+ // same set. Then merge those sets into just two sets. Returns false if the
+ // loop has no condition block or if fewer than two sets were created.
+ bool GroupInstructionsByUseDef();
+
+ // Check if the sets built by GroupInstructionsByUseDef violate any data
+ // dependence rules.
+ bool CanPerformSplit();
+
+ // Split the loop and return a pointer to the new loop.
+ ir::Loop* SplitLoop();
+
+ // Checks if |inst| is safe to move: OpLoad, OpStore, OpSelectionMerge, OpPhi
+ // and any opcode accepted by Instruction::IsOpcodeCodeMotionSafe().
+ bool MovableInstruction(const ir::Instruction& inst) const;
+
+ private:
+ // Traverse the def-use chain of |inst| and add |inst| plus its operands and
+ // users which are in the same loop to the |returned_set|.
+ void TraverseUseDef(ir::Instruction* inst,
+ std::set<ir::Instruction*>* returned_set,
+ bool ignore_phi_users = false, bool report_loads = false);
+
+ // We group the instructions in the block into two different groups, the
+ // instructions to be kept in the original loop and the ones to be cloned into
+ // the new loop. As the cloned loop is attached to the preheader it will be
+ // the first loop and the second loop will be the original.
+ std::set<ir::Instruction*> cloned_loop_instructions_;
+ std::set<ir::Instruction*> original_loop_instructions_;
+
+ // We need a set of all the instructions to be seen so we can break any
+ // recursion and also so we can ignore certain instructions by preemptively
+ // adding them to this set.
+ std::set<ir::Instruction*> seen_instructions_;
+
+ // Maps each OpLoad/OpStore recorded in the loop to its relative position.
+ std::map<ir::Instruction*, size_t> instruction_order_;
+
+ ir::IRContext* context_;
+
+ ir::Loop* loop_;
+
+ // This is set to true by TraverseUseDef when traversing the instructions
+ // related to the loop condition and any if conditions should any of those
+ // instructions be a load.
+ bool load_used_in_condition_;
+};
+
+// Reports whether |inst| is an instruction the fission pass is willing to
+// move. Loads, stores, selection merges and phis are always accepted; every
+// other opcode is accepted only if Instruction::IsOpcodeCodeMotionSafe() says
+// so.
+bool LoopFissionImpl::MovableInstruction(const ir::Instruction& inst) const {
+  switch (inst.opcode()) {
+    case SpvOp::SpvOpLoad:
+    case SpvOp::SpvOpStore:
+    case SpvOp::SpvOpSelectionMerge:
+    case SpvOp::SpvOpPhi:
+      return true;
+    default:
+      // Defer to the generic code-motion whitelist for everything else.
+      return inst.IsOpcodeCodeMotionSafe();
+  }
+}
+
+void LoopFissionImpl::TraverseUseDef(ir::Instruction* inst,
+ std::set<ir::Instruction*>* returned_set,
+ bool ignore_phi_users, bool report_loads) {
+ assert(returned_set && "Set to be returned cannot be null.");
+
+ opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr();
+ std::set<ir::Instruction*>& inst_set = *returned_set;
+
+ // We create this functor to traverse the use def chain to build the
+ // grouping of related instructions. The lambda captures the std::function
+ // by reference to allow it to recurse.
+ std::function<void(ir::Instruction*)> traverser_functor;
+ traverser_functor = [this, def_use, &inst_set, &traverser_functor,
+ ignore_phi_users, report_loads](ir::Instruction* user) {
+ // If we've seen the instruction before, it has no parent block, or it is
+ // not inside the loop, end the traversal.
+ if (!user || seen_instructions_.count(user) != 0 ||
+ !context_->get_instr_block(user) ||
+ !loop_->IsInsideLoop(context_->get_instr_block(user))) {
+ return;
+ }
+
+ // Don't include labels or loop merge instructions in the instruction sets.
+ // Including them would mean we group instructions related only by using the
+ // same labels (i.e. phis). We already preempt the inclusion of
+ // OpSelectionMerge by adding related instructions to the seen_instructions_
+ // set.
+ if (user->opcode() == SpvOp::SpvOpLoopMerge ||
+ user->opcode() == SpvOp::SpvOpLabel)
+ return;
+
+ // If the |report_loads| flag is set and this is an OpLoad, set the class
+ // field load_used_in_condition_ to true. This is used to check that none of
+ // the condition checks in the loop rely on loads.
+ if (user->opcode() == SpvOp::SpvOpLoad && report_loads) {
+ load_used_in_condition_ = true;
+ }
+
+ // Add the instruction to the set of instructions already seen, this breaks
+ // recursion and allows us to ignore certain instructions.
+ seen_instructions_.insert(user);
+
+ inst_set.insert(user);
+
+ // Wrapper functor to traverse the operands of each instruction.
+ auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) {
+ traverser_functor(def_use->GetDef(*id));
+ };
+ user->ForEachInOperand(traverse_operand);
+
+ // For the first traversal we want to ignore the users of the phi.
+ if (ignore_phi_users && user->opcode() == SpvOp::SpvOpPhi) return;
+
+ // Traverse each user with this lambda.
+ def_use->ForEachUser(user, traverser_functor);
+
+ // Wrapper functor for the use traversal; the operand index is not needed.
+ auto traverse_use = [&traverser_functor](ir::Instruction* use, uint32_t) {
+ traverser_functor(use);
+ };
+ def_use->ForEachUse(user, traverse_use);
+
+ };
+
+ // We start the traversal of the use def graph by invoking the above
+ // lambda with the |inst| parameter.
+ traverser_functor(inst);
+}
+
+// Partitions the loop's instructions into cloned_loop_instructions_ and
+// original_loop_instructions_. Instructions reachable from the loop condition,
+// from branches and from OpSelectionMerge are traversed first so that they are
+// marked as seen and never land in either group. Returns false when the loop
+// has no condition block or when fewer than two independent sets were found.
+bool LoopFissionImpl::GroupInstructionsByUseDef() {
+  std::vector<std::set<ir::Instruction*>> sets{};
+
+  // We want to ignore all the instructions stemming from the loop condition
+  // instruction.
+  ir::BasicBlock* condition_block = loop_->FindConditionBlock();
+
+  if (!condition_block) return false;
+  ir::Instruction* condition = &*condition_block->tail();
+
+  // We iterate over the blocks via iterating over all the blocks in the
+  // function, we do this so we are iterating in the same order which the blocks
+  // appear in the binary.
+  ir::Function& function = *loop_->GetHeaderBlock()->GetParent();
+
+  // Create a temporary set to ignore certain groups of instructions within the
+  // loop. We don't want any instructions related to control flow to be removed
+  // from either loop, only instructions within the control flow bodies.
+  std::set<ir::Instruction*> instructions_to_ignore{};
+  TraverseUseDef(condition, &instructions_to_ignore, true, true);
+
+  // Traverse control flow instructions to ensure they are added to the
+  // seen_instructions_ set and will be ignored when TraverseUseDef is called
+  // with the actual sets.
+  for (ir::BasicBlock& block : function) {
+    if (!loop_->IsInsideLoop(block.id())) continue;
+
+    for (ir::Instruction& inst : block) {
+      // Ignore all instructions related to control flow.
+      if (inst.opcode() == SpvOp::SpvOpSelectionMerge || inst.IsBranch()) {
+        TraverseUseDef(&inst, &instructions_to_ignore, true, true);
+      }
+    }
+  }
+
+  // Traverse the instructions and generate the sets, automatically ignoring any
+  // instructions in instructions_to_ignore. The header block is skipped.
+  for (ir::BasicBlock& block : function) {
+    if (!loop_->IsInsideLoop(block.id()) ||
+        loop_->GetHeaderBlock()->id() == block.id())
+      continue;
+
+    for (ir::Instruction& inst : block) {
+      // Record the order that each load/store is seen. emplace() evaluates
+      // size() before the element is inserted, so the recorded position does
+      // not depend on the compiler's evaluation order for `map[k] = map.size()`.
+      if (inst.opcode() == SpvOp::SpvOpLoad ||
+          inst.opcode() == SpvOp::SpvOpStore) {
+        instruction_order_.emplace(&inst, instruction_order_.size());
+      }
+
+      // Ignore instructions already seen in a traversal.
+      if (seen_instructions_.count(&inst) != 0) {
+        continue;
+      }
+
+      // Build the set.
+      std::set<ir::Instruction*> inst_set{};
+      TraverseUseDef(&inst, &inst_set);
+      if (!inst_set.empty()) sets.push_back(std::move(inst_set));
+    }
+  }
+
+  // If we have one or zero sets return false to indicate that due to
+  // insufficient instructions we couldn't split the loop into two groups and
+  // thus the loop can't be split any further.
+  if (sets.size() < 2) {
+    return false;
+  }
+
+  // Merge the loop sets into two different sets: the first half goes to the
+  // cloned loop, the second half stays in the original. In CanPerformSplit we
+  // will validate that we don't break the relative ordering of loads/stores by
+  // doing this.
+  for (size_t index = 0; index < sets.size() / 2; ++index) {
+    cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end());
+  }
+  for (size_t index = sets.size() / 2; index < sets.size(); ++index) {
+    original_loop_instructions_.insert(sets[index].begin(), sets[index].end());
+  }
+
+  return true;
+}
+
+bool LoopFissionImpl::CanPerformSplit() {
+ // Return false if any of the condition instructions in the loop depend on a
+ // load.
+ if (load_used_in_condition_) {
+ return false;
+ }
+
+ // Build a list of this loop followed by all of its parent loops. Loop
+ // dependence analysis needs this structure.
+ std::vector<const ir::Loop*> loops;
+ ir::Loop* parent_loop = loop_;
+ while (parent_loop) {
+ loops.push_back(parent_loop);
+ parent_loop = parent_loop->GetParent();
+ }
+
+ LoopDependenceAnalysis analysis{context_, loops};
+
+ // A list of all the stores in the cloned loop.
+ std::vector<ir::Instruction*> set_one_stores{};
+
+ // A list of all the loads in the cloned loop.
+ std::vector<ir::Instruction*> set_one_loads{};
+
+ // Populate the above lists.
+ for (ir::Instruction* inst : cloned_loop_instructions_) {
+ if (inst->opcode() == SpvOp::SpvOpStore) {
+ set_one_stores.push_back(inst);
+ } else if (inst->opcode() == SpvOp::SpvOpLoad) {
+ set_one_loads.push_back(inst);
+ }
+
+ // If we find any instruction which we can't move (such as a barrier),
+ // return false.
+ if (!MovableInstruction(*inst)) return false;
+ }
+
+ // The loop depth sizes the distance vectors handed to the dependence
+ // analysis below.
+ const size_t loop_depth = loop_->GetDepth();
+
+ // Check the dependencies between loads in the cloned loop and stores in the
+ // original and vice versa.
+ for (ir::Instruction* inst : original_loop_instructions_) {
+ // If we find any instruction which we can't move (such as a barrier),
+ // return false.
+ if (!MovableInstruction(*inst)) return false;
+
+ // Look at the dependency between the loads in the original and stores in
+ // the cloned loops.
+ if (inst->opcode() == SpvOp::SpvOpLoad) {
+ for (ir::Instruction* store : set_one_stores) {
+ DistanceVector vec{loop_depth};
+
+ // If the store actually should appear after the load, return false.
+ // This means the store has been placed in the wrong grouping.
+ if (instruction_order_[store] > instruction_order_[inst]) {  // NOTE(review): map operator[] inserts 0 for an instruction that was never recorded in instruction_order_ - confirm every load/store reaching here was recorded.
+ return false;
+ }
+ // If not independent check the distance vector.
+ if (!analysis.GetDependence(store, inst, &vec)) {
+ for (DistanceEntry& entry : vec.GetEntries()) {
+ // A distance greater than zero means that the store in the cloned
+ // loop has a dependency on the load in the original loop.
+ if (entry.distance > 0) return false;
+ }
+ }
+ }
+ } else if (inst->opcode() == SpvOp::SpvOpStore) {
+ for (ir::Instruction* load : set_one_loads) {
+ DistanceVector vec{loop_depth};
+
+ // If the load actually should appear after the store, return false.
+ if (instruction_order_[load] > instruction_order_[inst]) {
+ return false;
+ }
+
+ // If not independent check the distance vector.
+ if (!analysis.GetDependence(inst, load, &vec)) {
+ for (DistanceEntry& entry : vec.GetEntries()) {
+ // A distance less than zero means the load in the cloned loop is
+ // dependent on the store instruction in the original loop.
+ if (entry.distance < 0) return false;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+ir::Loop* LoopFissionImpl::SplitLoop() {
+ // Clone the loop.
+ LoopUtils util{context_, loop_};
+ LoopUtils::LoopCloningResult clone_results;
+ ir::Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results);
+
+ // Update the OpLoopMerge in the cloned loop.
+ cloned_loop->UpdateLoopMergeInst();
+
+ // Add the cloned blocks to the function, using the position after the
+ // original loop's preheader block as the insertion point.
+ ir::Function::iterator it =
+ util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id());
+ util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(),
+ clone_results.cloned_bb_.end(), ++it);
+ loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock());  // The clone's merge block becomes the original loop's preheader.
+
+ std::vector<ir::Instruction*> instructions_to_kill{};
+
+ // Kill all the instructions which should appear in the cloned loop but not in
+ // the original loop.
+ for (uint32_t id : loop_->GetBlocks()) {
+ ir::BasicBlock* block = context_->cfg()->block(id);
+
+ for (ir::Instruction& inst : *block) {
+ // If the instruction appears only in the cloned loop instruction group,
+ // kill it. Uses of a killed phi are redirected to its clone first.
+ if (cloned_loop_instructions_.count(&inst) == 1 &&
+ original_loop_instructions_.count(&inst) == 0) {
+ instructions_to_kill.push_back(&inst);
+ if (inst.opcode() == SpvOp::SpvOpPhi) {
+ context_->ReplaceAllUsesWith(
+ inst.result_id(), clone_results.value_map_[inst.result_id()]);
+ }
+ }
+ }
+ }
+
+ // Kill all instructions which should appear in the original loop and not in
+ // the cloned loop.
+ for (uint32_t id : cloned_loop->GetBlocks()) {
+ ir::BasicBlock* block = context_->cfg()->block(id);
+ for (ir::Instruction& inst : *block) {
+ ir::Instruction* old_inst = clone_results.ptr_map_[&inst];  // The original instruction this clone was copied from.
+ // If the instruction belongs only to the original loop instruction group,
+ // kill it.
+ if (cloned_loop_instructions_.count(old_inst) == 0 &&
+ original_loop_instructions_.count(old_inst) == 1) {
+ instructions_to_kill.push_back(&inst);
+ }
+ }
+ }
+
+ for (ir::Instruction* i : instructions_to_kill) {
+ context_->KillInst(i);
+ }
+
+ return cloned_loop;
+}
+
+LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split,
+ bool split_multiple_times)
+ : split_multiple_times_(split_multiple_times) {
+ // Split if the number of registers in the loop exceeds
+ // |register_threshold_to_split|.
+ split_criteria_ =
+ [register_threshold_to_split](
+ const RegisterLiveness::RegionRegisterLiveness& liveness) {
+ return liveness.used_registers_ > register_threshold_to_split;
+ };
+}
+
+LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) {
+ // Split by default.
+ split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) {
+ return true;
+ };
+}
+
+// Computes the register pressure of |loop| through the liveness analysis
+// obtained from |c| and returns the verdict of split_criteria_ on it.
+bool LoopFissionPass::ShouldSplitLoop(const ir::Loop& loop, ir::IRContext* c) {
+  LivenessAnalysis* liveness_analysis = c->GetLivenessAnalysis();
+
+  // Filled in by ComputeLoopRegisterPressure below.
+  RegisterLiveness::RegionRegisterLiveness loop_pressure{};
+
+  ir::Function* parent_function = loop.GetHeaderBlock()->GetParent();
+  auto* function_liveness = liveness_analysis->Get(parent_function);
+  function_liveness->ComputeLoopRegisterPressure(loop, &loop_pressure);
+
+  return split_criteria_(loop_pressure);
+}
+
+// Runs loop fission over every function of the module held by |c|. Loops
+// without nested loops that satisfy ShouldSplitLoop are split when grouping
+// and the legality check succeed. When split_multiple_times_ is set, the
+// loops resulting from a split are reconsidered in further rounds. Returns
+// SuccessWithChange if at least one loop was split.
+Pass::Status LoopFissionPass::Process(ir::IRContext* c) {
+  bool changed = false;
+
+  for (ir::Function& f : *c->module()) {
+    // We collect all the inner most loops in the function and run the loop
+    // splitting util on each. The reason we do this is to allow us to iterate
+    // over each, as creating new loops will invalidate the loop iterator.
+    std::vector<ir::Loop*> inner_most_loops{};
+    ir::LoopDescriptor& loop_descriptor = *c->GetLoopDescriptor(&f);
+    for (ir::Loop& loop : loop_descriptor) {
+      if (!loop.HasChildren() && ShouldSplitLoop(loop, c)) {
+        inner_most_loops.push_back(&loop);
+      }
+    }
+
+    while (!inner_most_loops.empty()) {
+      // List of new loops which meet the criteria to be split again. It is
+      // declared per round so that a fresh, empty vector is used after the
+      // previous one has been moved from.
+      std::vector<ir::Loop*> new_loops_to_split{};
+
+      for (ir::Loop* loop : inner_most_loops) {
+        LoopFissionImpl impl{c, loop};
+
+        // Group the instructions in the loop into two different sets of related
+        // instructions. If we can't group the instructions into the two sets
+        // then we can't split the loop any further.
+        if (!impl.GroupInstructionsByUseDef()) {
+          continue;
+        }
+
+        if (impl.CanPerformSplit()) {
+          ir::Loop* second_loop = impl.SplitLoop();
+          changed = true;
+          c->InvalidateAnalysesExceptFor(ir::IRContext::kAnalysisLoopAnalysis);
+
+          // If the newly created loop meets the criteria to be split, split it
+          // again.
+          if (ShouldSplitLoop(*second_loop, c))
+            new_loops_to_split.push_back(second_loop);
+
+          // If the original loop (now split) still meets the criteria to be
+          // split, split it again.
+          if (ShouldSplitLoop(*loop, c)) new_loops_to_split.push_back(loop);
+        }
+      }
+
+      // If the split multiple times flag has been set, the new loops which
+      // meet the splitting criteria become the list of loops to be split on
+      // the next iteration. Otherwise a single round is all we do.
+      if (!split_multiple_times_) break;
+      inner_most_loops = std::move(new_loops_to_split);
+    }
+  }
+
+  return changed ? Pass::Status::SuccessWithChange
+                 : Pass::Status::SuccessWithoutChange;
+}
+
+} // namespace opt
+} // namespace spvtools
diff --git a/source/opt/loop_fission.h b/source/opt/loop_fission.h
new file mode 100644
index 00000000..8a7424dc
--- /dev/null
+++ b/source/opt/loop_fission.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_OPT_LOOP_FISSION_H_
+#define LIBSPIRV_OPT_LOOP_FISSION_H_
+
+#include <algorithm>
+#include <cstdint>
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "cfg.h"
+#include "module.h"
+#include "opt/loop_dependence.h"
+#include "opt/loop_utils.h"
+#include "pass.h"
+#include "tree_iterator.h"
+
+namespace spvtools {
+namespace opt {
+
+class LoopFissionPass : public Pass {
+ public:
+ // Function used to determine if a given loop should be split. Takes register
+ // pressure region for that loop as a parameter and returns true if the loop
+ // should be split.
+ using FissionCriteriaFunction =
+ std::function<bool(const RegisterLiveness::RegionRegisterLiveness&)>;
+
+ // Pass built with this constructor will split all loops regardless of
+ // register pressure. Will not split loops more than once.
+ LoopFissionPass();
+
+ // Split the loop if the number of registers used in the loop exceeds
+ // |register_threshold_to_split|. |split_multiple_times| flag determines
+ // whether or not the pass should split loops after already splitting them
+ // once.
+ LoopFissionPass(size_t register_threshold_to_split,
+ bool split_multiple_times = true);
+
+ // Split loops whose register pressure meets the criteria of |functor|.
+ LoopFissionPass(FissionCriteriaFunction functor,
+ bool split_multiple_times = true)
+ : split_criteria_(functor), split_multiple_times_(split_multiple_times) {}
+
+ const char* name() const override { return "Loop Fission"; }
+
+ Pass::Status Process(ir::IRContext* context) override;
+
+ // Checks if |loop| meets the register pressure criteria to be split.
+ bool ShouldSplitLoop(const ir::Loop& loop, ir::IRContext* context);
+
+ private:
+ // Functor to run in ShouldSplitLoop to determine if the register pressure
+ // criteria is met for splitting the loop.
+ FissionCriteriaFunction split_criteria_;
+
+ // Flag designating whether or not we should also split the result of
+ // previously split loops if they meet the register pressure criteria.
+ bool split_multiple_times_;
+};
+
+} // namespace opt
+} // namespace spvtools
+
+#endif // LIBSPIRV_OPT_LOOP_FISSION_H_
diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp
index b87f659d..f8ed413f 100644
--- a/source/opt/loop_utils.cpp
+++ b/source/opt/loop_utils.cpp
@@ -481,6 +481,78 @@ void LoopUtils::MakeLoopClosedSSA() {
ir::IRContext::Analysis::kAnalysisLoopAnalysis);
}
+// Convenience overload: works out the structured order of |loop_|'s basic
+// blocks itself and forwards to the CloneLoop overload that takes that order.
+ir::Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const {
+  std::vector<ir::BasicBlock*> blocks_in_structured_order;
+  loop_->ComputeLoopStructuredOrder(&blocks_in_structured_order);
+
+  return CloneLoop(cloning_result, blocks_in_structured_order);
+}
+
+ir::Loop* LoopUtils::CloneAndAttachLoopToHeader(
+ LoopCloningResult* cloning_result) {
+ // Clone the loop.
+ ir::Loop* new_loop = CloneLoop(cloning_result);
+
+ // Create a new exit block/label for the new loop.
+ std::unique_ptr<ir::Instruction> new_label{new ir::Instruction(
+ context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
+ std::unique_ptr<ir::BasicBlock> new_exit_bb{
+ new ir::BasicBlock(std::move(new_label))};
+ new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent());
+
+ // Create an unconditional branch to the original loop's header block.
+ opt::InstructionBuilder builder{context_, new_exit_bb.get()};
+ builder.AddBranch(loop_->GetHeaderBlock()->id());
+
+ // Save the ids of the new and old merge block.
+ const uint32_t old_merge_block = loop_->GetMergeBlock()->id();
+ const uint32_t new_merge_block = new_exit_bb->id();
+
+ // Replace the uses of the old merge block in the new loop with the new merge
+ // block.
+ for (std::unique_ptr<ir::BasicBlock>& basic_block :
+ cloning_result->cloned_bb_) {
+ for (ir::Instruction& inst : *basic_block) {
+ // For each operand in each instruction check if it is using the old merge
+ // block and change it to be the new merge block.
+ auto replace_merge_use = [old_merge_block,
+ new_merge_block](uint32_t* id) {
+ if (*id == old_merge_block) *id = new_merge_block;
+ };
+ inst.ForEachInOperand(replace_merge_use);
+ }
+ }
+
+ const uint32_t old_header = loop_->GetHeaderBlock()->id();
+ const uint32_t new_header = new_loop->GetHeaderBlock()->id();
+ opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr();
+
+ def_use->ForEachUse(  // Uses of the old header outside the original loop now refer to the cloned header.
+ old_header, [new_header, this](ir::Instruction* inst, uint32_t operand) {
+ if (!this->loop_->IsInsideLoop(inst))
+ inst->SetOperand(operand, {new_header});
+ });
+
+ def_use->ForEachUse(  // Uses of the preheader id inside the original loop now refer to the new merge block.
+ loop_->GetOrCreatePreHeaderBlock()->id(),
+ [new_merge_block, this](ir::Instruction* inst, uint32_t operand) {
+ if (this->loop_->IsInsideLoop(inst))
+ inst->SetOperand(operand, {new_merge_block});
+
+ });
+ new_loop->SetMergeBlock(new_exit_bb.get());
+
+ new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock());
+
+ // Add the new exit block to the list of cloned blocks.
+ cloning_result->cloned_bb_.push_back(std::move(new_exit_bb));
+
+ return new_loop;
+}
+
ir::Loop* LoopUtils::CloneLoop(
LoopCloningResult* cloning_result,
const std::vector<ir::BasicBlock*>& ordered_loop_blocks) const {
@@ -507,14 +579,16 @@ ir::Loop* LoopUtils::CloneLoop(
if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb);
- for (auto& inst : *new_bb) {
- if (inst.HasResultId()) {
- uint32_t old_result_id = inst.result_id();
- inst.SetResultId(context_->TakeNextId());
- cloning_result->value_map_[old_result_id] = inst.result_id();
+ for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin();
+ new_inst != new_bb->end(); ++new_inst, ++old_inst) {
+ cloning_result->ptr_map_[&*new_inst] = &*old_inst;
+ if (new_inst->HasResultId()) {
+ new_inst->SetResultId(context_->TakeNextId());
+ cloning_result->value_map_[old_inst->result_id()] =
+ new_inst->result_id();
// Only look at the defs for now, uses are not updated yet.
- def_use_mgr->AnalyzeInstDef(&inst);
+ def_use_mgr->AnalyzeInstDef(&*new_inst);
}
}
}
diff --git a/source/opt/loop_utils.h b/source/opt/loop_utils.h
index 47f78e67..de3ff2bf 100644
--- a/source/opt/loop_utils.h
+++ b/source/opt/loop_utils.h
@@ -46,6 +46,9 @@ class LoopUtils {
struct LoopCloningResult {
using ValueMapTy = std::unordered_map<uint32_t, uint32_t>;
using BlockMapTy = std::unordered_map<uint32_t, ir::BasicBlock*>;
+ using PtrMap = std::unordered_map<ir::Instruction*, ir::Instruction*>;
+
+ PtrMap ptr_map_;
// Mapping between the original loop ids and the new one.
ValueMapTy value_map_;
@@ -111,6 +114,12 @@ class LoopUtils {
ir::Loop* CloneLoop(
LoopCloningResult* cloning_result,
const std::vector<ir::BasicBlock*>& ordered_loop_blocks) const;
+ // Clone |loop_| and remap its instructions, as above. Overload to compute
+ // loop block ordering within method rather than taking in as parameter.
+ ir::Loop* CloneLoop(LoopCloningResult* cloning_result) const;
+
+ // Clone the |loop_| and make the new loop branch to the second loop on exit.
+ ir::Loop* CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result);
// Perfom a partial unroll of |loop| by given |factor|. This will copy the
// body of the loop |factor| times. So a |factor| of one would give a new loop
diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp
index 5558fbb3..f184a9a4 100644
--- a/source/opt/optimizer.cpp
+++ b/source/opt/optimizer.cpp
@@ -379,6 +379,11 @@ Optimizer::PassToken CreateLocalRedundancyEliminationPass() {
MakeUnique<opt::LocalRedundancyEliminationPass>());
}
+Optimizer::PassToken CreateLoopFissionPass(size_t threshold) {  // Wraps a LoopFissionPass built from |threshold| in a pass token.
+ return MakeUnique<Optimizer::PassToken::Impl>(
+ MakeUnique<opt::LoopFissionPass>(threshold));  // Only the threshold is passed; any other constructor argument keeps its default.
+}
+
Optimizer::PassToken CreateLoopInvariantCodeMotionPass() {
return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::LICMPass>());
}
diff --git a/source/opt/passes.h b/source/opt/passes.h
index 4e81864c..4f44d613 100644
--- a/source/opt/passes.h
+++ b/source/opt/passes.h
@@ -42,6 +42,7 @@
#include "local_single_block_elim_pass.h"
#include "local_single_store_elim_pass.h"
#include "local_ssa_elim_pass.h"
+#include "loop_fission.h"
#include "loop_peeling.h"
#include "loop_unroller.h"
#include "loop_unswitch_pass.h"
diff --git a/test/opt/loop_optimizations/CMakeLists.txt b/test/opt/loop_optimizations/CMakeLists.txt
index 7b464fa4..26f32386 100644
--- a/test/opt/loop_optimizations/CMakeLists.txt
+++ b/test/opt/loop_optimizations/CMakeLists.txt
@@ -108,3 +108,11 @@ add_spvtools_unittest(TARGET loop_dependence_analysis_helpers
dependence_analysis_helpers.cpp
LIBS SPIRV-Tools-opt
)
+
+add_spvtools_unittest(TARGET loop_fission
+ SRCS ../function_utils.h
+ loop_fission.cpp
+ LIBS SPIRV-Tools-opt
+)
+
+
diff --git a/test/opt/loop_optimizations/loop_fission.cpp b/test/opt/loop_optimizations/loop_fission.cpp
new file mode 100644
index 00000000..24e89595
--- /dev/null
+++ b/test/opt/loop_optimizations/loop_fission.cpp
@@ -0,0 +1,3492 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+#include <vector>
+
+#include <gmock/gmock.h>
+
+#include "../assembly_builder.h"
+#include "../function_utils.h"
+#include "../pass_fixture.h"
+#include "../pass_utils.h"
+#include "opt/loop_fission.h"
+#include "opt/loop_unroller.h"
+#include "opt/loop_utils.h"
+#include "opt/pass.h"
+namespace {
+
+using namespace spvtools;
+using ::testing::UnorderedElementsAre;
+
+using FissionClassTest = PassTest<::testing::Test>;
+
+/*
+Generated from the following GLSL
+
+#version 430
+
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ A[i] = B[i];
+ B[i] = A[i];
+ }
+}
+
+Result should be equivalent to:
+
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ A[i] = B[i];
+ }
+
+ for (int i = 0; i < 10; i++) {
+ B[i] = A[i];
+ }
+}
+*/
+TEST_F(FissionClassTest, SimpleFission) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "A"
+OpName %5 "B"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %20 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%29 = OpAccessChain %18 %5 %22
+%30 = OpLoad %13 %29
+%31 = OpAccessChain %18 %4 %22
+OpStore %31 %30
+%32 = OpAccessChain %18 %4 %22
+%33 = OpLoad %13 %32
+%34 = OpAccessChain %18 %5 %22
+OpStore %34 %33
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "A"
+OpName %5 "B"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpBranch %35
+%35 = OpLabel
+%36 = OpPhi %8 %10 %20 %47 %46
+OpLoopMerge %48 %46 None
+OpBranch %37
+%37 = OpLabel
+%38 = OpSLessThan %12 %36 %11
+OpBranchConditional %38 %39 %48
+%39 = OpLabel
+%40 = OpAccessChain %18 %5 %36
+%41 = OpLoad %13 %40
+%42 = OpAccessChain %18 %4 %36
+OpStore %42 %41
+OpBranch %46
+%46 = OpLabel
+%47 = OpIAdd %8 %36 %19
+OpBranch %35
+%48 = OpLabel
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %48 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%32 = OpAccessChain %18 %4 %22
+%33 = OpLoad %13 %32
+%34 = OpAccessChain %18 %5 %22
+OpStore %34 %33
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+
+ // Check that the loop will NOT be split when provided with a pass-through
+ // register pressure functor which just returns false.
+ SinglePassRunAndCheck<opt::LoopFissionPass>(
+ source, source, true,
+ [](const opt::RegisterLiveness::RegionRegisterLiveness&) {
+ return false;
+ });
+}
+
+/*
+Generated from the following GLSL
+
+#version 430
+
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ A[i] = B[i];
+ B[i] = A[i+1];
+ }
+}
+
+This loop should not be split, as the i+1 dependence would be broken by
+splitting the loop.
+*/
+
+TEST_F(FissionClassTest, FissionInterdependency) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+ const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "A"
+OpName %5 "B"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %20 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%29 = OpAccessChain %18 %5 %22
+%30 = OpLoad %13 %29
+%31 = OpAccessChain %18 %4 %22
+OpStore %31 %30
+%32 = OpIAdd %8 %22 %19
+%33 = OpAccessChain %18 %4 %32
+%34 = OpLoad %13 %33
+%35 = OpAccessChain %18 %5 %22
+OpStore %35 %34
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);
+}
+
+/*
+Generated from the following GLSL
+
+#version 430
+
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ A[i] = B[i];
+ B[i+1] = A[i];
+ }
+}
+
+
+This should not be split as the load B[i] is dependent on the store B[i+1]
+*/
+TEST_F(FissionClassTest, FissionInterdependency2) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "A"
+OpName %5 "B"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %20 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%29 = OpAccessChain %18 %5 %22
+%30 = OpLoad %13 %29
+%31 = OpAccessChain %18 %4 %22
+OpStore %31 %30
+%32 = OpIAdd %8 %22 %19
+%33 = OpAccessChain %18 %4 %22
+%34 = OpLoad %13 %33
+%35 = OpAccessChain %18 %5 %32
+OpStore %35 %34
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ float C[10];
+ float D[10];
+ for (int i = 0; i < 10; i++) {
+ A[i] = B[i];
+ B[i] = A[i];
+ C[i] = D[i];
+ D[i] = C[i];
+ }
+}
+
+This should be split into the equivalent of:
+
+ for (int i = 0; i < 10; i++) {
+ A[i] = B[i];
+ B[i] = A[i];
+ }
+ for (int i = 0; i < 10; i++) {
+ C[i] = D[i];
+ D[i] = C[i];
+ }
+
+We then check that the loop is broken into four for loops like so, if the pass
+is run twice:
+ for (int i = 0; i < 10; i++)
+ A[i] = B[i];
+ for (int i = 0; i < 10; i++)
+ B[i] = A[i];
+ for (int i = 0; i < 10; i++)
+ C[i] = D[i];
+ for (int i = 0; i < 10; i++)
+ D[i] = C[i];
+
+*/
+
+TEST_F(FissionClassTest, FissionMultipleLoadStores) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+ const std::string source = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "i"
+ OpName %4 "A"
+ OpName %5 "B"
+ OpName %6 "C"
+ OpName %7 "D"
+ %8 = OpTypeVoid
+ %9 = OpTypeFunction %8
+ %10 = OpTypeInt 32 1
+ %11 = OpTypePointer Function %10
+ %12 = OpConstant %10 0
+ %13 = OpConstant %10 10
+ %14 = OpTypeBool
+ %15 = OpTypeFloat 32
+ %16 = OpTypeInt 32 0
+ %17 = OpConstant %16 10
+ %18 = OpTypeArray %15 %17
+ %19 = OpTypePointer Function %18
+ %20 = OpTypePointer Function %15
+ %21 = OpConstant %10 1
+ %2 = OpFunction %8 None %9
+ %22 = OpLabel
+ %3 = OpVariable %11 Function
+ %4 = OpVariable %19 Function
+ %5 = OpVariable %19 Function
+ %6 = OpVariable %19 Function
+ %7 = OpVariable %19 Function
+ OpBranch %23
+ %23 = OpLabel
+ %24 = OpPhi %10 %12 %22 %25 %26
+ OpLoopMerge %27 %26 None
+ OpBranch %28
+ %28 = OpLabel
+ %29 = OpSLessThan %14 %24 %13
+ OpBranchConditional %29 %30 %27
+ %30 = OpLabel
+ %31 = OpAccessChain %20 %5 %24
+ %32 = OpLoad %15 %31
+ %33 = OpAccessChain %20 %4 %24
+ OpStore %33 %32
+ %34 = OpAccessChain %20 %4 %24
+ %35 = OpLoad %15 %34
+ %36 = OpAccessChain %20 %5 %24
+ OpStore %36 %35
+ %37 = OpAccessChain %20 %7 %24
+ %38 = OpLoad %15 %37
+ %39 = OpAccessChain %20 %6 %24
+ OpStore %39 %38
+ %40 = OpAccessChain %20 %6 %24
+ %41 = OpLoad %15 %40
+ %42 = OpAccessChain %20 %7 %24
+ OpStore %42 %41
+ OpBranch %26
+ %26 = OpLabel
+ %25 = OpIAdd %10 %24 %21
+ OpBranch %23
+ %27 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "A"
+OpName %5 "B"
+OpName %6 "C"
+OpName %7 "D"
+%8 = OpTypeVoid
+%9 = OpTypeFunction %8
+%10 = OpTypeInt 32 1
+%11 = OpTypePointer Function %10
+%12 = OpConstant %10 0
+%13 = OpConstant %10 10
+%14 = OpTypeBool
+%15 = OpTypeFloat 32
+%16 = OpTypeInt 32 0
+%17 = OpConstant %16 10
+%18 = OpTypeArray %15 %17
+%19 = OpTypePointer Function %18
+%20 = OpTypePointer Function %15
+%21 = OpConstant %10 1
+%2 = OpFunction %8 None %9
+%22 = OpLabel
+%3 = OpVariable %11 Function
+%4 = OpVariable %19 Function
+%5 = OpVariable %19 Function
+%6 = OpVariable %19 Function
+%7 = OpVariable %19 Function
+OpBranch %43
+%43 = OpLabel
+%44 = OpPhi %10 %12 %22 %61 %60
+OpLoopMerge %62 %60 None
+OpBranch %45
+%45 = OpLabel
+%46 = OpSLessThan %14 %44 %13
+OpBranchConditional %46 %47 %62
+%47 = OpLabel
+%48 = OpAccessChain %20 %5 %44
+%49 = OpLoad %15 %48
+%50 = OpAccessChain %20 %4 %44
+OpStore %50 %49
+%51 = OpAccessChain %20 %4 %44
+%52 = OpLoad %15 %51
+%53 = OpAccessChain %20 %5 %44
+OpStore %53 %52
+OpBranch %60
+%60 = OpLabel
+%61 = OpIAdd %10 %44 %21
+OpBranch %43
+%62 = OpLabel
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %10 %12 %62 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %14 %24 %13
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+%37 = OpAccessChain %20 %7 %24
+%38 = OpLoad %15 %37
+%39 = OpAccessChain %20 %6 %24
+OpStore %39 %38
+%40 = OpAccessChain %20 %6 %24
+%41 = OpLoad %15 %40
+%42 = OpAccessChain %20 %7 %24
+OpStore %42 %41
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %10 %24 %21
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+
+const std::string expected_multiple_passes = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "A"
+OpName %5 "B"
+OpName %6 "C"
+OpName %7 "D"
+%8 = OpTypeVoid
+%9 = OpTypeFunction %8
+%10 = OpTypeInt 32 1
+%11 = OpTypePointer Function %10
+%12 = OpConstant %10 0
+%13 = OpConstant %10 10
+%14 = OpTypeBool
+%15 = OpTypeFloat 32
+%16 = OpTypeInt 32 0
+%17 = OpConstant %16 10
+%18 = OpTypeArray %15 %17
+%19 = OpTypePointer Function %18
+%20 = OpTypePointer Function %15
+%21 = OpConstant %10 1
+%2 = OpFunction %8 None %9
+%22 = OpLabel
+%3 = OpVariable %11 Function
+%4 = OpVariable %19 Function
+%5 = OpVariable %19 Function
+%6 = OpVariable %19 Function
+%7 = OpVariable %19 Function
+OpBranch %63
+%63 = OpLabel
+%64 = OpPhi %10 %12 %22 %75 %74
+OpLoopMerge %76 %74 None
+OpBranch %65
+%65 = OpLabel
+%66 = OpSLessThan %14 %64 %13
+OpBranchConditional %66 %67 %76
+%67 = OpLabel
+%68 = OpAccessChain %20 %5 %64
+%69 = OpLoad %15 %68
+%70 = OpAccessChain %20 %4 %64
+OpStore %70 %69
+OpBranch %74
+%74 = OpLabel
+%75 = OpIAdd %10 %64 %21
+OpBranch %63
+%76 = OpLabel
+OpBranch %43
+%43 = OpLabel
+%44 = OpPhi %10 %12 %76 %61 %60
+OpLoopMerge %62 %60 None
+OpBranch %45
+%45 = OpLabel
+%46 = OpSLessThan %14 %44 %13
+OpBranchConditional %46 %47 %62
+%47 = OpLabel
+%51 = OpAccessChain %20 %4 %44
+%52 = OpLoad %15 %51
+%53 = OpAccessChain %20 %5 %44
+OpStore %53 %52
+OpBranch %60
+%60 = OpLabel
+%61 = OpIAdd %10 %44 %21
+OpBranch %43
+%62 = OpLabel
+OpBranch %77
+%77 = OpLabel
+%78 = OpPhi %10 %12 %62 %89 %88
+OpLoopMerge %90 %88 None
+OpBranch %79
+%79 = OpLabel
+%80 = OpSLessThan %14 %78 %13
+OpBranchConditional %80 %81 %90
+%81 = OpLabel
+%82 = OpAccessChain %20 %7 %78
+%83 = OpLoad %15 %82
+%84 = OpAccessChain %20 %6 %78
+OpStore %84 %83
+OpBranch %88
+%88 = OpLabel
+%89 = OpIAdd %10 %78 %21
+OpBranch %77
+%90 = OpLabel
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %10 %12 %90 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %14 %24 %13
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+%40 = OpAccessChain %20 %6 %24
+%41 = OpLoad %15 %40
+%42 = OpAccessChain %20 %7 %24
+OpStore %42 %41
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %10 %24 %21
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+
+ // By passing 1 as the argument we use the constructor that makes the
+ // criterion for splitting the loop be that the registers in the loop exceed
+ // 1. By using this constructor we are also enabling multiple passes
+ // (disabled by default).
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected_multiple_passes,
+ true, 1);
+}
+
+/*
+#version 430
+void main(void) {
+ int accumulator = 0;
+ float X[10];
+ float Y[10];
+
+ for (int i = 0; i < 10; i++) {
+ X[i] = Y[i];
+ Y[i] = X[i];
+ accumulator += i;
+ }
+}
+
+This should be split into the equivalent of:
+
+#version 430
+void main(void) {
+ int accumulator = 0;
+ float X[10];
+ float Y[10];
+
+ for (int i = 0; i < 10; i++) {
+ X[i] = Y[i];
+ }
+ for (int i = 0; i < 10; i++) {
+ Y[i] = X[i];
+ accumulator += i;
+ }
+}
+*/
+TEST_F(FissionClassTest, FissionWithAccumulator) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+ const std::string source = R"(OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "accumulator"
+ OpName %4 "i"
+ OpName %5 "X"
+ OpName %6 "Y"
+ %7 = OpTypeVoid
+ %8 = OpTypeFunction %7
+ %9 = OpTypeInt 32 1
+ %10 = OpTypePointer Function %9
+ %11 = OpConstant %9 0
+ %12 = OpConstant %9 10
+ %13 = OpTypeBool
+ %14 = OpTypeFloat 32
+ %15 = OpTypeInt 32 0
+ %16 = OpConstant %15 10
+ %17 = OpTypeArray %14 %16
+ %18 = OpTypePointer Function %17
+ %19 = OpTypePointer Function %14
+ %20 = OpConstant %9 1
+ %2 = OpFunction %7 None %8
+ %21 = OpLabel
+ %3 = OpVariable %10 Function
+ %4 = OpVariable %10 Function
+ %5 = OpVariable %18 Function
+ %6 = OpVariable %18 Function
+ OpBranch %22
+ %22 = OpLabel
+ %23 = OpPhi %9 %11 %21 %24 %25
+ %26 = OpPhi %9 %11 %21 %27 %25
+ OpLoopMerge %28 %25 None
+ OpBranch %29
+ %29 = OpLabel
+ %30 = OpSLessThan %13 %26 %12
+ OpBranchConditional %30 %31 %28
+ %31 = OpLabel
+ %32 = OpAccessChain %19 %6 %26
+ %33 = OpLoad %14 %32
+ %34 = OpAccessChain %19 %5 %26
+ OpStore %34 %33
+ %35 = OpAccessChain %19 %5 %26
+ %36 = OpLoad %14 %35
+ %37 = OpAccessChain %19 %6 %26
+ OpStore %37 %36
+ %24 = OpIAdd %9 %23 %26
+ OpBranch %25
+ %25 = OpLabel
+ %27 = OpIAdd %9 %26 %20
+ OpBranch %22
+ %28 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "accumulator"
+OpName %4 "i"
+OpName %5 "X"
+OpName %6 "Y"
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypePointer Function %17
+%19 = OpTypePointer Function %14
+%20 = OpConstant %9 1
+%2 = OpFunction %7 None %8
+%21 = OpLabel
+%3 = OpVariable %10 Function
+%4 = OpVariable %10 Function
+%5 = OpVariable %18 Function
+%6 = OpVariable %18 Function
+OpBranch %38
+%38 = OpLabel
+%40 = OpPhi %9 %11 %21 %52 %51
+OpLoopMerge %53 %51 None
+OpBranch %41
+%41 = OpLabel
+%42 = OpSLessThan %13 %40 %12
+OpBranchConditional %42 %43 %53
+%43 = OpLabel
+%44 = OpAccessChain %19 %6 %40
+%45 = OpLoad %14 %44
+%46 = OpAccessChain %19 %5 %40
+OpStore %46 %45
+OpBranch %51
+%51 = OpLabel
+%52 = OpIAdd %9 %40 %20
+OpBranch %38
+%53 = OpLabel
+OpBranch %22
+%22 = OpLabel
+%23 = OpPhi %9 %11 %53 %24 %25
+%26 = OpPhi %9 %11 %53 %27 %25
+OpLoopMerge %28 %25 None
+OpBranch %29
+%29 = OpLabel
+%30 = OpSLessThan %13 %26 %12
+OpBranchConditional %30 %31 %28
+%31 = OpLabel
+%35 = OpAccessChain %19 %5 %26
+%36 = OpLoad %14 %35
+%37 = OpAccessChain %19 %6 %26
+OpStore %37 %36
+%24 = OpIAdd %9 %23 %26
+OpBranch %25
+%25 = OpLabel
+%27 = OpIAdd %9 %26 %20
+OpBranch %22
+%28 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+}
+
+/*
+Generated from the following GLSL:
+
+#version 430
+layout(location=0) out float x;
+layout(location=1) out float y;
+
+void main(void) {
+ float accumulator_1 = 0;
+ float accumulator_2 = 0;
+ for (int i = 0; i < 10; i++) {
+ accumulator_1 += i;
+ accumulator_2 += i;
+ }
+
+ x = accumulator_1;
+ y = accumulator_2;
+}
+
+Should be split into equivalent of:
+
+void main(void) {
+ float accumulator_1 = 0;
+ float accumulator_2 = 0;
+ for (int i = 0; i < 10; i++) {
+ accumulator_1 += i;
+ }
+
+ for (int i = 0; i < 10; i++) {
+ accumulator_2 += i;
+ }
+ x = accumulator_1;
+ y = accumulator_2;
+}
+
+*/
+TEST_F(FissionClassTest, FissionWithPhisUsedOutwithLoop) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+ const std::string source = R"(OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main" %3 %4
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %5 "accumulator_1"
+ OpName %6 "accumulator_2"
+ OpName %7 "i"
+ OpName %3 "x"
+ OpName %4 "y"
+ OpDecorate %3 Location 0
+ OpDecorate %4 Location 1
+ %8 = OpTypeVoid
+ %9 = OpTypeFunction %8
+ %10 = OpTypeFloat 32
+ %11 = OpTypePointer Function %10
+ %12 = OpConstant %10 0
+ %13 = OpTypeInt 32 1
+ %14 = OpTypePointer Function %13
+ %15 = OpConstant %13 0
+ %16 = OpConstant %13 10
+ %17 = OpTypeBool
+ %18 = OpConstant %13 1
+ %19 = OpTypePointer Output %10
+ %3 = OpVariable %19 Output
+ %4 = OpVariable %19 Output
+ %2 = OpFunction %8 None %9
+ %20 = OpLabel
+ %5 = OpVariable %11 Function
+ %6 = OpVariable %11 Function
+ %7 = OpVariable %14 Function
+ OpBranch %21
+ %21 = OpLabel
+ %22 = OpPhi %10 %12 %20 %23 %24
+ %25 = OpPhi %10 %12 %20 %26 %24
+ %27 = OpPhi %13 %15 %20 %28 %24
+ OpLoopMerge %29 %24 None
+ OpBranch %30
+ %30 = OpLabel
+ %31 = OpSLessThan %17 %27 %16
+ OpBranchConditional %31 %32 %29
+ %32 = OpLabel
+ %33 = OpConvertSToF %10 %27
+ %26 = OpFAdd %10 %25 %33
+ %34 = OpConvertSToF %10 %27
+ %23 = OpFAdd %10 %22 %34
+ OpBranch %24
+ %24 = OpLabel
+ %28 = OpIAdd %13 %27 %18
+ OpStore %7 %28
+ OpBranch %21
+ %29 = OpLabel
+ OpStore %3 %25
+ OpStore %4 %22
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main" %3 %4
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %5 "accumulator_1"
+OpName %6 "accumulator_2"
+OpName %7 "i"
+OpName %3 "x"
+OpName %4 "y"
+OpDecorate %3 Location 0
+OpDecorate %4 Location 1
+%8 = OpTypeVoid
+%9 = OpTypeFunction %8
+%10 = OpTypeFloat 32
+%11 = OpTypePointer Function %10
+%12 = OpConstant %10 0
+%13 = OpTypeInt 32 1
+%14 = OpTypePointer Function %13
+%15 = OpConstant %13 0
+%16 = OpConstant %13 10
+%17 = OpTypeBool
+%18 = OpConstant %13 1
+%19 = OpTypePointer Output %10
+%3 = OpVariable %19 Output
+%4 = OpVariable %19 Output
+%2 = OpFunction %8 None %9
+%20 = OpLabel
+%5 = OpVariable %11 Function
+%6 = OpVariable %11 Function
+%7 = OpVariable %14 Function
+OpBranch %35
+%35 = OpLabel
+%37 = OpPhi %10 %12 %20 %43 %46
+%38 = OpPhi %13 %15 %20 %47 %46
+OpLoopMerge %48 %46 None
+OpBranch %39
+%39 = OpLabel
+%40 = OpSLessThan %17 %38 %16
+OpBranchConditional %40 %41 %48
+%41 = OpLabel
+%42 = OpConvertSToF %10 %38
+%43 = OpFAdd %10 %37 %42
+OpBranch %46
+%46 = OpLabel
+%47 = OpIAdd %13 %38 %18
+OpStore %7 %47
+OpBranch %35
+%48 = OpLabel
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %10 %12 %48 %23 %24
+%27 = OpPhi %13 %15 %48 %28 %24
+OpLoopMerge %29 %24 None
+OpBranch %30
+%30 = OpLabel
+%31 = OpSLessThan %17 %27 %16
+OpBranchConditional %31 %32 %29
+%32 = OpLabel
+%34 = OpConvertSToF %10 %27
+%23 = OpFAdd %10 %22 %34
+OpBranch %24
+%24 = OpLabel
+%28 = OpIAdd %13 %27 %18
+OpStore %7 %28
+OpBranch %21
+%29 = OpLabel
+OpStore %3 %37
+OpStore %4 %22
+OpReturn
+OpFunctionEnd
+)";
+
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10][10];
+ float B[10][10];
+ for (int i = 0; i < 10; i++) {
+ for (int j = 0; j < 10; j++) {
+ A[i][j] = B[i][j];
+ B[i][j] = A[i][j];
+ }
+ }
+}
+
+Should be split into equivalent of:
+
+#version 430
+void main(void) {
+ float A[10][10];
+ float B[10][10];
+ for (int i = 0; i < 10; i++) {
+ for (int j = 0; j < 10; j++) {
+ A[i][j] = B[i][j];
+ }
+ for (int j = 0; j < 10; j++) {
+ B[i][j] = A[i][j];
+ }
+ }
+}
+
+
+*/
+TEST_F(FissionClassTest, FissionNested) {
+ // clang-format off
+ // With opt::LocalMultiStoreElimPass
+ const std::string source = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "i"
+ OpName %4 "j"
+ OpName %5 "A"
+ OpName %6 "B"
+ %7 = OpTypeVoid
+ %8 = OpTypeFunction %7
+ %9 = OpTypeInt 32 1
+ %10 = OpTypePointer Function %9
+ %11 = OpConstant %9 0
+ %12 = OpConstant %9 10
+ %13 = OpTypeBool
+ %14 = OpTypeFloat 32
+ %15 = OpTypeInt 32 0
+ %16 = OpConstant %15 10
+ %17 = OpTypeArray %14 %16
+ %18 = OpTypeArray %17 %16
+ %19 = OpTypePointer Function %18
+ %20 = OpTypePointer Function %14
+ %21 = OpConstant %9 1
+ %2 = OpFunction %7 None %8
+ %22 = OpLabel
+ %3 = OpVariable %10 Function
+ %4 = OpVariable %10 Function
+ %5 = OpVariable %19 Function
+ %6 = OpVariable %19 Function
+ OpStore %3 %11
+ OpBranch %23
+ %23 = OpLabel
+ %24 = OpPhi %9 %11 %22 %25 %26
+ OpLoopMerge %27 %26 None
+ OpBranch %28
+ %28 = OpLabel
+ %29 = OpSLessThan %13 %24 %12
+ OpBranchConditional %29 %30 %27
+ %30 = OpLabel
+ OpStore %4 %11
+ OpBranch %31
+ %31 = OpLabel
+ %32 = OpPhi %9 %11 %30 %33 %34
+ OpLoopMerge %35 %34 None
+ OpBranch %36
+ %36 = OpLabel
+ %37 = OpSLessThan %13 %32 %12
+ OpBranchConditional %37 %38 %35
+ %38 = OpLabel
+ %39 = OpAccessChain %20 %6 %24 %32
+ %40 = OpLoad %14 %39
+ %41 = OpAccessChain %20 %5 %24 %32
+ OpStore %41 %40
+ %42 = OpAccessChain %20 %5 %24 %32
+ %43 = OpLoad %14 %42
+ %44 = OpAccessChain %20 %6 %24 %32
+ OpStore %44 %43
+ OpBranch %34
+ %34 = OpLabel
+ %33 = OpIAdd %9 %32 %21
+ OpStore %4 %33
+ OpBranch %31
+ %35 = OpLabel
+ OpBranch %26
+ %26 = OpLabel
+ %25 = OpIAdd %9 %24 %21
+ OpStore %3 %25
+ OpBranch %23
+ %27 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "j"
+OpName %5 "A"
+OpName %6 "B"
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypeArray %17 %16
+%19 = OpTypePointer Function %18
+%20 = OpTypePointer Function %14
+%21 = OpConstant %9 1
+%2 = OpFunction %7 None %8
+%22 = OpLabel
+%3 = OpVariable %10 Function
+%4 = OpVariable %10 Function
+%5 = OpVariable %19 Function
+%6 = OpVariable %19 Function
+OpStore %3 %11
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %9 %11 %22 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %13 %24 %12
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+OpStore %4 %11
+OpBranch %45
+%45 = OpLabel
+%46 = OpPhi %9 %11 %30 %57 %56
+OpLoopMerge %58 %56 None
+OpBranch %47
+%47 = OpLabel
+%48 = OpSLessThan %13 %46 %12
+OpBranchConditional %48 %49 %58
+%49 = OpLabel
+%50 = OpAccessChain %20 %6 %24 %46
+%51 = OpLoad %14 %50
+%52 = OpAccessChain %20 %5 %24 %46
+OpStore %52 %51
+OpBranch %56
+%56 = OpLabel
+%57 = OpIAdd %9 %46 %21
+OpStore %4 %57
+OpBranch %45
+%58 = OpLabel
+OpBranch %31
+%31 = OpLabel
+%32 = OpPhi %9 %11 %58 %33 %34
+OpLoopMerge %35 %34 None
+OpBranch %36
+%36 = OpLabel
+%37 = OpSLessThan %13 %32 %12
+OpBranchConditional %37 %38 %35
+%38 = OpLabel
+%42 = OpAccessChain %20 %5 %24 %32
+%43 = OpLoad %14 %42
+%44 = OpAccessChain %20 %6 %24 %32
+OpStore %44 %43
+OpBranch %34
+%34 = OpLabel
+%33 = OpIAdd %9 %32 %21
+OpStore %4 %33
+OpBranch %31
+%35 = OpLabel
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %9 %24 %21
+OpStore %3 %25
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+}
+
+/*
+#version 430
+void main(void) {
+ int accumulator = 0;
+ float A[10];
+ float B[10];
+ float C[10];
+
+ for (int i = 0; i < 10; i++) {
+ float c = C[i];
+ A[i] = B[i];
+ B[i] = A[i] + c;
+ }
+}
+
+This loop should not be split as we would have to break the order of the loads
+to do so. It would be grouped into two sets:
+
+1
+ float c = C[i];
+ B[i] = A[i] + c;
+
+2
+ A[i] = B[i];
+
+To keep the load C[i] in the same order we would need to put B[i] ahead of that
+*/
+TEST_F(FissionClassTest, FissionLoad) {  // Negative test: the loop must stay unsplit.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "c"
+OpName %5 "C"
+OpName %6 "A"
+OpName %7 "B"
+%8 = OpTypeVoid
+%9 = OpTypeFunction %8
+%10 = OpTypeInt 32 1
+%11 = OpTypePointer Function %10
+%12 = OpConstant %10 0
+%13 = OpConstant %10 10
+%14 = OpTypeBool
+%15 = OpTypeFloat 32
+%16 = OpTypePointer Function %15
+%17 = OpTypeInt 32 0
+%18 = OpConstant %17 10
+%19 = OpTypeArray %15 %18
+%20 = OpTypePointer Function %19
+%21 = OpConstant %10 1
+%2 = OpFunction %8 None %9
+%22 = OpLabel
+%3 = OpVariable %11 Function
+%4 = OpVariable %16 Function
+%5 = OpVariable %20 Function
+%6 = OpVariable %20 Function
+%7 = OpVariable %20 Function
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %10 %12 %22 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %14 %24 %13
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+%31 = OpAccessChain %16 %5 %24
+%32 = OpLoad %15 %31
+OpStore %4 %32
+%33 = OpAccessChain %16 %7 %24
+%34 = OpLoad %15 %33
+%35 = OpAccessChain %16 %6 %24
+OpStore %35 %34
+%36 = OpAccessChain %16 %6 %24
+%37 = OpLoad %15 %36
+%38 = OpFAdd %15 %37 %32
+%39 = OpAccessChain %16 %7 %24
+OpStore %39 %38
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %10 %24 %21
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);  // Same string as input and expected: no change allowed.
+}
+
+/*
+#version 430
+layout(location=0) flat in int condition;
+void main(void) {
+ float A[10];
+ float B[10];
+
+ for (int i = 0; i < 10; i++) {
+ if (condition == 1)
+ A[i] = B[i];
+ else
+ B[i] = A[i];
+ }
+}
+
+
+When this is split we leave the condition check and control flow in place and
+leave its removal for dead code elimination.
+
+#version 430
+layout(location=0) flat in int condition;
+void main(void) {
+ float A[10];
+ float B[10];
+
+ for (int i = 0; i < 10; i++) {
+ if (condition == 1)
+ A[i] = B[i];
+ else
+ ;
+ }
+ for (int i = 0; i < 10; i++) {
+ if (condition == 1)
+ ;
+ else
+ B[i] = A[i];
+ }
+}
+
+
+*/
+TEST_F(FissionClassTest, FissionControlFlow) {  // Positive test: one loop in source, two in expected.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+ const std::string source = R"(
+ OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main" %3
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %4 "i"
+ OpName %3 "condition"
+ OpName %5 "A"
+ OpName %6 "B"
+ OpDecorate %3 Flat
+ OpDecorate %3 Location 0
+ %7 = OpTypeVoid
+ %8 = OpTypeFunction %7
+ %9 = OpTypeInt 32 1
+ %10 = OpTypePointer Function %9
+ %11 = OpConstant %9 0
+ %12 = OpConstant %9 10
+ %13 = OpTypeBool
+ %14 = OpTypePointer Input %9
+ %3 = OpVariable %14 Input
+ %15 = OpConstant %9 1
+ %16 = OpTypeFloat 32
+ %17 = OpTypeInt 32 0
+ %18 = OpConstant %17 10
+ %19 = OpTypeArray %16 %18
+ %20 = OpTypePointer Function %19
+ %21 = OpTypePointer Function %16
+ %2 = OpFunction %7 None %8
+ %22 = OpLabel
+ %4 = OpVariable %10 Function
+ %5 = OpVariable %20 Function
+ %6 = OpVariable %20 Function
+ %31 = OpLoad %9 %3
+ OpStore %4 %11
+ OpBranch %23
+ %23 = OpLabel
+ %24 = OpPhi %9 %11 %22 %25 %26
+ OpLoopMerge %27 %26 None
+ OpBranch %28
+ %28 = OpLabel
+ %29 = OpSLessThan %13 %24 %12
+ OpBranchConditional %29 %30 %27
+ %30 = OpLabel
+ %32 = OpIEqual %13 %31 %15
+ OpSelectionMerge %33 None
+ OpBranchConditional %32 %34 %35
+ %34 = OpLabel
+ %36 = OpAccessChain %21 %6 %24
+ %37 = OpLoad %16 %36
+ %38 = OpAccessChain %21 %5 %24
+ OpStore %38 %37
+ OpBranch %33
+ %35 = OpLabel
+ %39 = OpAccessChain %21 %5 %24
+ %40 = OpLoad %16 %39
+ %41 = OpAccessChain %21 %6 %24
+ OpStore %41 %40
+ OpBranch %33
+ %33 = OpLabel
+ OpBranch %26
+ %26 = OpLabel
+ %25 = OpIAdd %9 %24 %15
+ OpStore %4 %25
+ OpBranch %23
+ %27 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main" %3
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %4 "i"
+OpName %3 "condition"
+OpName %5 "A"
+OpName %6 "B"
+OpDecorate %3 Flat
+OpDecorate %3 Location 0
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypePointer Input %9
+%3 = OpVariable %14 Input
+%15 = OpConstant %9 1
+%16 = OpTypeFloat 32
+%17 = OpTypeInt 32 0
+%18 = OpConstant %17 10
+%19 = OpTypeArray %16 %18
+%20 = OpTypePointer Function %19
+%21 = OpTypePointer Function %16
+%2 = OpFunction %7 None %8
+%22 = OpLabel
+%4 = OpVariable %10 Function
+%5 = OpVariable %20 Function
+%6 = OpVariable %20 Function
+%23 = OpLoad %9 %3
+OpStore %4 %11
+OpBranch %42
+%42 = OpLabel
+%43 = OpPhi %9 %11 %22 %58 %57
+OpLoopMerge %59 %57 None
+OpBranch %44
+%44 = OpLabel
+%45 = OpSLessThan %13 %43 %12
+OpBranchConditional %45 %46 %59
+%46 = OpLabel
+%47 = OpIEqual %13 %23 %15
+OpSelectionMerge %56 None
+OpBranchConditional %47 %52 %48
+%48 = OpLabel
+OpBranch %56
+%52 = OpLabel
+%53 = OpAccessChain %21 %6 %43
+%54 = OpLoad %16 %53
+%55 = OpAccessChain %21 %5 %43
+OpStore %55 %54
+OpBranch %56
+%56 = OpLabel
+OpBranch %57
+%57 = OpLabel
+%58 = OpIAdd %9 %43 %15
+OpStore %4 %58
+OpBranch %42
+%59 = OpLabel
+OpBranch %24
+%24 = OpLabel
+%25 = OpPhi %9 %11 %59 %26 %27
+OpLoopMerge %28 %27 None
+OpBranch %29
+%29 = OpLabel
+%30 = OpSLessThan %13 %25 %12
+OpBranchConditional %30 %31 %28
+%31 = OpLabel
+%32 = OpIEqual %13 %23 %15
+OpSelectionMerge %33 None
+OpBranchConditional %32 %34 %35
+%34 = OpLabel
+OpBranch %33
+%35 = OpLabel
+%39 = OpAccessChain %21 %5 %25
+%40 = OpLoad %16 %39
+%41 = OpAccessChain %21 %6 %25
+OpStore %41 %40
+OpBranch %33
+%33 = OpLabel
+OpBranch %27
+%27 = OpLabel
+%26 = OpIAdd %9 %25 %15
+OpStore %4 %26
+OpBranch %24
+%28 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);  // Presumably compares the pass output with expected.
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ if (i == 1)
+ B[i] = A[i];
+ else if (i == 2)
+ A[i] = B[i];
+ else
+ A[i] = 0;
+ }
+}
+
+After running the pass with multiple splits enabled (via register threshold of
+1) we expect the equivalent of:
+
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ if (i == 1)
+ B[i] = A[i];
+ else if (i == 2)
+ else
+ }
+ for (int i = 0; i < 10; i++) {
+ if (i == 1)
+ else if (i == 2)
+ A[i] = B[i];
+ else
+ }
+ for (int i = 0; i < 10; i++) {
+ if (i == 1)
+ else if (i == 2)
+ else
+ A[i] = 0;
+ }
+
+}
+
+*/
+TEST_F(FissionClassTest, FissionControlFlow2) {  // Positive test: one loop in source, three in expected.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+ const std::string source = R"(OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "i"
+ OpName %4 "B"
+ OpName %5 "A"
+ %6 = OpTypeVoid
+ %7 = OpTypeFunction %6
+ %8 = OpTypeInt 32 1
+ %9 = OpTypePointer Function %8
+ %10 = OpConstant %8 0
+ %11 = OpConstant %8 10
+ %12 = OpTypeBool
+ %13 = OpConstant %8 1
+ %14 = OpTypeFloat 32
+ %15 = OpTypeInt 32 0
+ %16 = OpConstant %15 10
+ %17 = OpTypeArray %14 %16
+ %18 = OpTypePointer Function %17
+ %19 = OpTypePointer Function %14
+ %20 = OpConstant %8 2
+ %21 = OpConstant %14 0
+ %2 = OpFunction %6 None %7
+ %22 = OpLabel
+ %3 = OpVariable %9 Function
+ %4 = OpVariable %18 Function
+ %5 = OpVariable %18 Function
+ OpStore %3 %10
+ OpBranch %23
+ %23 = OpLabel
+ %24 = OpPhi %8 %10 %22 %25 %26
+ OpLoopMerge %27 %26 None
+ OpBranch %28
+ %28 = OpLabel
+ %29 = OpSLessThan %12 %24 %11
+ OpBranchConditional %29 %30 %27
+ %30 = OpLabel
+ %31 = OpIEqual %12 %24 %13
+ OpSelectionMerge %32 None
+ OpBranchConditional %31 %33 %34
+ %33 = OpLabel
+ %35 = OpAccessChain %19 %5 %24
+ %36 = OpLoad %14 %35
+ %37 = OpAccessChain %19 %4 %24
+ OpStore %37 %36
+ OpBranch %32
+ %34 = OpLabel
+ %38 = OpIEqual %12 %24 %20
+ OpSelectionMerge %39 None
+ OpBranchConditional %38 %40 %41
+ %40 = OpLabel
+ %42 = OpAccessChain %19 %4 %24
+ %43 = OpLoad %14 %42
+ %44 = OpAccessChain %19 %5 %24
+ OpStore %44 %43
+ OpBranch %39
+ %41 = OpLabel
+ %45 = OpAccessChain %19 %5 %24
+ OpStore %45 %21
+ OpBranch %39
+ %39 = OpLabel
+ OpBranch %32
+ %32 = OpLabel
+ OpBranch %26
+ %26 = OpLabel
+ %25 = OpIAdd %8 %24 %13
+ OpStore %3 %25
+ OpBranch %23
+ %27 = OpLabel
+ OpReturn
+ OpFunctionEnd
+ )";
+
+ const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpConstant %8 1
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypePointer Function %17
+%19 = OpTypePointer Function %14
+%20 = OpConstant %8 2
+%21 = OpConstant %14 0
+%2 = OpFunction %6 None %7
+%22 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %18 Function
+%5 = OpVariable %18 Function
+OpStore %3 %10
+OpBranch %46
+%46 = OpLabel
+%47 = OpPhi %8 %10 %22 %67 %66
+OpLoopMerge %68 %66 None
+OpBranch %48
+%48 = OpLabel
+%49 = OpSLessThan %12 %47 %11
+OpBranchConditional %49 %50 %68
+%50 = OpLabel
+%51 = OpIEqual %12 %47 %13
+OpSelectionMerge %65 None
+OpBranchConditional %51 %61 %52
+%52 = OpLabel
+%53 = OpIEqual %12 %47 %20
+OpSelectionMerge %60 None
+OpBranchConditional %53 %56 %54
+%54 = OpLabel
+OpBranch %60
+%56 = OpLabel
+OpBranch %60
+%60 = OpLabel
+OpBranch %65
+%61 = OpLabel
+%62 = OpAccessChain %19 %5 %47
+%63 = OpLoad %14 %62
+%64 = OpAccessChain %19 %4 %47
+OpStore %64 %63
+OpBranch %65
+%65 = OpLabel
+OpBranch %66
+%66 = OpLabel
+%67 = OpIAdd %8 %47 %13
+OpStore %3 %67
+OpBranch %46
+%68 = OpLabel
+OpBranch %69
+%69 = OpLabel
+%70 = OpPhi %8 %10 %68 %87 %86
+OpLoopMerge %88 %86 None
+OpBranch %71
+%71 = OpLabel
+%72 = OpSLessThan %12 %70 %11
+OpBranchConditional %72 %73 %88
+%73 = OpLabel
+%74 = OpIEqual %12 %70 %13
+OpSelectionMerge %85 None
+OpBranchConditional %74 %84 %75
+%75 = OpLabel
+%76 = OpIEqual %12 %70 %20
+OpSelectionMerge %83 None
+OpBranchConditional %76 %79 %77
+%77 = OpLabel
+OpBranch %83
+%79 = OpLabel
+%80 = OpAccessChain %19 %4 %70
+%81 = OpLoad %14 %80
+%82 = OpAccessChain %19 %5 %70
+OpStore %82 %81
+OpBranch %83
+%83 = OpLabel
+OpBranch %85
+%84 = OpLabel
+OpBranch %85
+%85 = OpLabel
+OpBranch %86
+%86 = OpLabel
+%87 = OpIAdd %8 %70 %13
+OpStore %3 %87
+OpBranch %69
+%88 = OpLabel
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %8 %10 %88 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %12 %24 %11
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+%31 = OpIEqual %12 %24 %13
+OpSelectionMerge %32 None
+OpBranchConditional %31 %33 %34
+%33 = OpLabel
+OpBranch %32
+%34 = OpLabel
+%38 = OpIEqual %12 %24 %20
+OpSelectionMerge %39 None
+OpBranchConditional %38 %40 %41
+%40 = OpLabel
+OpBranch %39
+%41 = OpLabel
+%45 = OpAccessChain %19 %5 %24
+OpStore %45 %21
+OpBranch %39
+%39 = OpLabel
+OpBranch %32
+%32 = OpLabel
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %8 %24 %13
+OpStore %3 %25
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true, 1);  // Trailing 1: presumably the register threshold named in the comment above.
+}
+
+/*
+#version 430
+layout(location=0) flat in int condition;
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ B[i] = A[i];
+ memoryBarrier();
+ A[i] = B[i];
+ }
+}
+
+This should not be split due to the memory barrier.
+*/
+TEST_F(FissionClassTest, FissionBarrier) {  // Negative test: the loop body holds an OpMemoryBarrier and must stay unsplit.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main" %3
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %4 "i"
+OpName %5 "B"
+OpName %6 "A"
+OpName %3 "condition"
+OpDecorate %3 Flat
+OpDecorate %3 Location 0
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypePointer Function %17
+%19 = OpTypePointer Function %14
+%20 = OpConstant %15 1
+%21 = OpConstant %15 4048
+%22 = OpConstant %9 1
+%23 = OpTypePointer Input %9
+%3 = OpVariable %23 Input
+%2 = OpFunction %7 None %8
+%24 = OpLabel
+%4 = OpVariable %10 Function
+%5 = OpVariable %18 Function
+%6 = OpVariable %18 Function
+OpStore %4 %11
+OpBranch %25
+%25 = OpLabel
+%26 = OpPhi %9 %11 %24 %27 %28
+OpLoopMerge %29 %28 None
+OpBranch %30
+%30 = OpLabel
+%31 = OpSLessThan %13 %26 %12
+OpBranchConditional %31 %32 %29
+%32 = OpLabel
+%33 = OpAccessChain %19 %6 %26
+%34 = OpLoad %14 %33
+%35 = OpAccessChain %19 %5 %26
+OpStore %35 %34
+OpMemoryBarrier %20 %21
+%36 = OpAccessChain %19 %5 %26
+%37 = OpLoad %14 %36
+%38 = OpAccessChain %19 %6 %26
+OpStore %38 %37
+OpBranch %28
+%28 = OpLabel
+%27 = OpIAdd %9 %26 %22
+OpStore %4 %27
+OpBranch %25
+%29 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);  // Same string as input and expected: no change allowed.
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ B[i] = A[i];
+ if ( i== 1)
+ break;
+ A[i] = B[i];
+ }
+}
+
+This should not be split due to the break.
+*/
+TEST_F(FissionClassTest, FissionBreak) {  // Negative test: block %34 branches to the merge block %25; the loop must stay unsplit.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpStore %3 %10
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %20 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%29 = OpAccessChain %18 %5 %22
+%30 = OpLoad %13 %29
+%31 = OpAccessChain %18 %4 %22
+OpStore %31 %30
+%32 = OpIEqual %12 %22 %19
+OpSelectionMerge %33 None
+OpBranchConditional %32 %34 %33
+%34 = OpLabel
+OpBranch %25
+%33 = OpLabel
+%35 = OpAccessChain %18 %4 %22
+%36 = OpLoad %13 %35
+%37 = OpAccessChain %18 %5 %22
+OpStore %37 %36
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpStore %3 %23
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);  // Same string as input and expected: no change allowed.
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; i++) {
+ B[i] = A[i];
+ if ( i== 1)
+ continue;
+ A[i] = B[i];
+ }
+}
+
+This loop should be split into:
+
+ for (int i = 0; i < 10; i++) {
+ B[i] = A[i];
+ if ( i== 1)
+ continue;
+ }
+ for (int i = 0; i < 10; i++) {
+ if ( i== 1)
+ continue;
+ A[i] = B[i];
+ }
+The continue block in the first loop is left to DCE.
+
+
+
+*/
+TEST_F(FissionClassTest, FissionContinue) {  // Positive test: one loop in source, two in expected.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpStore %3 %10
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %20 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%29 = OpAccessChain %18 %5 %22
+%30 = OpLoad %13 %29
+%31 = OpAccessChain %18 %4 %22
+OpStore %31 %30
+%32 = OpIEqual %12 %22 %19
+OpSelectionMerge %33 None
+OpBranchConditional %32 %34 %33
+%34 = OpLabel
+OpBranch %24
+%33 = OpLabel
+%35 = OpAccessChain %18 %4 %22
+%36 = OpLoad %13 %35
+%37 = OpAccessChain %18 %5 %22
+OpStore %37 %36
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpStore %3 %23
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpStore %3 %10
+OpBranch %38
+%38 = OpLabel
+%39 = OpPhi %8 %10 %20 %53 %52
+OpLoopMerge %54 %52 None
+OpBranch %40
+%40 = OpLabel
+%41 = OpSLessThan %12 %39 %11
+OpBranchConditional %41 %42 %54
+%42 = OpLabel
+%43 = OpAccessChain %18 %5 %39
+%44 = OpLoad %13 %43
+%45 = OpAccessChain %18 %4 %39
+OpStore %45 %44
+%46 = OpIEqual %12 %39 %19
+OpSelectionMerge %47 None
+OpBranchConditional %46 %51 %47
+%47 = OpLabel
+OpBranch %52
+%51 = OpLabel
+OpBranch %52
+%52 = OpLabel
+%53 = OpIAdd %8 %39 %19
+OpStore %3 %53
+OpBranch %38
+%54 = OpLabel
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %54 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+%32 = OpIEqual %12 %22 %19
+OpSelectionMerge %33 None
+OpBranchConditional %32 %34 %33
+%34 = OpLabel
+OpBranch %24
+%33 = OpLabel
+%35 = OpAccessChain %18 %4 %22
+%36 = OpLoad %13 %35
+%37 = OpAccessChain %18 %5 %22
+OpStore %37 %36
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpStore %3 %23
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);  // Presumably compares the pass output with expected.
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ int i = 0;
+ do {
+ B[i] = A[i];
+ A[i] = B[i];
+ ++i;
+ } while (i < 10);
+}
+
+
+Check that this is split into:
+ int i = 0;
+ do {
+ B[i] = A[i];
+ ++i;
+ } while (i < 10);
+
+ i = 0;
+ do {
+ A[i] = B[i];
+ ++i;
+ } while (i < 10);
+
+
+*/
+TEST_F(FissionClassTest, FissionDoWhile) {  // Positive test: exit check sits in continue block %24; one loop becomes two.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpTypeFloat 32
+%12 = OpTypeInt 32 0
+%13 = OpConstant %12 10
+%14 = OpTypeArray %11 %13
+%15 = OpTypePointer Function %14
+%16 = OpTypePointer Function %11
+%17 = OpConstant %8 1
+%18 = OpConstant %8 10
+%19 = OpTypeBool
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %15 Function
+%5 = OpVariable %15 Function
+OpStore %3 %10
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %20 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpAccessChain %16 %5 %22
+%28 = OpLoad %11 %27
+%29 = OpAccessChain %16 %4 %22
+OpStore %29 %28
+%30 = OpAccessChain %16 %4 %22
+%31 = OpLoad %11 %30
+%32 = OpAccessChain %16 %5 %22
+OpStore %32 %31
+%23 = OpIAdd %8 %22 %17
+OpStore %3 %23
+OpBranch %24
+%24 = OpLabel
+%33 = OpSLessThan %19 %23 %18
+OpBranchConditional %33 %21 %25
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpTypeFloat 32
+%12 = OpTypeInt 32 0
+%13 = OpConstant %12 10
+%14 = OpTypeArray %11 %13
+%15 = OpTypePointer Function %14
+%16 = OpTypePointer Function %11
+%17 = OpConstant %8 1
+%18 = OpConstant %8 10
+%19 = OpTypeBool
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %15 Function
+%5 = OpVariable %15 Function
+OpStore %3 %10
+OpBranch %34
+%34 = OpLabel
+%35 = OpPhi %8 %10 %20 %43 %44
+OpLoopMerge %46 %44 None
+OpBranch %36
+%36 = OpLabel
+%37 = OpAccessChain %16 %5 %35
+%38 = OpLoad %11 %37
+%39 = OpAccessChain %16 %4 %35
+OpStore %39 %38
+%43 = OpIAdd %8 %35 %17
+OpStore %3 %43
+OpBranch %44
+%44 = OpLabel
+%45 = OpSLessThan %19 %43 %18
+OpBranchConditional %45 %34 %46
+%46 = OpLabel
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %46 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%30 = OpAccessChain %16 %4 %22
+%31 = OpLoad %11 %30
+%32 = OpAccessChain %16 %5 %22
+OpStore %32 %31
+%23 = OpIAdd %8 %22 %17
+OpStore %3 %23
+OpBranch %24
+%24 = OpLabel
+%33 = OpSLessThan %19 %23 %18
+OpBranchConditional %33 %21 %25
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);  // Presumably compares the pass output with expected.
+}
+
+/*
+
+#version 430
+void main(void) {
+ float A[10][10];
+ float B[10][10];
+ for (int j = 0; j < 10; ++j) {
+ for (int i = 0; i < 10; ++i) {
+ B[i][j] = A[i][i];
+ A[i][i] = B[i][j + 1];
+ }
+ }
+}
+
+
+This loop can't be split because the load B[i][j + 1] is dependent on the store
+B[i][j].
+
+*/
+TEST_F(FissionClassTest, FissionNestedDependency) {  // Negative test: the nested loops must stay unsplit.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "j"
+OpName %4 "i"
+OpName %5 "B"
+OpName %6 "A"
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypeArray %17 %16
+%19 = OpTypePointer Function %18
+%20 = OpTypePointer Function %14
+%21 = OpConstant %9 1
+%2 = OpFunction %7 None %8
+%22 = OpLabel
+%3 = OpVariable %10 Function
+%4 = OpVariable %10 Function
+%5 = OpVariable %19 Function
+%6 = OpVariable %19 Function
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %9 %11 %22 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %13 %24 %12
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+OpBranch %31
+%31 = OpLabel
+%32 = OpPhi %9 %11 %30 %33 %34
+OpLoopMerge %35 %34 None
+OpBranch %36
+%36 = OpLabel
+%37 = OpSLessThan %13 %32 %12
+OpBranchConditional %37 %38 %35
+%38 = OpLabel
+%39 = OpAccessChain %20 %6 %32 %32
+%40 = OpLoad %14 %39
+%41 = OpAccessChain %20 %5 %32 %24
+OpStore %41 %40
+%42 = OpIAdd %9 %24 %21
+%43 = OpAccessChain %20 %5 %32 %42
+%44 = OpLoad %14 %43
+%45 = OpAccessChain %20 %6 %32 %32
+OpStore %45 %44
+OpBranch %34
+%34 = OpLabel
+%33 = OpIAdd %9 %32 %21
+OpBranch %31
+%35 = OpLabel
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %9 %24 %21
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);  // Same string as input and expected: no change allowed.
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10][10];
+ float B[10][10];
+ for (int j = 0; j < 10; ++j) {
+ for (int i = 0; i < 10; ++i) {
+ B[i][i] = A[i][j];
+ A[i][j+1] = B[i][i];
+ }
+ }
+}
+
+This loop should not be split as the load A[i][j+1] would be reading a value
+written in the store A[i][j] which would be hit before A[i][j+1] if the loops
+were split but would not get hit before the read currently.
+
+*/
+TEST_F(FissionClassTest, FissionNestedDependency2) {  // Negative test: the nested loops must stay unsplit.
+ // clang-format off
+ // Input SPIR-V for the GLSL above (original note: with opt::LocalMultiStoreElimPass, presumably already applied).
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "j"
+OpName %4 "i"
+OpName %5 "B"
+OpName %6 "A"
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypeArray %17 %16
+%19 = OpTypePointer Function %18
+%20 = OpTypePointer Function %14
+%21 = OpConstant %9 1
+%2 = OpFunction %7 None %8
+%22 = OpLabel
+%3 = OpVariable %10 Function
+%4 = OpVariable %10 Function
+%5 = OpVariable %19 Function
+%6 = OpVariable %19 Function
+OpStore %3 %11
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %9 %11 %22 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %13 %24 %12
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+OpStore %4 %11
+OpBranch %31
+%31 = OpLabel
+%32 = OpPhi %9 %11 %30 %33 %34
+OpLoopMerge %35 %34 None
+OpBranch %36
+%36 = OpLabel
+%37 = OpSLessThan %13 %32 %12
+OpBranchConditional %37 %38 %35
+%38 = OpLabel
+%39 = OpAccessChain %20 %6 %32 %24
+%40 = OpLoad %14 %39
+%41 = OpAccessChain %20 %5 %32 %32
+OpStore %41 %40
+%42 = OpIAdd %9 %24 %21
+%43 = OpAccessChain %20 %5 %32 %32
+%44 = OpLoad %14 %43
+%45 = OpAccessChain %20 %6 %32 %42
+OpStore %45 %44
+OpBranch %34
+%34 = OpLabel
+%33 = OpIAdd %9 %32 %21
+OpStore %4 %33
+OpBranch %31
+%35 = OpLabel
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %9 %24 %21
+OpStore %3 %25
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+
+ // clang-format on
+ std::unique_ptr<ir::IRContext> context =
+ BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+ SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+ ir::Module* module = context->module();  // Only used for the null check below.
+ EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n"
+ << source << std::endl;
+
+ SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);  // Same string as input and expected: no change allowed.
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10][10];
+ float B[10][10];
+ for (int j = 0; j < 10; ++j) {
+ for (int i = 0; i < 10; ++i) {
+ B[i][j] = A[i][j];
+ A[i][j] = B[i][j];
+ }
+ for (int i = 0; i < 10; ++i) {
+ B[i][j] = A[i][j];
+ A[i][j] = B[i][j];
+ }
+ }
+}
+
+
+
+Should be split into:
+
+for (int j = 0; j < 10; ++j) {
+ for (int i = 0; i < 10; ++i)
+ B[i][j] = A[i][j];
+ for (int i = 0; i < 10; ++i)
+ A[i][j] = B[i][j];
+ for (int i = 0; i < 10; ++i)
+ B[i][j] = A[i][j];
+ for (int i = 0; i < 10; ++i)
+ A[i][j] = B[i][j];
+*/
+// Checks that both inner loops nested in the outer j loop are each split in
+// two, and that the loop descriptor reflects the two extra loops afterwards.
+TEST_F(FissionClassTest, FissionMultipleLoopsNested) {
+  // clang-format off
+  // With opt::LocalMultiStoreElimPass
+const std::string source = R"(OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "j"
+ OpName %4 "i"
+ OpName %5 "B"
+ OpName %6 "A"
+ OpName %7 "i"
+ %8 = OpTypeVoid
+ %9 = OpTypeFunction %8
+ %10 = OpTypeInt 32 1
+ %11 = OpTypePointer Function %10
+ %12 = OpConstant %10 0
+ %13 = OpConstant %10 10
+ %14 = OpTypeBool
+ %15 = OpTypeFloat 32
+ %16 = OpTypeInt 32 0
+ %17 = OpConstant %16 10
+ %18 = OpTypeArray %15 %17
+ %19 = OpTypeArray %18 %17
+ %20 = OpTypePointer Function %19
+ %21 = OpTypePointer Function %15
+ %22 = OpConstant %10 1
+ %2 = OpFunction %8 None %9
+ %23 = OpLabel
+ %3 = OpVariable %11 Function
+ %4 = OpVariable %11 Function
+ %5 = OpVariable %20 Function
+ %6 = OpVariable %20 Function
+ %7 = OpVariable %11 Function
+ OpStore %3 %12
+ OpBranch %24
+ %24 = OpLabel
+ %25 = OpPhi %10 %12 %23 %26 %27
+ OpLoopMerge %28 %27 None
+ OpBranch %29
+ %29 = OpLabel
+ %30 = OpSLessThan %14 %25 %13
+ OpBranchConditional %30 %31 %28
+ %31 = OpLabel
+ OpStore %4 %12
+ OpBranch %32
+ %32 = OpLabel
+ %33 = OpPhi %10 %12 %31 %34 %35
+ OpLoopMerge %36 %35 None
+ OpBranch %37
+ %37 = OpLabel
+ %38 = OpSLessThan %14 %33 %13
+ OpBranchConditional %38 %39 %36
+ %39 = OpLabel
+ %40 = OpAccessChain %21 %6 %33 %25
+ %41 = OpLoad %15 %40
+ %42 = OpAccessChain %21 %5 %33 %25
+ OpStore %42 %41
+ %43 = OpAccessChain %21 %5 %33 %25
+ %44 = OpLoad %15 %43
+ %45 = OpAccessChain %21 %6 %33 %25
+ OpStore %45 %44
+ OpBranch %35
+ %35 = OpLabel
+ %34 = OpIAdd %10 %33 %22
+ OpStore %4 %34
+ OpBranch %32
+ %36 = OpLabel
+ OpStore %7 %12
+ OpBranch %46
+ %46 = OpLabel
+ %47 = OpPhi %10 %12 %36 %48 %49
+ OpLoopMerge %50 %49 None
+ OpBranch %51
+ %51 = OpLabel
+ %52 = OpSLessThan %14 %47 %13
+ OpBranchConditional %52 %53 %50
+ %53 = OpLabel
+ %54 = OpAccessChain %21 %6 %47 %25
+ %55 = OpLoad %15 %54
+ %56 = OpAccessChain %21 %5 %47 %25
+ OpStore %56 %55
+ %57 = OpAccessChain %21 %5 %47 %25
+ %58 = OpLoad %15 %57
+ %59 = OpAccessChain %21 %6 %47 %25
+ OpStore %59 %58
+ OpBranch %49
+ %49 = OpLabel
+ %48 = OpIAdd %10 %47 %22
+ OpStore %7 %48
+ OpBranch %46
+ %50 = OpLabel
+ OpBranch %27
+ %27 = OpLabel
+ %26 = OpIAdd %10 %25 %22
+ OpStore %3 %26
+ OpBranch %24
+ %28 = OpLabel
+ OpReturn
+ OpFunctionEnd
+)";
+
+const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "j"
+OpName %4 "i"
+OpName %5 "B"
+OpName %6 "A"
+OpName %7 "i"
+%8 = OpTypeVoid
+%9 = OpTypeFunction %8
+%10 = OpTypeInt 32 1
+%11 = OpTypePointer Function %10
+%12 = OpConstant %10 0
+%13 = OpConstant %10 10
+%14 = OpTypeBool
+%15 = OpTypeFloat 32
+%16 = OpTypeInt 32 0
+%17 = OpConstant %16 10
+%18 = OpTypeArray %15 %17
+%19 = OpTypeArray %18 %17
+%20 = OpTypePointer Function %19
+%21 = OpTypePointer Function %15
+%22 = OpConstant %10 1
+%2 = OpFunction %8 None %9
+%23 = OpLabel
+%3 = OpVariable %11 Function
+%4 = OpVariable %11 Function
+%5 = OpVariable %20 Function
+%6 = OpVariable %20 Function
+%7 = OpVariable %11 Function
+OpStore %3 %12
+OpBranch %24
+%24 = OpLabel
+%25 = OpPhi %10 %12 %23 %26 %27
+OpLoopMerge %28 %27 None
+OpBranch %29
+%29 = OpLabel
+%30 = OpSLessThan %14 %25 %13
+OpBranchConditional %30 %31 %28
+%31 = OpLabel
+OpStore %4 %12
+OpBranch %60
+%60 = OpLabel
+%61 = OpPhi %10 %12 %31 %72 %71
+OpLoopMerge %73 %71 None
+OpBranch %62
+%62 = OpLabel
+%63 = OpSLessThan %14 %61 %13
+OpBranchConditional %63 %64 %73
+%64 = OpLabel
+%65 = OpAccessChain %21 %6 %61 %25
+%66 = OpLoad %15 %65
+%67 = OpAccessChain %21 %5 %61 %25
+OpStore %67 %66
+OpBranch %71
+%71 = OpLabel
+%72 = OpIAdd %10 %61 %22
+OpStore %4 %72
+OpBranch %60
+%73 = OpLabel
+OpBranch %32
+%32 = OpLabel
+%33 = OpPhi %10 %12 %73 %34 %35
+OpLoopMerge %36 %35 None
+OpBranch %37
+%37 = OpLabel
+%38 = OpSLessThan %14 %33 %13
+OpBranchConditional %38 %39 %36
+%39 = OpLabel
+%43 = OpAccessChain %21 %5 %33 %25
+%44 = OpLoad %15 %43
+%45 = OpAccessChain %21 %6 %33 %25
+OpStore %45 %44
+OpBranch %35
+%35 = OpLabel
+%34 = OpIAdd %10 %33 %22
+OpStore %4 %34
+OpBranch %32
+%36 = OpLabel
+OpStore %7 %12
+OpBranch %74
+%74 = OpLabel
+%75 = OpPhi %10 %12 %36 %86 %85
+OpLoopMerge %87 %85 None
+OpBranch %76
+%76 = OpLabel
+%77 = OpSLessThan %14 %75 %13
+OpBranchConditional %77 %78 %87
+%78 = OpLabel
+%79 = OpAccessChain %21 %6 %75 %25
+%80 = OpLoad %15 %79
+%81 = OpAccessChain %21 %5 %75 %25
+OpStore %81 %80
+OpBranch %85
+%85 = OpLabel
+%86 = OpIAdd %10 %75 %22
+OpStore %7 %86
+OpBranch %74
+%87 = OpLabel
+OpBranch %46
+%46 = OpLabel
+%47 = OpPhi %10 %12 %87 %48 %49
+OpLoopMerge %50 %49 None
+OpBranch %51
+%51 = OpLabel
+%52 = OpSLessThan %14 %47 %13
+OpBranchConditional %52 %53 %50
+%53 = OpLabel
+%57 = OpAccessChain %21 %5 %47 %25
+%58 = OpLoad %15 %57
+%59 = OpAccessChain %21 %6 %47 %25
+OpStore %59 %58
+OpBranch %49
+%49 = OpLabel
+%48 = OpIAdd %10 %47 %22
+OpStore %7 %48
+OpBranch %46
+%50 = OpLabel
+OpBranch %27
+%27 = OpLabel
+%26 = OpIAdd %10 %25 %22
+OpStore %3 %26
+OpBranch %24
+%28 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+  // clang-format on
+  std::unique_ptr<ir::IRContext> context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  ir::Module* module = context->module();
+  // Fatal assertion: the module is handed to GetFunction below, so there is
+  // no point in carrying on with a null module.
+  ASSERT_NE(nullptr, module) << "Assembling failed for shader:\n"
+                             << source << std::endl;
+
+  // Before the pass: the outer loop plus its two inner loops.
+  const ir::Function* function = spvtest::GetFunction(module, 2);
+  ir::LoopDescriptor& pre_pass_descriptor =
+      *context->GetLoopDescriptor(function);
+  EXPECT_EQ(pre_pass_descriptor.NumLoops(), 3u);
+  EXPECT_EQ(pre_pass_descriptor.pre_begin()->NumImmediateChildren(), 2u);
+
+  // Test that the pass transforms the ir into the expected output.
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+  SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+
+  // Test that the loop descriptor is correctly maintained and updated by the
+  // pass.
+  opt::LoopFissionPass loop_fission{};
+  loop_fission.Process(context.get());
+
+  // After the pass: each inner loop has been split, giving four inner loops.
+  function = spvtest::GetFunction(module, 2);
+  ir::LoopDescriptor& post_pass_descriptor =
+      *context->GetLoopDescriptor(function);
+  EXPECT_EQ(post_pass_descriptor.NumLoops(), 5u);
+  EXPECT_EQ(post_pass_descriptor.pre_begin()->NumImmediateChildren(), 4u);
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; ++i) {
+ B[i] = A[i];
+ A[i] = B[i];
+ }
+ for (int i = 0; i < 10; ++i) {
+ B[i] = A[i];
+ A[i] = B[i];
+ }
+}
+
+
+
+Should be split into:
+
+ for (int i = 0; i < 10; ++i)
+ B[i] = A[i];
+ for (int i = 0; i < 10; ++i)
+ A[i] = B[i];
+ for (int i = 0; i < 10; ++i)
+ B[i] = A[i];
+ for (int i = 0; i < 10; ++i)
+ A[i] = B[i];
+*/
+// Checks that two consecutive top level loops are each split in two, and that
+// the loop descriptor goes from two loops to four.
+TEST_F(FissionClassTest, FissionMultipleLoops) {
+  // clang-format off
+  // With opt::LocalMultiStoreElimPass
+const std::string source = R"(OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "i"
+ OpName %4 "B"
+ OpName %5 "A"
+ OpName %6 "i"
+ %7 = OpTypeVoid
+ %8 = OpTypeFunction %7
+ %9 = OpTypeInt 32 1
+ %10 = OpTypePointer Function %9
+ %11 = OpConstant %9 0
+ %12 = OpConstant %9 10
+ %13 = OpTypeBool
+ %14 = OpTypeFloat 32
+ %15 = OpTypeInt 32 0
+ %16 = OpConstant %15 10
+ %17 = OpTypeArray %14 %16
+ %18 = OpTypePointer Function %17
+ %19 = OpTypePointer Function %14
+ %20 = OpConstant %9 1
+ %2 = OpFunction %7 None %8
+ %21 = OpLabel
+ %3 = OpVariable %10 Function
+ %4 = OpVariable %18 Function
+ %5 = OpVariable %18 Function
+ %6 = OpVariable %10 Function
+ OpStore %3 %11
+ OpBranch %22
+ %22 = OpLabel
+ %23 = OpPhi %9 %11 %21 %24 %25
+ OpLoopMerge %26 %25 None
+ OpBranch %27
+ %27 = OpLabel
+ %28 = OpSLessThan %13 %23 %12
+ OpBranchConditional %28 %29 %26
+ %29 = OpLabel
+ %30 = OpAccessChain %19 %5 %23
+ %31 = OpLoad %14 %30
+ %32 = OpAccessChain %19 %4 %23
+ OpStore %32 %31
+ %33 = OpAccessChain %19 %4 %23
+ %34 = OpLoad %14 %33
+ %35 = OpAccessChain %19 %5 %23
+ OpStore %35 %34
+ OpBranch %25
+ %25 = OpLabel
+ %24 = OpIAdd %9 %23 %20
+ OpStore %3 %24
+ OpBranch %22
+ %26 = OpLabel
+ OpStore %6 %11
+ OpBranch %36
+ %36 = OpLabel
+ %37 = OpPhi %9 %11 %26 %38 %39
+ OpLoopMerge %40 %39 None
+ OpBranch %41
+ %41 = OpLabel
+ %42 = OpSLessThan %13 %37 %12
+ OpBranchConditional %42 %43 %40
+ %43 = OpLabel
+ %44 = OpAccessChain %19 %5 %37
+ %45 = OpLoad %14 %44
+ %46 = OpAccessChain %19 %4 %37
+ OpStore %46 %45
+ %47 = OpAccessChain %19 %4 %37
+ %48 = OpLoad %14 %47
+ %49 = OpAccessChain %19 %5 %37
+ OpStore %49 %48
+ OpBranch %39
+ %39 = OpLabel
+ %38 = OpIAdd %9 %37 %20
+ OpStore %6 %38
+ OpBranch %36
+ %40 = OpLabel
+ OpReturn
+ OpFunctionEnd
+)";
+
+const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+OpName %6 "i"
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypePointer Function %9
+%11 = OpConstant %9 0
+%12 = OpConstant %9 10
+%13 = OpTypeBool
+%14 = OpTypeFloat 32
+%15 = OpTypeInt 32 0
+%16 = OpConstant %15 10
+%17 = OpTypeArray %14 %16
+%18 = OpTypePointer Function %17
+%19 = OpTypePointer Function %14
+%20 = OpConstant %9 1
+%2 = OpFunction %7 None %8
+%21 = OpLabel
+%3 = OpVariable %10 Function
+%4 = OpVariable %18 Function
+%5 = OpVariable %18 Function
+%6 = OpVariable %10 Function
+OpStore %3 %11
+OpBranch %64
+%64 = OpLabel
+%65 = OpPhi %9 %11 %21 %76 %75
+OpLoopMerge %77 %75 None
+OpBranch %66
+%66 = OpLabel
+%67 = OpSLessThan %13 %65 %12
+OpBranchConditional %67 %68 %77
+%68 = OpLabel
+%69 = OpAccessChain %19 %5 %65
+%70 = OpLoad %14 %69
+%71 = OpAccessChain %19 %4 %65
+OpStore %71 %70
+OpBranch %75
+%75 = OpLabel
+%76 = OpIAdd %9 %65 %20
+OpStore %3 %76
+OpBranch %64
+%77 = OpLabel
+OpBranch %22
+%22 = OpLabel
+%23 = OpPhi %9 %11 %77 %24 %25
+OpLoopMerge %26 %25 None
+OpBranch %27
+%27 = OpLabel
+%28 = OpSLessThan %13 %23 %12
+OpBranchConditional %28 %29 %26
+%29 = OpLabel
+%33 = OpAccessChain %19 %4 %23
+%34 = OpLoad %14 %33
+%35 = OpAccessChain %19 %5 %23
+OpStore %35 %34
+OpBranch %25
+%25 = OpLabel
+%24 = OpIAdd %9 %23 %20
+OpStore %3 %24
+OpBranch %22
+%26 = OpLabel
+OpStore %6 %11
+OpBranch %50
+%50 = OpLabel
+%51 = OpPhi %9 %11 %26 %62 %61
+OpLoopMerge %63 %61 None
+OpBranch %52
+%52 = OpLabel
+%53 = OpSLessThan %13 %51 %12
+OpBranchConditional %53 %54 %63
+%54 = OpLabel
+%55 = OpAccessChain %19 %5 %51
+%56 = OpLoad %14 %55
+%57 = OpAccessChain %19 %4 %51
+OpStore %57 %56
+OpBranch %61
+%61 = OpLabel
+%62 = OpIAdd %9 %51 %20
+OpStore %6 %62
+OpBranch %50
+%63 = OpLabel
+OpBranch %36
+%36 = OpLabel
+%37 = OpPhi %9 %11 %63 %38 %39
+OpLoopMerge %40 %39 None
+OpBranch %41
+%41 = OpLabel
+%42 = OpSLessThan %13 %37 %12
+OpBranchConditional %42 %43 %40
+%43 = OpLabel
+%47 = OpAccessChain %19 %4 %37
+%48 = OpLoad %14 %47
+%49 = OpAccessChain %19 %5 %37
+OpStore %49 %48
+OpBranch %39
+%39 = OpLabel
+%38 = OpIAdd %9 %37 %20
+OpStore %6 %38
+OpBranch %36
+%40 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+  // clang-format on
+  std::unique_ptr<ir::IRContext> context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  ir::Module* module = context->module();
+  // Fatal assertion: the module is handed to GetFunction below, so there is
+  // no point in carrying on with a null module.
+  ASSERT_NE(nullptr, module) << "Assembling failed for shader:\n"
+                             << source << std::endl;
+
+  // Before the pass: two top level loops with no nested children.
+  const ir::Function* function = spvtest::GetFunction(module, 2);
+  ir::LoopDescriptor& pre_pass_descriptor =
+      *context->GetLoopDescriptor(function);
+  EXPECT_EQ(pre_pass_descriptor.NumLoops(), 2u);
+  EXPECT_EQ(pre_pass_descriptor.pre_begin()->NumImmediateChildren(), 0u);
+
+  // Test that the pass transforms the ir into the expected output. This check
+  // is run once; it works from the source text rather than from |context|.
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+  SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+
+  // Test that the loop descriptor is correctly maintained and updated by the
+  // pass.
+  opt::LoopFissionPass loop_fission{};
+  loop_fission.Process(context.get());
+
+  // After the pass: each of the two loops has been split into two.
+  function = spvtest::GetFunction(module, 2);
+  ir::LoopDescriptor& post_pass_descriptor =
+      *context->GetLoopDescriptor(function);
+  EXPECT_EQ(post_pass_descriptor.NumLoops(), 4u);
+  EXPECT_EQ(post_pass_descriptor.pre_begin()->NumImmediateChildren(), 0u);
+}
+
+/*
+#version 430
+int foo() { return 1; }
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; ++i) {
+ B[i] = A[i];
+ foo();
+ A[i] = B[i];
+ }
+}
+
+This should not be split as it has a function call in it so we can't determine
+if it has side effects.
+*/
+// Checks that a loop containing a function call is left untouched: the same
+// text is passed as both the input and the expected output of the pass.
+TEST_F(FissionClassTest, FissionFunctionCall) {
+  // clang-format off
+  // With opt::LocalMultiStoreElimPass
+const std::string source = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "foo("
+OpName %4 "i"
+OpName %5 "B"
+OpName %6 "A"
+%7 = OpTypeVoid
+%8 = OpTypeFunction %7
+%9 = OpTypeInt 32 1
+%10 = OpTypeFunction %9
+%11 = OpConstant %9 1
+%12 = OpTypePointer Function %9
+%13 = OpConstant %9 0
+%14 = OpConstant %9 10
+%15 = OpTypeBool
+%16 = OpTypeFloat 32
+%17 = OpTypeInt 32 0
+%18 = OpConstant %17 10
+%19 = OpTypeArray %16 %18
+%20 = OpTypePointer Function %19
+%21 = OpTypePointer Function %16
+%2 = OpFunction %7 None %8
+%22 = OpLabel
+%4 = OpVariable %12 Function
+%5 = OpVariable %20 Function
+%6 = OpVariable %20 Function
+OpStore %4 %13
+OpBranch %23
+%23 = OpLabel
+%24 = OpPhi %9 %13 %22 %25 %26
+OpLoopMerge %27 %26 None
+OpBranch %28
+%28 = OpLabel
+%29 = OpSLessThan %15 %24 %14
+OpBranchConditional %29 %30 %27
+%30 = OpLabel
+%31 = OpAccessChain %21 %6 %24
+%32 = OpLoad %16 %31
+%33 = OpAccessChain %21 %5 %24
+OpStore %33 %32
+%34 = OpFunctionCall %9 %3
+%35 = OpAccessChain %21 %5 %24
+%36 = OpLoad %16 %35
+%37 = OpAccessChain %21 %6 %24
+OpStore %37 %36
+OpBranch %26
+%26 = OpLabel
+%25 = OpIAdd %9 %24 %11
+OpStore %4 %25
+OpBranch %23
+%27 = OpLabel
+OpReturn
+OpFunctionEnd
+%3 = OpFunction %9 None %10
+%38 = OpLabel
+OpReturnValue %11
+OpFunctionEnd
+)";
+
+  // clang-format on
+  // Assemble the shader up front so a malformed input is reported with the
+  // offending text.
+  std::unique_ptr<ir::IRContext> ir_context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  EXPECT_NE(nullptr, ir_context->module())
+      << "Assembling failed for shader:\n"
+      << source << std::endl;
+
+  // The expected output is the input itself.
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+  SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true);
+}
+
+/*
+#version 430
+void main(void) {
+ float A[10];
+ float B[10];
+ for (int i = 0; i < 10; ++i) {
+ switch (i) {
+ case 1:
+ B[i] = A[i];
+ break;
+ default:
+ A[i] = B[i];
+ }
+ }
+}
+
+This should be split into:
+ for (int i = 0; i < 10; ++i) {
+ switch (i) {
+ case 1:
+ break;
+ default:
+ A[i] = B[i];
+ }
+ }
+
+ for (int i = 0; i < 10; ++i) {
+ switch (i) {
+ case 1:
+ B[i] = A[i];
+ break;
+ default:
+ break;
+ }
+ }
+
+*/
+// Checks that a loop whose body is a switch statement is split into two loops,
+// each keeping the switch structure but only one of the two array copies.
+TEST_F(FissionClassTest, FissionSwitchStatement) {
+  // clang-format off
+  // With opt::LocalMultiStoreElimPass
+const std::string source = R"(OpCapability Shader
+ %1 = OpExtInstImport "GLSL.std.450"
+ OpMemoryModel Logical GLSL450
+ OpEntryPoint Fragment %2 "main"
+ OpExecutionMode %2 OriginUpperLeft
+ OpSource GLSL 430
+ OpName %2 "main"
+ OpName %3 "i"
+ OpName %4 "B"
+ OpName %5 "A"
+ %6 = OpTypeVoid
+ %7 = OpTypeFunction %6
+ %8 = OpTypeInt 32 1
+ %9 = OpTypePointer Function %8
+ %10 = OpConstant %8 0
+ %11 = OpConstant %8 10
+ %12 = OpTypeBool
+ %13 = OpTypeFloat 32
+ %14 = OpTypeInt 32 0
+ %15 = OpConstant %14 10
+ %16 = OpTypeArray %13 %15
+ %17 = OpTypePointer Function %16
+ %18 = OpTypePointer Function %13
+ %19 = OpConstant %8 1
+ %2 = OpFunction %6 None %7
+ %20 = OpLabel
+ %3 = OpVariable %9 Function
+ %4 = OpVariable %17 Function
+ %5 = OpVariable %17 Function
+ OpStore %3 %10
+ OpBranch %21
+ %21 = OpLabel
+ %22 = OpPhi %8 %10 %20 %23 %24
+ OpLoopMerge %25 %24 None
+ OpBranch %26
+ %26 = OpLabel
+ %27 = OpSLessThan %12 %22 %11
+ OpBranchConditional %27 %28 %25
+ %28 = OpLabel
+ OpSelectionMerge %29 None
+ OpSwitch %22 %30 1 %31
+ %30 = OpLabel
+ %32 = OpAccessChain %18 %4 %22
+ %33 = OpLoad %13 %32
+ %34 = OpAccessChain %18 %5 %22
+ OpStore %34 %33
+ OpBranch %29
+ %31 = OpLabel
+ %35 = OpAccessChain %18 %5 %22
+ %36 = OpLoad %13 %35
+ %37 = OpAccessChain %18 %4 %22
+ OpStore %37 %36
+ OpBranch %29
+ %29 = OpLabel
+ OpBranch %24
+ %24 = OpLabel
+ %23 = OpIAdd %8 %22 %19
+ OpStore %3 %23
+ OpBranch %21
+ %25 = OpLabel
+ OpReturn
+ OpFunctionEnd
+)";
+
+const std::string expected = R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %2 "main"
+OpExecutionMode %2 OriginUpperLeft
+OpSource GLSL 430
+OpName %2 "main"
+OpName %3 "i"
+OpName %4 "B"
+OpName %5 "A"
+%6 = OpTypeVoid
+%7 = OpTypeFunction %6
+%8 = OpTypeInt 32 1
+%9 = OpTypePointer Function %8
+%10 = OpConstant %8 0
+%11 = OpConstant %8 10
+%12 = OpTypeBool
+%13 = OpTypeFloat 32
+%14 = OpTypeInt 32 0
+%15 = OpConstant %14 10
+%16 = OpTypeArray %13 %15
+%17 = OpTypePointer Function %16
+%18 = OpTypePointer Function %13
+%19 = OpConstant %8 1
+%2 = OpFunction %6 None %7
+%20 = OpLabel
+%3 = OpVariable %9 Function
+%4 = OpVariable %17 Function
+%5 = OpVariable %17 Function
+OpStore %3 %10
+OpBranch %38
+%38 = OpLabel
+%39 = OpPhi %8 %10 %20 %53 %52
+OpLoopMerge %54 %52 None
+OpBranch %40
+%40 = OpLabel
+%41 = OpSLessThan %12 %39 %11
+OpBranchConditional %41 %42 %54
+%42 = OpLabel
+OpSelectionMerge %51 None
+OpSwitch %39 %47 1 %43
+%43 = OpLabel
+OpBranch %51
+%47 = OpLabel
+%48 = OpAccessChain %18 %4 %39
+%49 = OpLoad %13 %48
+%50 = OpAccessChain %18 %5 %39
+OpStore %50 %49
+OpBranch %51
+%51 = OpLabel
+OpBranch %52
+%52 = OpLabel
+%53 = OpIAdd %8 %39 %19
+OpStore %3 %53
+OpBranch %38
+%54 = OpLabel
+OpBranch %21
+%21 = OpLabel
+%22 = OpPhi %8 %10 %54 %23 %24
+OpLoopMerge %25 %24 None
+OpBranch %26
+%26 = OpLabel
+%27 = OpSLessThan %12 %22 %11
+OpBranchConditional %27 %28 %25
+%28 = OpLabel
+OpSelectionMerge %29 None
+OpSwitch %22 %30 1 %31
+%30 = OpLabel
+OpBranch %29
+%31 = OpLabel
+%35 = OpAccessChain %18 %5 %22
+%36 = OpLoad %13 %35
+%37 = OpAccessChain %18 %4 %22
+OpStore %37 %36
+OpBranch %29
+%29 = OpLabel
+OpBranch %24
+%24 = OpLabel
+%23 = OpIAdd %8 %22 %19
+OpStore %3 %23
+OpBranch %21
+%25 = OpLabel
+OpReturn
+OpFunctionEnd
+)";
+  // clang-format on
+  // Assemble the shader up front so a malformed input is reported with the
+  // offending text.
+  std::unique_ptr<ir::IRContext> ir_context =
+      BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source,
+                  SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  EXPECT_NE(nullptr, ir_context->module())
+      << "Assembling failed for shader:\n"
+      << source << std::endl;
+
+  // Run the pass on the source text and compare against the split form.
+  SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER);
+  SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true);
+}
+
+} // namespace
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index fec58526..db069c0b 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -176,6 +176,10 @@ Options (in lexicographical order):
--local-redundancy-elimination
Looks for instructions in the same basic block that compute the
same value, and deletes the redundant ones.
+ --loop-fission
+ Splits any top level loops in which the register pressure has exceeded
+ a given threshold. The threshold must follow the use of this flag and
+ must be a positive integer value.
--loop-unroll
Fully unrolls loops marked with the Unroll flag
--loop-unroll-partial
@@ -403,6 +407,20 @@ OptStatus ParseOconfigFlag(const char* prog_name, const char* opt_flag,
in_file, out_file, nullptr, &skip_validator);
}
+// Parses the value following --loop-fission and registers the loop fission
+// pass with it as the register threshold.
+//
+// |argi| is the index in |argv| of the expected threshold value. Returns
+// {OPT_CONTINUE, 0} once the pass is registered. Returns {OPT_STOP, 1}, after
+// printing an error to stderr, when the value is missing, is not entirely a
+// decimal integer, or is not strictly positive.
+OptStatus ParseLoopFissionArg(int argc, const char** argv, int argi,
+                              Optimizer* optimizer) {
+  if (argi < argc) {
+    // strtol reports where parsing stopped, so empty or non-numeric input and
+    // trailing garbage can be told apart from a genuine number.
+    char* end = nullptr;
+    const long register_threshold_to_split = strtol(argv[argi], &end, 10);
+    const bool is_whole_number = end != argv[argi] && *end == '\0';
+    // Reject zero and negatives: a negative value would wrap around to a huge
+    // threshold when converted to size_t.
+    if (is_whole_number && register_threshold_to_split > 0) {
+      optimizer->RegisterPass(CreateLoopFissionPass(
+          static_cast<size_t>(register_threshold_to_split)));
+      return {OPT_CONTINUE, 0};
+    }
+  }
+  fprintf(
+      stderr,
+      "error: --loop-fission must be followed by a positive integer value\n");
+  return {OPT_STOP, 1};
+}
+
OptStatus ParseLoopUnrollPartialArg(int argc, const char** argv, int argi,
Optimizer* optimizer) {
if (argi < argc) {
@@ -557,6 +575,11 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
optimizer->RegisterPass(CreateSSARewritePass());
} else if (0 == strcmp(cur_arg, "--copy-propagate-arrays")) {
optimizer->RegisterPass(CreateCopyPropagateArraysPass());
+ } else if (0 == strcmp(cur_arg, "--loop-fission")) {
+ OptStatus status = ParseLoopFissionArg(argc, argv, ++argi, optimizer);
+ if (status.action != OPT_CONTINUE) {
+ return status;
+ }
} else if (0 == strcmp(cur_arg, "--loop-unroll")) {
optimizer->RegisterPass(CreateLoopUnrollPass(true));
} else if (0 == strcmp(cur_arg, "--vector-dce")) {