diff options
author | Stephen McGroarty <stephen@codeplay.com> | 2018-04-23 21:01:12 +0100 |
---|---|---|
committer | Diego Novillo <dnovillo@google.com> | 2018-05-01 15:15:10 -0400 |
commit | 9a5dd6fe88882005ea0433857e688581608bb210 (patch) | |
tree | a7598954e6c35b24d6548f1b2a818302743a0b81 | |
parent | 9ba0879ddf00cc2462e581772cd6869e2a2dc984 (diff) | |
download | spirv-tools-9a5dd6fe88882005ea0433857e688581608bb210.tar.gz |
Support loop fission.
Adds support for splitting loops whose register pressure exceeds a user-
provided level. This pass will split a loop into two or more loops given
that the loop is a top-level loop and that splitting the loop is legal.
Control flow is left intact for dead code elimination to remove.
This pass is enabled with the --loop-fission flag to spirv-opt.
-rw-r--r-- | Android.mk | 1 | ||||
-rw-r--r-- | include/spirv-tools/optimizer.hpp | 5 | ||||
-rw-r--r-- | source/opt/CMakeLists.txt | 2 | ||||
-rw-r--r-- | source/opt/instruction.cpp | 4 | ||||
-rw-r--r-- | source/opt/loop_descriptor.h | 1 | ||||
-rw-r--r-- | source/opt/loop_fission.cpp | 508 | ||||
-rw-r--r-- | source/opt/loop_fission.h | 78 | ||||
-rw-r--r-- | source/opt/loop_utils.cpp | 86 | ||||
-rw-r--r-- | source/opt/loop_utils.h | 9 | ||||
-rw-r--r-- | source/opt/optimizer.cpp | 5 | ||||
-rw-r--r-- | source/opt/passes.h | 1 | ||||
-rw-r--r-- | test/opt/loop_optimizations/CMakeLists.txt | 8 | ||||
-rw-r--r-- | test/opt/loop_optimizations/loop_fission.cpp | 3492 | ||||
-rw-r--r-- | tools/opt/opt.cpp | 23 |
14 files changed, 4217 insertions, 6 deletions
@@ -107,6 +107,7 @@ SPVTOOLS_OPT_SRC_FILES := \ source/opt/loop_dependence.cpp \ source/opt/loop_dependence_helpers.cpp \ source/opt/loop_descriptor.cpp \ + source/opt/loop_fission.cpp \ source/opt/loop_peeling.cpp \ source/opt/loop_unroller.cpp \ source/opt/loop_unswitch_pass.cpp \ diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index f177849a..4e2bc265 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -483,6 +483,11 @@ Optimizer::PassToken CreateLocalRedundancyEliminationPass(); // the loops preheader. Optimizer::PassToken CreateLoopInvariantCodeMotionPass(); +// Creates a loop fission pass. +// This pass will split all top level loops whose register pressure exceedes the +// given |threshold|. +Optimizer::PassToken CreateLoopFissionPass(size_t threshold); + // Creates a loop peeling pass. // This pass will look for conditions inside a loop that are true or false only // for the N first or last iteration. For loop with such condition, those N diff --git a/source/opt/CMakeLists.txt b/source/opt/CMakeLists.txt index 278bc0f6..1ad75642 100644 --- a/source/opt/CMakeLists.txt +++ b/source/opt/CMakeLists.txt @@ -60,6 +60,7 @@ add_library(SPIRV-Tools-opt log.h loop_dependence.h loop_descriptor.h + loop_fission.h loop_peeling.h loop_unroller.h loop_utils.h @@ -143,6 +144,7 @@ add_library(SPIRV-Tools-opt loop_dependence.cpp loop_dependence_helpers.cpp loop_descriptor.cpp + loop_fission.cpp loop_peeling.cpp loop_utils.cpp loop_unroller.cpp diff --git a/source/opt/instruction.cpp b/source/opt/instruction.cpp index 88553ad8..03e9059c 100644 --- a/source/opt/instruction.cpp +++ b/source/opt/instruction.cpp @@ -590,6 +590,10 @@ bool Instruction::IsOpcodeCodeMotionSafe() const { case SpvOpBitwiseXor: case SpvOpBitwiseAnd: case SpvOpNot: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpPtrAccessChain: + case SpvOpInBoundsPtrAccessChain: return true; default: return false; diff --git 
a/source/opt/loop_descriptor.h b/source/opt/loop_descriptor.h index 210ec893..ebd67381 100644 --- a/source/opt/loop_descriptor.h +++ b/source/opt/loop_descriptor.h @@ -153,6 +153,7 @@ class Loop { inline size_t NumImmediateChildren() const { return nested_loops_.size(); } + inline bool HasChildren() const { return !nested_loops_.empty(); } // Adds |nested| as a nested loop of this loop. Automatically register |this| // as the parent of |nested|. inline void AddNestedLoop(Loop* nested) { diff --git a/source/opt/loop_fission.cpp b/source/opt/loop_fission.cpp new file mode 100644 index 00000000..10d52207 --- /dev/null +++ b/source/opt/loop_fission.cpp @@ -0,0 +1,508 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "opt/loop_fission.h" +#include "opt/register_pressure.h" + +// Implement loop fission with an optional parameter to split only +// if the register pressure in a given loop meets a certain criteria. This is +// controlled via the constructors of LoopFissionPass. +// +// 1 - Build a list of loops to be split, these are top level loops (loops +// without child loops themselves) which meet the register pressure criteria, as +// determined by the ShouldSplitLoop method of LoopFissionPass. +// +// 2 - For each loop in the list, group each instruction into a set of related +// instructions by traversing each instructions users and operands recursively. 
+// We stop if we encounter an instruction we have seen before or an instruction +// which we don't consider relevent (i.e OpLoopMerge). We then group these +// groups into two different sets, one for the first loop and one for the +// second. +// +// 3 - We then run CanPerformSplit to check that it would be legal to split a +// loop using those two sets. We check that we haven't altered the relative +// order load/stores appear in the binary and that we aren't breaking any +// dependency between load/stores by splitting them into two loops. We also +// check that none of the OpBranch instructions are dependent on a load as we +// leave control flow structure intact and move only instructions in the body so +// we want to avoid any loads with side affects or aliasing. +// +// 4 - We then split the loop by calling SplitLoop. This function clones the +// loop and attaches it to the preheader and connects the new loops merge block +// to the current loop header block. We then use the two sets built in step 2 to +// remove instructions from each loop. If an instruction appears in the first +// set it is removed from the second loop and vice versa. +// +// 5 - If the multiple split passes flag is set we check if each of the loops +// still meet the register pressure criteria. If they do then we add them to the +// list of loops to be split (created in step one) to allow for loops to be +// split multiple times. +// + +namespace spvtools { +namespace opt { + +class LoopFissionImpl { + public: + LoopFissionImpl(ir::IRContext* context, ir::Loop* loop) + : context_(context), loop_(loop), load_used_in_condition_(false) {} + + // Group each instruction in the loop into sets of instructions related by + // their usedef chains. An instruction which uses another will appear in the + // same set. Then merge those sets into just two sets. Returns false if there + // was one or less sets created. 
+ bool GroupInstructionsByUseDef(); + + // Check if the sets built by GroupInstructionsByUseDef violate any data + // dependence rules. + bool CanPerformSplit(); + + // Split the loop and return a pointer to the new loop. + ir::Loop* SplitLoop(); + + // Checks if |inst| is safe to move. We can only move instructions which don't + // have any side effects and OpLoads and OpStores. + bool MovableInstruction(const ir::Instruction& inst) const; + + private: + // Traverse the def use chain of |inst| and add the users and uses of |inst| + // which are in the same loop to the |returned_set|. + void TraverseUseDef(ir::Instruction* inst, + std::set<ir::Instruction*>* returned_set, + bool ignore_phi_users = false, bool report_loads = false); + + // We group the instructions in the block into two different groups, the + // instructions to be kept in the original loop and the ones to be cloned into + // the new loop. As the cloned loop is attached to the preheader it will be + // the first loop and the second loop will be the original. + std::set<ir::Instruction*> cloned_loop_instructions_; + std::set<ir::Instruction*> original_loop_instructions_; + + // We need a set of all the instructions to be seen so we can break any + // recursion and also so we can ignore certain instructions by preemptively + // adding them to this set. + std::set<ir::Instruction*> seen_instructions_; + + // A map of instructions to their relative position in the function. + std::map<ir::Instruction*, size_t> instruction_order_; + + ir::IRContext* context_; + + ir::Loop* loop_; + + // This is set to true by TraverseUseDef when traversing the instructions + // related to the loop condition and any if conditions should any of those + // instructions be a load. 
+ bool load_used_in_condition_; +}; + +bool LoopFissionImpl::MovableInstruction(const ir::Instruction& inst) const { + return inst.opcode() == SpvOp::SpvOpLoad || + inst.opcode() == SpvOp::SpvOpStore || + inst.opcode() == SpvOp::SpvOpSelectionMerge || + inst.opcode() == SpvOp::SpvOpPhi || inst.IsOpcodeCodeMotionSafe(); +} + +void LoopFissionImpl::TraverseUseDef(ir::Instruction* inst, + std::set<ir::Instruction*>* returned_set, + bool ignore_phi_users, bool report_loads) { + assert(returned_set && "Set to be returned cannot be null."); + + opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr(); + std::set<ir::Instruction*>& inst_set = *returned_set; + + // We create this functor to traverse the use def chain to build the + // grouping of related instructions. The lambda captures the std::function + // to allow it to recurse. + std::function<void(ir::Instruction*)> traverser_functor; + traverser_functor = [this, def_use, &inst_set, &traverser_functor, + ignore_phi_users, report_loads](ir::Instruction* user) { + // If we've seen the instruction before or it is not inside the loop end the + // traversal. + if (!user || seen_instructions_.count(user) != 0 || + !context_->get_instr_block(user) || + !loop_->IsInsideLoop(context_->get_instr_block(user))) { + return; + } + + // Don't include labels or loop merge instructions in the instruction sets. + // Including them would mean we group instructions related only by using the + // same labels (i.e phis). We already preempt the inclusion of + // OpSelectionMerge by adding related instructions to the seen_instructions_ + // set. + if (user->opcode() == SpvOp::SpvOpLoopMerge || + user->opcode() == SpvOp::SpvOpLabel) + return; + + // If the |report_loads| flag is set, set the class field + // load_used_in_condition_ to false. This is used to check that none of the + // condition checks in the loop rely on loads. 
+ if (user->opcode() == SpvOp::SpvOpLoad && report_loads) { + load_used_in_condition_ = true; + } + + // Add the instruction to the set of instructions already seen, this breaks + // recursion and allows us to ignore certain instructions. + seen_instructions_.insert(user); + + inst_set.insert(user); + + // Wrapper functor to traverse the operands of each instruction. + auto traverse_operand = [&traverser_functor, def_use](const uint32_t* id) { + traverser_functor(def_use->GetDef(*id)); + }; + user->ForEachInOperand(traverse_operand); + + // For the first traversal we want to ignore the users of the phi. + if (ignore_phi_users && user->opcode() == SpvOp::SpvOpPhi) return; + + // Traverse each user with this lambda. + def_use->ForEachUser(user, traverser_functor); + + // Wrapper functor for the use traversal. + auto traverse_use = [&traverser_functor](ir::Instruction* use, uint32_t) { + traverser_functor(use); + }; + def_use->ForEachUse(user, traverse_use); + + }; + + // We start the traversal of the use def graph by invoking the above + // lambda with the |inst| parameter. + traverser_functor(inst); +} + +bool LoopFissionImpl::GroupInstructionsByUseDef() { + std::vector<std::set<ir::Instruction*>> sets{}; + + // We want to ignore all the instructions stemming from the loop condition + // instruction. + ir::BasicBlock* condition_block = loop_->FindConditionBlock(); + + if (!condition_block) return false; + ir::Instruction* condition = &*condition_block->tail(); + + // We iterate over the blocks via iterating over all the blocks in the + // function, we do this so we are iterating in the same order which the blocks + // appear in the binary. + ir::Function& function = *loop_->GetHeaderBlock()->GetParent(); + + // Create a temporary set to ignore certain groups of instructions within the + // loop. We don't want any instructions related to control flow to be removed + // from either loop only instructions within the control flow bodies. 
+ std::set<ir::Instruction*> instructions_to_ignore{}; + TraverseUseDef(condition, &instructions_to_ignore, true, true); + + // Traverse control flow instructions to ensure they are added to the + // seen_instructions_ set and will be ignored when it it called with actual + // sets. + for (ir::BasicBlock& block : function) { + if (!loop_->IsInsideLoop(block.id())) continue; + + for (ir::Instruction& inst : block) { + // Ignore all instructions related to control flow. + if (inst.opcode() == SpvOp::SpvOpSelectionMerge || inst.IsBranch()) { + TraverseUseDef(&inst, &instructions_to_ignore, true, true); + } + } + } + + // Traverse the instructions and generate the sets, automatically ignoring any + // instructions in instructions_to_ignore. + for (ir::BasicBlock& block : function) { + if (!loop_->IsInsideLoop(block.id()) || + loop_->GetHeaderBlock()->id() == block.id()) + continue; + + for (ir::Instruction& inst : block) { + // Record the order that each load/store is seen. + if (inst.opcode() == SpvOp::SpvOpLoad || + inst.opcode() == SpvOp::SpvOpStore) { + instruction_order_[&inst] = instruction_order_.size(); + } + + // Ignore instructions already seen in a traversal. + if (seen_instructions_.count(&inst) != 0) { + continue; + } + + // Build the set. + std::set<ir::Instruction*> inst_set{}; + TraverseUseDef(&inst, &inst_set); + if (!inst_set.empty()) sets.push_back(std::move(inst_set)); + } + } + + // If we have one or zero sets return false to indicate that due to + // insufficient instructions we couldn't split the loop into two groups and + // thus the loop can't be split any further. + if (sets.size() < 2) { + return false; + } + + // Merge the loop sets into two different sets. In CanPerformSplit we will + // validate that we don't break the relative ordering of loads/stores by doing + // this. 
+ for (size_t index = 0; index < sets.size() / 2; ++index) { + cloned_loop_instructions_.insert(sets[index].begin(), sets[index].end()); + } + for (size_t index = sets.size() / 2; index < sets.size(); ++index) { + original_loop_instructions_.insert(sets[index].begin(), sets[index].end()); + } + + return true; +} + +bool LoopFissionImpl::CanPerformSplit() { + // Return false if any of the condition instructions in the loop depend on a + // load. + if (load_used_in_condition_) { + return false; + } + + // Build a list of all parent loops of this loop. Loop dependence analysis + // needs this structure. + std::vector<const ir::Loop*> loops; + ir::Loop* parent_loop = loop_; + while (parent_loop) { + loops.push_back(parent_loop); + parent_loop = parent_loop->GetParent(); + } + + LoopDependenceAnalysis analysis{context_, loops}; + + // A list of all the stores in the cloned loop. + std::vector<ir::Instruction*> set_one_stores{}; + + // A list of all the loads in the cloned loop. + std::vector<ir::Instruction*> set_one_loads{}; + + // Populate the above lists. + for (ir::Instruction* inst : cloned_loop_instructions_) { + if (inst->opcode() == SpvOp::SpvOpStore) { + set_one_stores.push_back(inst); + } else if (inst->opcode() == SpvOp::SpvOpLoad) { + set_one_loads.push_back(inst); + } + + // If we find any instruction which we can't move (such as a barrier), + // return false. + if (!MovableInstruction(*inst)) return false; + } + + // We need to calculate the depth of the loop to create the loop dependency + // distance vectors. + const size_t loop_depth = loop_->GetDepth(); + + // Check the dependencies between loads in the cloned loop and stores in the + // original and vice versa. + for (ir::Instruction* inst : original_loop_instructions_) { + // If we find any instruction which we can't move (such as a barrier), + // return false. 
+ if (!MovableInstruction(*inst)) return false; + + // Look at the dependency between the loads in the original and stores in + // the cloned loops. + if (inst->opcode() == SpvOp::SpvOpLoad) { + for (ir::Instruction* store : set_one_stores) { + DistanceVector vec{loop_depth}; + + // If the store actually should appear after the load, return false. + // This means the store has been placed in the wrong grouping. + if (instruction_order_[store] > instruction_order_[inst]) { + return false; + } + // If not independent check the distance vector. + if (!analysis.GetDependence(store, inst, &vec)) { + for (DistanceEntry& entry : vec.GetEntries()) { + // A distance greater than zero means that the store in the cloned + // loop has a dependency on the load in the original loop. + if (entry.distance > 0) return false; + } + } + } + } else if (inst->opcode() == SpvOp::SpvOpStore) { + for (ir::Instruction* load : set_one_loads) { + DistanceVector vec{loop_depth}; + + // If the load actually should appear after the store, return false. + if (instruction_order_[load] > instruction_order_[inst]) { + return false; + } + + // If not independent check the distance vector. + if (!analysis.GetDependence(inst, load, &vec)) { + for (DistanceEntry& entry : vec.GetEntries()) { + // A distance less than zero means the load in the cloned loop is + // dependent on the store instruction in the original loop. + if (entry.distance < 0) return false; + } + } + } + } + } + return true; +} + +ir::Loop* LoopFissionImpl::SplitLoop() { + // Clone the loop. + LoopUtils util{context_, loop_}; + LoopUtils::LoopCloningResult clone_results; + ir::Loop* cloned_loop = util.CloneAndAttachLoopToHeader(&clone_results); + + // Update the OpLoopMerge in the cloned loop. + cloned_loop->UpdateLoopMergeInst(); + + // Add the loop_ to the module. 
+ ir::Function::iterator it = + util.GetFunction()->FindBlock(loop_->GetOrCreatePreHeaderBlock()->id()); + util.GetFunction()->AddBasicBlocks(clone_results.cloned_bb_.begin(), + clone_results.cloned_bb_.end(), ++it); + loop_->SetPreHeaderBlock(cloned_loop->GetMergeBlock()); + + std::vector<ir::Instruction*> instructions_to_kill{}; + + // Kill all the instructions which should appear in the cloned loop but not in + // the original loop. + for (uint32_t id : loop_->GetBlocks()) { + ir::BasicBlock* block = context_->cfg()->block(id); + + for (ir::Instruction& inst : *block) { + // If the instruction appears in the cloned loop instruction group, kill + // it. + if (cloned_loop_instructions_.count(&inst) == 1 && + original_loop_instructions_.count(&inst) == 0) { + instructions_to_kill.push_back(&inst); + if (inst.opcode() == SpvOp::SpvOpPhi) { + context_->ReplaceAllUsesWith( + inst.result_id(), clone_results.value_map_[inst.result_id()]); + } + } + } + } + + // Kill all instructions which should appear in the original loop and not in + // the cloned loop. + for (uint32_t id : cloned_loop->GetBlocks()) { + ir::BasicBlock* block = context_->cfg()->block(id); + for (ir::Instruction& inst : *block) { + ir::Instruction* old_inst = clone_results.ptr_map_[&inst]; + // If the instruction belongs to the original loop instruction group, kill + // it. + if (cloned_loop_instructions_.count(old_inst) == 0 && + original_loop_instructions_.count(old_inst) == 1) { + instructions_to_kill.push_back(&inst); + } + } + } + + for (ir::Instruction* i : instructions_to_kill) { + context_->KillInst(i); + } + + return cloned_loop; +} + +LoopFissionPass::LoopFissionPass(const size_t register_threshold_to_split, + bool split_multiple_times) + : split_multiple_times_(split_multiple_times) { + // Split if the number of registers in the loop exceeds + // |register_threshold_to_split|. 
+ split_criteria_ = + [register_threshold_to_split]( + const RegisterLiveness::RegionRegisterLiveness& liveness) { + return liveness.used_registers_ > register_threshold_to_split; + }; +} + +LoopFissionPass::LoopFissionPass() : split_multiple_times_(false) { + // Split by default. + split_criteria_ = [](const RegisterLiveness::RegionRegisterLiveness&) { + return true; + }; +} + +bool LoopFissionPass::ShouldSplitLoop(const ir::Loop& loop, ir::IRContext* c) { + LivenessAnalysis* analysis = c->GetLivenessAnalysis(); + + RegisterLiveness::RegionRegisterLiveness liveness{}; + + ir::Function* function = loop.GetHeaderBlock()->GetParent(); + analysis->Get(function)->ComputeLoopRegisterPressure(loop, &liveness); + + return split_criteria_(liveness); +} + +Pass::Status LoopFissionPass::Process(ir::IRContext* c) { + bool changed = false; + + for (ir::Function& f : *c->module()) { + // We collect all the inner most loops in the function and run the loop + // splitting util on each. The reason we do this is to allow us to iterate + // over each, as creating new loops will invalidate the the loop iterator. + std::vector<ir::Loop*> inner_most_loops{}; + ir::LoopDescriptor& loop_descriptor = *c->GetLoopDescriptor(&f); + for (ir::Loop& loop : loop_descriptor) { + if (!loop.HasChildren() && ShouldSplitLoop(loop, c)) { + inner_most_loops.push_back(&loop); + } + } + + // List of new loops which meet the criteria to be split again. + std::vector<ir::Loop*> new_loops_to_split{}; + + while (!inner_most_loops.empty()) { + for (ir::Loop* loop : inner_most_loops) { + LoopFissionImpl impl{c, loop}; + + // Group the instructions in the loop into two different sets of related + // instructions. If we can't group the instructions into the two sets + // then we can't split the loop any further. 
+ if (!impl.GroupInstructionsByUseDef()) { + continue; + } + + if (impl.CanPerformSplit()) { + ir::Loop* second_loop = impl.SplitLoop(); + changed = true; + c->InvalidateAnalysesExceptFor(ir::IRContext::kAnalysisLoopAnalysis); + + // If the newly created loop meets the criteria to be split, split it + // again. + if (ShouldSplitLoop(*second_loop, c)) + new_loops_to_split.push_back(second_loop); + + // If the original loop (now split) still meets the criteria to be + // split, split it again. + if (ShouldSplitLoop(*loop, c)) new_loops_to_split.push_back(loop); + } + } + + // If the split multiple times flag has been set add the new loops which + // meet the splitting criteria into the list of loops to be split on the + // next iteration. + if (split_multiple_times_) { + inner_most_loops = std::move(new_loops_to_split); + } else { + break; + } + } + } + + return changed ? Pass::Status::SuccessWithChange + : Pass::Status::SuccessWithoutChange; +} + +} // namespace opt +} // namespace spvtools diff --git a/source/opt/loop_fission.h b/source/opt/loop_fission.h new file mode 100644 index 00000000..8a7424dc --- /dev/null +++ b/source/opt/loop_fission.h @@ -0,0 +1,78 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef LIBSPIRV_OPT_LOOP_FISSION_H_ +#define LIBSPIRV_OPT_LOOP_FISSION_H_ + +#include <algorithm> +#include <cstdint> +#include <map> +#include <utility> +#include <vector> + +#include "cfg.h" +#include "module.h" +#include "opt/loop_dependence.h" +#include "opt/loop_utils.h" +#include "pass.h" +#include "tree_iterator.h" + +namespace spvtools { +namespace opt { + +class LoopFissionPass : public Pass { + public: + // Fuction used to determine if a given loop should be split. Takes register + // pressure region for that loop as a parameter and returns true if the loop + // should be split. + using FissionCriteriaFunction = + std::function<bool(const RegisterLiveness::RegionRegisterLiveness&)>; + + // Pass built with this constructor will split all loops regardless of + // register pressure. Will not split loops more than once. + LoopFissionPass(); + + // Split the loop if the number of registers used in the loop exceeds + // |register_threshold_to_split|. |split_multiple_times| flag determines + // whether or not the pass should split loops after already splitting them + // once. + LoopFissionPass(size_t register_threshold_to_split, + bool split_multiple_times = true); + + // Split loops whose register pressure meets the criteria of |functor|. + LoopFissionPass(FissionCriteriaFunction functor, + bool split_multiple_times = true) + : split_criteria_(functor), split_multiple_times_(split_multiple_times) {} + + const char* name() const override { return "Loop Fission"; } + + Pass::Status Process(ir::IRContext* context) override; + + // Checks if |loop| meets the register pressure criteria to be split. + bool ShouldSplitLoop(const ir::Loop& loop, ir::IRContext* context); + + private: + // Functor to run in ShouldSplitLoop to determine if the register pressure + // criteria is met for splitting the loop. 
+ FissionCriteriaFunction split_criteria_; + + // Flag designating whether or not we should also split the result of + // previously split loops if they meet the register presure criteria. + bool split_multiple_times_; +}; + +} // namespace opt +} // namespace spvtools + +#endif // LIBSPIRV_OPT_LOOP_FISSION_H_ diff --git a/source/opt/loop_utils.cpp b/source/opt/loop_utils.cpp index b87f659d..f8ed413f 100644 --- a/source/opt/loop_utils.cpp +++ b/source/opt/loop_utils.cpp @@ -481,6 +481,78 @@ void LoopUtils::MakeLoopClosedSSA() { ir::IRContext::Analysis::kAnalysisLoopAnalysis); } +ir::Loop* LoopUtils::CloneLoop(LoopCloningResult* cloning_result) const { + // Compute the structured order of the loop basic blocks and store it in the + // vector ordered_loop_blocks. + std::vector<ir::BasicBlock*> ordered_loop_blocks; + loop_->ComputeLoopStructuredOrder(&ordered_loop_blocks); + + // Clone the loop. + return CloneLoop(cloning_result, ordered_loop_blocks); +} + +ir::Loop* LoopUtils::CloneAndAttachLoopToHeader( + LoopCloningResult* cloning_result) { + // Clone the loop. + ir::Loop* new_loop = CloneLoop(cloning_result); + + // Create a new exit block/label for the new loop. + std::unique_ptr<ir::Instruction> new_label{new ir::Instruction( + context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})}; + std::unique_ptr<ir::BasicBlock> new_exit_bb{ + new ir::BasicBlock(std::move(new_label))}; + new_exit_bb->SetParent(loop_->GetMergeBlock()->GetParent()); + + // Create an unconditional branch to the header block. + opt::InstructionBuilder builder{context_, new_exit_bb.get()}; + builder.AddBranch(loop_->GetHeaderBlock()->id()); + + // Save the ids of the new and old merge block. + const uint32_t old_merge_block = loop_->GetMergeBlock()->id(); + const uint32_t new_merge_block = new_exit_bb->id(); + + // Replace the uses of the old merge block in the new loop with the new merge + // block. 
+ for (std::unique_ptr<ir::BasicBlock>& basic_block : + cloning_result->cloned_bb_) { + for (ir::Instruction& inst : *basic_block) { + // For each operand in each instruction check if it is using the old merge + // block and change it to be the new merge block. + auto replace_merge_use = [old_merge_block, + new_merge_block](uint32_t* id) { + if (*id == old_merge_block) *id = new_merge_block; + }; + inst.ForEachInOperand(replace_merge_use); + } + } + + const uint32_t old_header = loop_->GetHeaderBlock()->id(); + const uint32_t new_header = new_loop->GetHeaderBlock()->id(); + opt::analysis::DefUseManager* def_use = context_->get_def_use_mgr(); + + def_use->ForEachUse( + old_header, [new_header, this](ir::Instruction* inst, uint32_t operand) { + if (!this->loop_->IsInsideLoop(inst)) + inst->SetOperand(operand, {new_header}); + }); + + def_use->ForEachUse( + loop_->GetOrCreatePreHeaderBlock()->id(), + [new_merge_block, this](ir::Instruction* inst, uint32_t operand) { + if (this->loop_->IsInsideLoop(inst)) + inst->SetOperand(operand, {new_merge_block}); + + }); + new_loop->SetMergeBlock(new_exit_bb.get()); + + new_loop->SetPreHeaderBlock(loop_->GetPreHeaderBlock()); + + // Add the new block into the cloned instructions. 
+ cloning_result->cloned_bb_.push_back(std::move(new_exit_bb)); + + return new_loop; +} + ir::Loop* LoopUtils::CloneLoop( LoopCloningResult* cloning_result, const std::vector<ir::BasicBlock*>& ordered_loop_blocks) const { @@ -507,14 +579,16 @@ ir::Loop* LoopUtils::CloneLoop( if (loop_->IsInsideLoop(old_bb)) new_loop->AddBasicBlock(new_bb); - for (auto& inst : *new_bb) { - if (inst.HasResultId()) { - uint32_t old_result_id = inst.result_id(); - inst.SetResultId(context_->TakeNextId()); - cloning_result->value_map_[old_result_id] = inst.result_id(); + for (auto new_inst = new_bb->begin(), old_inst = old_bb->begin(); + new_inst != new_bb->end(); ++new_inst, ++old_inst) { + cloning_result->ptr_map_[&*new_inst] = &*old_inst; + if (new_inst->HasResultId()) { + new_inst->SetResultId(context_->TakeNextId()); + cloning_result->value_map_[old_inst->result_id()] = + new_inst->result_id(); // Only look at the defs for now, uses are not updated yet. - def_use_mgr->AnalyzeInstDef(&inst); + def_use_mgr->AnalyzeInstDef(&*new_inst); } } } diff --git a/source/opt/loop_utils.h b/source/opt/loop_utils.h index 47f78e67..de3ff2bf 100644 --- a/source/opt/loop_utils.h +++ b/source/opt/loop_utils.h @@ -46,6 +46,9 @@ class LoopUtils { struct LoopCloningResult { using ValueMapTy = std::unordered_map<uint32_t, uint32_t>; using BlockMapTy = std::unordered_map<uint32_t, ir::BasicBlock*>; + using PtrMap = std::unordered_map<ir::Instruction*, ir::Instruction*>; + + PtrMap ptr_map_; // Mapping between the original loop ids and the new one. ValueMapTy value_map_; @@ -111,6 +114,12 @@ class LoopUtils { ir::Loop* CloneLoop( LoopCloningResult* cloning_result, const std::vector<ir::BasicBlock*>& ordered_loop_blocks) const; + // Clone |loop_| and remap its instructions, as above. Overload to compute + // loop block ordering within method rather than taking in as parameter. 
+ ir::Loop* CloneLoop(LoopCloningResult* cloning_result) const; + + // Clone the |loop_| and make the new loop branch to the second loop on exit. + ir::Loop* CloneAndAttachLoopToHeader(LoopCloningResult* cloning_result); // Perfom a partial unroll of |loop| by given |factor|. This will copy the // body of the loop |factor| times. So a |factor| of one would give a new loop diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp index 5558fbb3..f184a9a4 100644 --- a/source/opt/optimizer.cpp +++ b/source/opt/optimizer.cpp @@ -379,6 +379,11 @@ Optimizer::PassToken CreateLocalRedundancyEliminationPass() { MakeUnique<opt::LocalRedundancyEliminationPass>()); } +Optimizer::PassToken CreateLoopFissionPass(size_t threshold) { + return MakeUnique<Optimizer::PassToken::Impl>( + MakeUnique<opt::LoopFissionPass>(threshold)); +} + Optimizer::PassToken CreateLoopInvariantCodeMotionPass() { return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::LICMPass>()); } diff --git a/source/opt/passes.h b/source/opt/passes.h index 4e81864c..4f44d613 100644 --- a/source/opt/passes.h +++ b/source/opt/passes.h @@ -42,6 +42,7 @@ #include "local_single_block_elim_pass.h" #include "local_single_store_elim_pass.h" #include "local_ssa_elim_pass.h" +#include "loop_fission.h" #include "loop_peeling.h" #include "loop_unroller.h" #include "loop_unswitch_pass.h" diff --git a/test/opt/loop_optimizations/CMakeLists.txt b/test/opt/loop_optimizations/CMakeLists.txt index 7b464fa4..26f32386 100644 --- a/test/opt/loop_optimizations/CMakeLists.txt +++ b/test/opt/loop_optimizations/CMakeLists.txt @@ -108,3 +108,11 @@ add_spvtools_unittest(TARGET loop_dependence_analysis_helpers dependence_analysis_helpers.cpp LIBS SPIRV-Tools-opt ) + +add_spvtools_unittest(TARGET loop_fission + SRCS ../function_utils.h + loop_fission.cpp + LIBS SPIRV-Tools-opt +) + + diff --git a/test/opt/loop_optimizations/loop_fission.cpp b/test/opt/loop_optimizations/loop_fission.cpp new file mode 100644 index 
00000000..24e89595 --- /dev/null +++ b/test/opt/loop_optimizations/loop_fission.cpp @@ -0,0 +1,3492 @@ +// Copyright (c) 2018 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string> +#include <vector> + +#include <gmock/gmock.h> + +#include "../assembly_builder.h" +#include "../function_utils.h" +#include "../pass_fixture.h" +#include "../pass_utils.h" +#include "opt/loop_fission.h" +#include "opt/loop_unroller.h" +#include "opt/loop_utils.h" +#include "opt/pass.h" +namespace { + +using namespace spvtools; +using ::testing::UnorderedElementsAre; + +using FissionClassTest = PassTest<::testing::Test>; + +/* +Generated from the following GLSL + +#version 430 + +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + A[i] = B[i]; + B[i] = A[i]; + } +} + +Result should be equivalent to: + +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + A[i] = B[i]; + } + + for (int i = 0; i < 10; i++) { + B[i] = A[i]; + } +} +*/ +TEST_F(FissionClassTest, SimpleFission) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "A" +OpName %5 "B" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 
= OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %20 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%29 = OpAccessChain %18 %5 %22 +%30 = OpLoad %13 %29 +%31 = OpAccessChain %18 %4 %22 +OpStore %31 %30 +%32 = OpAccessChain %18 %4 %22 +%33 = OpLoad %13 %32 +%34 = OpAccessChain %18 %5 %22 +OpStore %34 %33 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + +const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "A" +OpName %5 "B" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpBranch %35 +%35 = OpLabel +%36 = OpPhi %8 %10 %20 %47 %46 +OpLoopMerge %48 %46 None +OpBranch %37 +%37 = OpLabel +%38 = OpSLessThan %12 %36 %11 +OpBranchConditional %38 %39 %48 +%39 = OpLabel +%40 = OpAccessChain %18 %5 %36 +%41 = OpLoad %13 %40 +%42 = OpAccessChain %18 %4 %36 +OpStore %42 %41 +OpBranch %46 +%46 = OpLabel +%47 = OpIAdd %8 %36 %19 
+OpBranch %35 +%48 = OpLabel +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %48 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%32 = OpAccessChain %18 %4 %22 +%33 = OpLoad %13 %32 +%34 = OpAccessChain %18 %5 %22 +OpStore %34 %33 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); + + // Check that the loop will NOT be split when provided with a pass-through + // register pressure functor which just returns false. + SinglePassRunAndCheck<opt::LoopFissionPass>( + source, source, true, + [](const opt::RegisterLiveness::RegionRegisterLiveness&) { + return false; + }); +} + +/* +Generated from the following GLSL + +#version 430 + +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + A[i] = B[i]; + B[i] = A[i+1]; + } +} + +This loop should not be split, as the i+1 dependence would be broken by +splitting the loop. 
+*/ + +TEST_F(FissionClassTest, FissionInterdependency) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "A" +OpName %5 "B" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %20 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%29 = OpAccessChain %18 %5 %22 +%30 = OpLoad %13 %29 +%31 = OpAccessChain %18 %4 %22 +OpStore %31 %30 +%32 = OpIAdd %8 %22 %19 +%33 = OpAccessChain %18 %4 %32 +%34 = OpLoad %13 %33 +%35 = OpAccessChain %18 %5 %22 +OpStore %35 %34 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for ushader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +Generated from the following GLSL + +#version 430 + +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + A[i] = 
B[i]; + B[i+1] = A[i]; + } +} + + +This should not be split as the load B[i] is dependent on the store B[i+1] +*/ +TEST_F(FissionClassTest, FissionInterdependency2) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "A" +OpName %5 "B" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %20 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%29 = OpAccessChain %18 %5 %22 +%30 = OpLoad %13 %29 +%31 = OpAccessChain %18 %4 %22 +OpStore %31 %30 +%32 = OpIAdd %8 %22 %19 +%33 = OpAccessChain %18 %4 %22 +%34 = OpLoad %13 %33 +%35 = OpAccessChain %18 %5 %32 +OpStore %35 %34 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +void main(void) { + 
float A[10]; + float B[10]; + float C[10]; + float D[10]; + for (int i = 0; i < 10; i++) { + A[i] = B[i]; + B[i] = A[i]; + C[i] = D[i]; + D[i] = C[i]; + } +} + +This should be split into the equivalent of: + + for (int i = 0; i < 10; i++) { + A[i] = B[i]; + B[i] = A[i]; + } + for (int i = 0; i < 10; i++) { + C[i] = D[i]; + D[i] = C[i]; + } + +We then check that the loop is broken into four for loops like so, if the pass +is run twice: + for (int i = 0; i < 10; i++) + A[i] = B[i]; + for (int i = 0; i < 10; i++) + B[i] = A[i]; + for (int i = 0; i < 10; i++) + C[i] = D[i]; + for (int i = 0; i < 10; i++) + D[i] = C[i]; + +*/ + +TEST_F(FissionClassTest, FissionMultipleLoadStores) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "i" + OpName %4 "A" + OpName %5 "B" + OpName %6 "C" + OpName %7 "D" + %8 = OpTypeVoid + %9 = OpTypeFunction %8 + %10 = OpTypeInt 32 1 + %11 = OpTypePointer Function %10 + %12 = OpConstant %10 0 + %13 = OpConstant %10 10 + %14 = OpTypeBool + %15 = OpTypeFloat 32 + %16 = OpTypeInt 32 0 + %17 = OpConstant %16 10 + %18 = OpTypeArray %15 %17 + %19 = OpTypePointer Function %18 + %20 = OpTypePointer Function %15 + %21 = OpConstant %10 1 + %2 = OpFunction %8 None %9 + %22 = OpLabel + %3 = OpVariable %11 Function + %4 = OpVariable %19 Function + %5 = OpVariable %19 Function + %6 = OpVariable %19 Function + %7 = OpVariable %19 Function + OpBranch %23 + %23 = OpLabel + %24 = OpPhi %10 %12 %22 %25 %26 + OpLoopMerge %27 %26 None + OpBranch %28 + %28 = OpLabel + %29 = OpSLessThan %14 %24 %13 + OpBranchConditional %29 %30 %27 + %30 = OpLabel + %31 = OpAccessChain %20 %5 %24 + %32 = OpLoad %15 %31 + %33 = OpAccessChain %20 %4 %24 + OpStore %33 %32 + %34 = OpAccessChain %20 %4 %24 + %35 = OpLoad %15 %34 + %36
= OpAccessChain %20 %5 %24 + OpStore %36 %35 + %37 = OpAccessChain %20 %7 %24 + %38 = OpLoad %15 %37 + %39 = OpAccessChain %20 %6 %24 + OpStore %39 %38 + %40 = OpAccessChain %20 %6 %24 + %41 = OpLoad %15 %40 + %42 = OpAccessChain %20 %7 %24 + OpStore %42 %41 + OpBranch %26 + %26 = OpLabel + %25 = OpIAdd %10 %24 %21 + OpBranch %23 + %27 = OpLabel + OpReturn + OpFunctionEnd + )"; + + const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "A" +OpName %5 "B" +OpName %6 "C" +OpName %7 "D" +%8 = OpTypeVoid +%9 = OpTypeFunction %8 +%10 = OpTypeInt 32 1 +%11 = OpTypePointer Function %10 +%12 = OpConstant %10 0 +%13 = OpConstant %10 10 +%14 = OpTypeBool +%15 = OpTypeFloat 32 +%16 = OpTypeInt 32 0 +%17 = OpConstant %16 10 +%18 = OpTypeArray %15 %17 +%19 = OpTypePointer Function %18 +%20 = OpTypePointer Function %15 +%21 = OpConstant %10 1 +%2 = OpFunction %8 None %9 +%22 = OpLabel +%3 = OpVariable %11 Function +%4 = OpVariable %19 Function +%5 = OpVariable %19 Function +%6 = OpVariable %19 Function +%7 = OpVariable %19 Function +OpBranch %43 +%43 = OpLabel +%44 = OpPhi %10 %12 %22 %61 %60 +OpLoopMerge %62 %60 None +OpBranch %45 +%45 = OpLabel +%46 = OpSLessThan %14 %44 %13 +OpBranchConditional %46 %47 %62 +%47 = OpLabel +%48 = OpAccessChain %20 %5 %44 +%49 = OpLoad %15 %48 +%50 = OpAccessChain %20 %4 %44 +OpStore %50 %49 +%51 = OpAccessChain %20 %4 %44 +%52 = OpLoad %15 %51 +%53 = OpAccessChain %20 %5 %44 +OpStore %53 %52 +OpBranch %60 +%60 = OpLabel +%61 = OpIAdd %10 %44 %21 +OpBranch %43 +%62 = OpLabel +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %10 %12 %62 %25 %26 +OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %14 %24 %13 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +%37 = OpAccessChain %20 %7 %24 +%38 = OpLoad %15 %37 +%39 = OpAccessChain %20 %6 %24 
+OpStore %39 %38 +%40 = OpAccessChain %20 %6 %24 +%41 = OpLoad %15 %40 +%42 = OpAccessChain %20 %7 %24 +OpStore %42 %41 +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %10 %24 %21 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + + +const std::string expected_multiple_passes = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "A" +OpName %5 "B" +OpName %6 "C" +OpName %7 "D" +%8 = OpTypeVoid +%9 = OpTypeFunction %8 +%10 = OpTypeInt 32 1 +%11 = OpTypePointer Function %10 +%12 = OpConstant %10 0 +%13 = OpConstant %10 10 +%14 = OpTypeBool +%15 = OpTypeFloat 32 +%16 = OpTypeInt 32 0 +%17 = OpConstant %16 10 +%18 = OpTypeArray %15 %17 +%19 = OpTypePointer Function %18 +%20 = OpTypePointer Function %15 +%21 = OpConstant %10 1 +%2 = OpFunction %8 None %9 +%22 = OpLabel +%3 = OpVariable %11 Function +%4 = OpVariable %19 Function +%5 = OpVariable %19 Function +%6 = OpVariable %19 Function +%7 = OpVariable %19 Function +OpBranch %63 +%63 = OpLabel +%64 = OpPhi %10 %12 %22 %75 %74 +OpLoopMerge %76 %74 None +OpBranch %65 +%65 = OpLabel +%66 = OpSLessThan %14 %64 %13 +OpBranchConditional %66 %67 %76 +%67 = OpLabel +%68 = OpAccessChain %20 %5 %64 +%69 = OpLoad %15 %68 +%70 = OpAccessChain %20 %4 %64 +OpStore %70 %69 +OpBranch %74 +%74 = OpLabel +%75 = OpIAdd %10 %64 %21 +OpBranch %63 +%76 = OpLabel +OpBranch %43 +%43 = OpLabel +%44 = OpPhi %10 %12 %76 %61 %60 +OpLoopMerge %62 %60 None +OpBranch %45 +%45 = OpLabel +%46 = OpSLessThan %14 %44 %13 +OpBranchConditional %46 %47 %62 +%47 = OpLabel +%51 = OpAccessChain %20 %4 %44 +%52 = OpLoad %15 %51 +%53 = OpAccessChain %20 %5 %44 +OpStore %53 %52 +OpBranch %60 +%60 = OpLabel +%61 = OpIAdd %10 %44 %21 +OpBranch %43 +%62 = OpLabel +OpBranch %77 +%77 = OpLabel +%78 = OpPhi %10 %12 %62 %89 %88 +OpLoopMerge %90 %88 None +OpBranch %79 +%79 = OpLabel +%80 = 
OpSLessThan %14 %78 %13 +OpBranchConditional %80 %81 %90 +%81 = OpLabel +%82 = OpAccessChain %20 %7 %78 +%83 = OpLoad %15 %82 +%84 = OpAccessChain %20 %6 %78 +OpStore %84 %83 +OpBranch %88 +%88 = OpLabel +%89 = OpIAdd %10 %78 %21 +OpBranch %77 +%90 = OpLabel +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %10 %12 %90 %25 %26 +OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %14 %24 %13 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +%40 = OpAccessChain %20 %6 %24 +%41 = OpLoad %15 %40 +%42 = OpAccessChain %20 %7 %24 +OpStore %42 %41 +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %10 %24 %21 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); + + // By passing 1 as argument we are using the constructor which makes the + // criterion to split the loop be whether the registers in the loop exceed 1. By + // using this constructor we are also enabling multiple passes (disabled by + // default).
+ SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected_multiple_passes, + true, 1); +} + +/* +#version 430 +void main(void) { + int accumulator = 0; + float X[10]; + float Y[10]; + + for (int i = 0; i < 10; i++) { + X[i] = Y[i]; + Y[i] = X[i]; + accumulator += i; + } +} + +This should be split into the equivalent of: + +#version 430 +void main(void) { + int accumulator = 0; + float X[10]; + float Y[10]; + + for (int i = 0; i < 10; i++) { + X[i] = Y[i]; + } + for (int i = 0; i < 10; i++) { + Y[i] = X[i]; + accumulator += i; + } +} +*/ +TEST_F(FissionClassTest, FissionWithAccumulator) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"(OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "accumulator" + OpName %4 "i" + OpName %5 "X" + OpName %6 "Y" + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpTypeInt 32 1 + %10 = OpTypePointer Function %9 + %11 = OpConstant %9 0 + %12 = OpConstant %9 10 + %13 = OpTypeBool + %14 = OpTypeFloat 32 + %15 = OpTypeInt 32 0 + %16 = OpConstant %15 10 + %17 = OpTypeArray %14 %16 + %18 = OpTypePointer Function %17 + %19 = OpTypePointer Function %14 + %20 = OpConstant %9 1 + %2 = OpFunction %7 None %8 + %21 = OpLabel + %3 = OpVariable %10 Function + %4 = OpVariable %10 Function + %5 = OpVariable %18 Function + %6 = OpVariable %18 Function + OpBranch %22 + %22 = OpLabel + %23 = OpPhi %9 %11 %21 %24 %25 + %26 = OpPhi %9 %11 %21 %27 %25 + OpLoopMerge %28 %25 None + OpBranch %29 + %29 = OpLabel + %30 = OpSLessThan %13 %26 %12 + OpBranchConditional %30 %31 %28 + %31 = OpLabel + %32 = OpAccessChain %19 %6 %26 + %33 = OpLoad %14 %32 + %34 = OpAccessChain %19 %5 %26 + OpStore %34 %33 + %35 = OpAccessChain %19 %5 %26 + %36 = OpLoad %14 %35 + %37 = OpAccessChain %19 %6 %26 + OpStore %37 %36 + %24 = OpIAdd %9 %23 %26 + OpBranch %25 + %25 = 
OpLabel + %27 = OpIAdd %9 %26 %20 + OpBranch %22 + %28 = OpLabel + OpReturn + OpFunctionEnd + )"; + + const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "accumulator" +OpName %4 "i" +OpName %5 "X" +OpName %6 "Y" +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypePointer Function %17 +%19 = OpTypePointer Function %14 +%20 = OpConstant %9 1 +%2 = OpFunction %7 None %8 +%21 = OpLabel +%3 = OpVariable %10 Function +%4 = OpVariable %10 Function +%5 = OpVariable %18 Function +%6 = OpVariable %18 Function +OpBranch %38 +%38 = OpLabel +%40 = OpPhi %9 %11 %21 %52 %51 +OpLoopMerge %53 %51 None +OpBranch %41 +%41 = OpLabel +%42 = OpSLessThan %13 %40 %12 +OpBranchConditional %42 %43 %53 +%43 = OpLabel +%44 = OpAccessChain %19 %6 %40 +%45 = OpLoad %14 %44 +%46 = OpAccessChain %19 %5 %40 +OpStore %46 %45 +OpBranch %51 +%51 = OpLabel +%52 = OpIAdd %9 %40 %20 +OpBranch %38 +%53 = OpLabel +OpBranch %22 +%22 = OpLabel +%23 = OpPhi %9 %11 %53 %24 %25 +%26 = OpPhi %9 %11 %53 %27 %25 +OpLoopMerge %28 %25 None +OpBranch %29 +%29 = OpLabel +%30 = OpSLessThan %13 %26 %12 +OpBranchConditional %30 %31 %28 +%31 = OpLabel +%35 = OpAccessChain %19 %5 %26 +%36 = OpLoad %14 %35 +%37 = OpAccessChain %19 %6 %26 +OpStore %37 %36 +%24 = OpIAdd %9 %23 %26 +OpBranch %25 +%25 = OpLabel +%27 = OpIAdd %9 %26 %20 +OpBranch %22 +%28 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << 
"Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +/* +Generated from the following glsl: + +#version 430 +layout(location=0) out float x; +layout(location=1) out float y; + +void main(void) { + float accumulator_1 = 0; + float accumulator_2 = 0; + for (int i = 0; i < 10; i++) { + accumulator_1 += i; + accumulator_2 += i; + } + + x = accumulator_1; + y = accumulator_2; +} + +Should be split into equivalent of: + +void main(void) { + float accumulator_1 = 0; + float accumulator_2 = 0; + for (int i = 0; i < 10; i++) { + accumulator_1 += i; + } + + for (int i = 0; i < 10; i++) { + accumulator_2 += i; + } + x = accumulator_1; + y = accumulator_2; +} + +*/ +TEST_F(FissionClassTest, FissionWithPhisUsedOutwithLoop) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"(OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" %3 %4 + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %5 "accumulator_1" + OpName %6 "accumulator_2" + OpName %7 "i" + OpName %3 "x" + OpName %4 "y" + OpDecorate %3 Location 0 + OpDecorate %4 Location 1 + %8 = OpTypeVoid + %9 = OpTypeFunction %8 + %10 = OpTypeFloat 32 + %11 = OpTypePointer Function %10 + %12 = OpConstant %10 0 + %13 = OpTypeInt 32 1 + %14 = OpTypePointer Function %13 + %15 = OpConstant %13 0 + %16 = OpConstant %13 10 + %17 = OpTypeBool + %18 = OpConstant %13 1 + %19 = OpTypePointer Output %10 + %3 = OpVariable %19 Output + %4 = OpVariable %19 Output + %2 = OpFunction %8 None %9 + %20 = OpLabel + %5 = OpVariable %11 Function + %6 = OpVariable %11 Function + %7 = OpVariable %14 Function + OpBranch %21 + %21 = OpLabel + %22 = OpPhi %10 %12 %20 %23 %24 + %25 = OpPhi %10 %12 %20 %26 %24 + %27 = OpPhi %13 %15 %20 %28 %24 + OpLoopMerge %29 %24 None + 
OpBranch %30 + %30 = OpLabel + %31 = OpSLessThan %17 %27 %16 + OpBranchConditional %31 %32 %29 + %32 = OpLabel + %33 = OpConvertSToF %10 %27 + %26 = OpFAdd %10 %25 %33 + %34 = OpConvertSToF %10 %27 + %23 = OpFAdd %10 %22 %34 + OpBranch %24 + %24 = OpLabel + %28 = OpIAdd %13 %27 %18 + OpStore %7 %28 + OpBranch %21 + %29 = OpLabel + OpStore %3 %25 + OpStore %4 %22 + OpReturn + OpFunctionEnd + )"; + + const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" %3 %4 +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %5 "accumulator_1" +OpName %6 "accumulator_2" +OpName %7 "i" +OpName %3 "x" +OpName %4 "y" +OpDecorate %3 Location 0 +OpDecorate %4 Location 1 +%8 = OpTypeVoid +%9 = OpTypeFunction %8 +%10 = OpTypeFloat 32 +%11 = OpTypePointer Function %10 +%12 = OpConstant %10 0 +%13 = OpTypeInt 32 1 +%14 = OpTypePointer Function %13 +%15 = OpConstant %13 0 +%16 = OpConstant %13 10 +%17 = OpTypeBool +%18 = OpConstant %13 1 +%19 = OpTypePointer Output %10 +%3 = OpVariable %19 Output +%4 = OpVariable %19 Output +%2 = OpFunction %8 None %9 +%20 = OpLabel +%5 = OpVariable %11 Function +%6 = OpVariable %11 Function +%7 = OpVariable %14 Function +OpBranch %35 +%35 = OpLabel +%37 = OpPhi %10 %12 %20 %43 %46 +%38 = OpPhi %13 %15 %20 %47 %46 +OpLoopMerge %48 %46 None +OpBranch %39 +%39 = OpLabel +%40 = OpSLessThan %17 %38 %16 +OpBranchConditional %40 %41 %48 +%41 = OpLabel +%42 = OpConvertSToF %10 %38 +%43 = OpFAdd %10 %37 %42 +OpBranch %46 +%46 = OpLabel +%47 = OpIAdd %13 %38 %18 +OpStore %7 %47 +OpBranch %35 +%48 = OpLabel +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %10 %12 %48 %23 %24 +%27 = OpPhi %13 %15 %48 %28 %24 +OpLoopMerge %29 %24 None +OpBranch %30 +%30 = OpLabel +%31 = OpSLessThan %17 %27 %16 +OpBranchConditional %31 %32 %29 +%32 = OpLabel +%34 = OpConvertSToF %10 %27 +%23 = OpFAdd %10 %22 %34 +OpBranch %24 +%24 = OpLabel +%28 = OpIAdd %13 %27 %18 
+OpStore %7 %28 +OpBranch %21 +%29 = OpLabel +OpStore %3 %37 +OpStore %4 %22 +OpReturn +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +/* +#version 430 +void main(void) { + float A[10][10]; + float B[10][10]; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + A[i][j] = B[i][j]; + B[i][j] = A[i][j]; + } + } +} + +Should be split into equivalent of: + +#version 430 +void main(void) { + float A[10][10]; + float B[10][10]; + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + A[i][j] = B[i][j]; + } + for (int j = 0; j < 10; j++) { + B[i][j] = A[i][j]; + } + } +} + + +*/ +TEST_F(FissionClassTest, FissionNested) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "i" + OpName %4 "j" + OpName %5 "A" + OpName %6 "B" + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpTypeInt 32 1 + %10 = OpTypePointer Function %9 + %11 = OpConstant %9 0 + %12 = OpConstant %9 10 + %13 = OpTypeBool + %14 = OpTypeFloat 32 + %15 = OpTypeInt 32 0 + %16 = OpConstant %15 10 + %17 = OpTypeArray %14 %16 + %18 = OpTypeArray %17 %16 + %19 = OpTypePointer Function %18 + %20 = OpTypePointer Function %14 + %21 = OpConstant %9 1 + %2 = OpFunction %7 None %8 + %22 = OpLabel + %3 = OpVariable %10 Function + %4 = OpVariable %10 Function + %5 = OpVariable %19 Function + %6 = OpVariable %19 Function + OpStore %3 %11 
+ OpBranch %23 + %23 = OpLabel + %24 = OpPhi %9 %11 %22 %25 %26 + OpLoopMerge %27 %26 None + OpBranch %28 + %28 = OpLabel + %29 = OpSLessThan %13 %24 %12 + OpBranchConditional %29 %30 %27 + %30 = OpLabel + OpStore %4 %11 + OpBranch %31 + %31 = OpLabel + %32 = OpPhi %9 %11 %30 %33 %34 + OpLoopMerge %35 %34 None + OpBranch %36 + %36 = OpLabel + %37 = OpSLessThan %13 %32 %12 + OpBranchConditional %37 %38 %35 + %38 = OpLabel + %39 = OpAccessChain %20 %6 %24 %32 + %40 = OpLoad %14 %39 + %41 = OpAccessChain %20 %5 %24 %32 + OpStore %41 %40 + %42 = OpAccessChain %20 %5 %24 %32 + %43 = OpLoad %14 %42 + %44 = OpAccessChain %20 %6 %24 %32 + OpStore %44 %43 + OpBranch %34 + %34 = OpLabel + %33 = OpIAdd %9 %32 %21 + OpStore %4 %33 + OpBranch %31 + %35 = OpLabel + OpBranch %26 + %26 = OpLabel + %25 = OpIAdd %9 %24 %21 + OpStore %3 %25 + OpBranch %23 + %27 = OpLabel + OpReturn + OpFunctionEnd + )"; + + const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "j" +OpName %5 "A" +OpName %6 "B" +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypeArray %17 %16 +%19 = OpTypePointer Function %18 +%20 = OpTypePointer Function %14 +%21 = OpConstant %9 1 +%2 = OpFunction %7 None %8 +%22 = OpLabel +%3 = OpVariable %10 Function +%4 = OpVariable %10 Function +%5 = OpVariable %19 Function +%6 = OpVariable %19 Function +OpStore %3 %11 +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %9 %11 %22 %25 %26 +OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %13 %24 %12 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +OpStore %4 %11 +OpBranch %45 +%45 = OpLabel +%46 = OpPhi %9 
%11 %30 %57 %56 +OpLoopMerge %58 %56 None +OpBranch %47 +%47 = OpLabel +%48 = OpSLessThan %13 %46 %12 +OpBranchConditional %48 %49 %58 +%49 = OpLabel +%50 = OpAccessChain %20 %6 %24 %46 +%51 = OpLoad %14 %50 +%52 = OpAccessChain %20 %5 %24 %46 +OpStore %52 %51 +OpBranch %56 +%56 = OpLabel +%57 = OpIAdd %9 %46 %21 +OpStore %4 %57 +OpBranch %45 +%58 = OpLabel +OpBranch %31 +%31 = OpLabel +%32 = OpPhi %9 %11 %58 %33 %34 +OpLoopMerge %35 %34 None +OpBranch %36 +%36 = OpLabel +%37 = OpSLessThan %13 %32 %12 +OpBranchConditional %37 %38 %35 +%38 = OpLabel +%42 = OpAccessChain %20 %5 %24 %32 +%43 = OpLoad %14 %42 +%44 = OpAccessChain %20 %6 %24 %32 +OpStore %44 %43 +OpBranch %34 +%34 = OpLabel +%33 = OpIAdd %9 %32 %21 +OpStore %4 %33 +OpBranch %31 +%35 = OpLabel +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %9 %24 %21 +OpStore %3 %25 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +/* +#version 430 +void main(void) { + int accumulator = 0; + float A[10]; + float B[10]; + float C[10]; + + for (int i = 0; i < 10; i++) { + int c = C[i]; + A[i] = B[i]; + B[i] = A[i] + c; + } +} + +This loop should not be split as we would have to break the order of the loads +to do so. 
It would be grouped into two sets: + +1 + int c = C[i]; + B[i] = A[i] + c; + +2 + A[i] = B[i]; + +To keep the load C[i] in the same order we would need to put B[i] ahead of that +*/ +TEST_F(FissionClassTest, FissionLoad) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "c" +OpName %5 "C" +OpName %6 "A" +OpName %7 "B" +%8 = OpTypeVoid +%9 = OpTypeFunction %8 +%10 = OpTypeInt 32 1 +%11 = OpTypePointer Function %10 +%12 = OpConstant %10 0 +%13 = OpConstant %10 10 +%14 = OpTypeBool +%15 = OpTypeFloat 32 +%16 = OpTypePointer Function %15 +%17 = OpTypeInt 32 0 +%18 = OpConstant %17 10 +%19 = OpTypeArray %15 %18 +%20 = OpTypePointer Function %19 +%21 = OpConstant %10 1 +%2 = OpFunction %8 None %9 +%22 = OpLabel +%3 = OpVariable %11 Function +%4 = OpVariable %16 Function +%5 = OpVariable %20 Function +%6 = OpVariable %20 Function +%7 = OpVariable %20 Function +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %10 %12 %22 %25 %26 +OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %14 %24 %13 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +%31 = OpAccessChain %16 %5 %24 +%32 = OpLoad %15 %31 +OpStore %4 %32 +%33 = OpAccessChain %16 %7 %24 +%34 = OpLoad %15 %33 +%35 = OpAccessChain %16 %6 %24 +OpStore %35 %34 +%36 = OpAccessChain %16 %6 %24 +%37 = OpLoad %15 %36 +%38 = OpFAdd %15 %37 %32 +%39 = OpAccessChain %16 %7 %24 +OpStore %39 %38 +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %10 %24 %21 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling 
failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +layout(location=0) flat in int condition; +void main(void) { + float A[10]; + float B[10]; + + for (int i = 0; i < 10; i++) { + if (condition == 1) + A[i] = B[i]; + else + B[i] = A[i]; + } +} + + +When this is split we leave the condition check and control flow inplace and +leave its removal for dead code elimination. + +#version 430 +layout(location=0) flat in int condition; +void main(void) { + float A[10]; + float B[10]; + + for (int i = 0; i < 10; i++) { + if (condition == 1) + A[i] = B[i]; + else + ; + } + for (int i = 0; i < 10; i++) { + if (condition == 1) + ; + else + B[i] = A[i]; + } +} + + +*/ +TEST_F(FissionClassTest, FissionControlFlow) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"( + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" %3 + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %4 "i" + OpName %3 "condition" + OpName %5 "A" + OpName %6 "B" + OpDecorate %3 Flat + OpDecorate %3 Location 0 + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpTypeInt 32 1 + %10 = OpTypePointer Function %9 + %11 = OpConstant %9 0 + %12 = OpConstant %9 10 + %13 = OpTypeBool + %14 = OpTypePointer Input %9 + %3 = OpVariable %14 Input + %15 = OpConstant %9 1 + %16 = OpTypeFloat 32 + %17 = OpTypeInt 32 0 + %18 = OpConstant %17 10 + %19 = OpTypeArray %16 %18 + %20 = OpTypePointer Function %19 + %21 = OpTypePointer Function %16 + %2 = OpFunction %7 None %8 + %22 = OpLabel + %4 = OpVariable %10 Function + %5 = OpVariable %20 Function + %6 = OpVariable %20 Function + %31 = OpLoad %9 %3 + OpStore %4 %11 + OpBranch %23 + %23 = OpLabel + %24 = OpPhi %9 %11 %22 %25 %26 + OpLoopMerge %27 %26 None + OpBranch %28 + %28 = 
OpLabel + %29 = OpSLessThan %13 %24 %12 + OpBranchConditional %29 %30 %27 + %30 = OpLabel + %32 = OpIEqual %13 %31 %15 + OpSelectionMerge %33 None + OpBranchConditional %32 %34 %35 + %34 = OpLabel + %36 = OpAccessChain %21 %6 %24 + %37 = OpLoad %16 %36 + %38 = OpAccessChain %21 %5 %24 + OpStore %38 %37 + OpBranch %33 + %35 = OpLabel + %39 = OpAccessChain %21 %5 %24 + %40 = OpLoad %16 %39 + %41 = OpAccessChain %21 %6 %24 + OpStore %41 %40 + OpBranch %33 + %33 = OpLabel + OpBranch %26 + %26 = OpLabel + %25 = OpIAdd %9 %24 %15 + OpStore %4 %25 + OpBranch %23 + %27 = OpLabel + OpReturn + OpFunctionEnd + )"; + + const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" %3 +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %4 "i" +OpName %3 "condition" +OpName %5 "A" +OpName %6 "B" +OpDecorate %3 Flat +OpDecorate %3 Location 0 +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypePointer Input %9 +%3 = OpVariable %14 Input +%15 = OpConstant %9 1 +%16 = OpTypeFloat 32 +%17 = OpTypeInt 32 0 +%18 = OpConstant %17 10 +%19 = OpTypeArray %16 %18 +%20 = OpTypePointer Function %19 +%21 = OpTypePointer Function %16 +%2 = OpFunction %7 None %8 +%22 = OpLabel +%4 = OpVariable %10 Function +%5 = OpVariable %20 Function +%6 = OpVariable %20 Function +%23 = OpLoad %9 %3 +OpStore %4 %11 +OpBranch %42 +%42 = OpLabel +%43 = OpPhi %9 %11 %22 %58 %57 +OpLoopMerge %59 %57 None +OpBranch %44 +%44 = OpLabel +%45 = OpSLessThan %13 %43 %12 +OpBranchConditional %45 %46 %59 +%46 = OpLabel +%47 = OpIEqual %13 %23 %15 +OpSelectionMerge %56 None +OpBranchConditional %47 %52 %48 +%48 = OpLabel +OpBranch %56 +%52 = OpLabel +%53 = OpAccessChain %21 %6 %43 +%54 = OpLoad %16 %53 +%55 = OpAccessChain %21 %5 %43 +OpStore %55 %54 +OpBranch %56 +%56 = OpLabel +OpBranch 
%57 +%57 = OpLabel +%58 = OpIAdd %9 %43 %15 +OpStore %4 %58 +OpBranch %42 +%59 = OpLabel +OpBranch %24 +%24 = OpLabel +%25 = OpPhi %9 %11 %59 %26 %27 +OpLoopMerge %28 %27 None +OpBranch %29 +%29 = OpLabel +%30 = OpSLessThan %13 %25 %12 +OpBranchConditional %30 %31 %28 +%31 = OpLabel +%32 = OpIEqual %13 %23 %15 +OpSelectionMerge %33 None +OpBranchConditional %32 %34 %35 +%34 = OpLabel +OpBranch %33 +%35 = OpLabel +%39 = OpAccessChain %21 %5 %25 +%40 = OpLoad %16 %39 +%41 = OpAccessChain %21 %6 %25 +OpStore %41 %40 +OpBranch %33 +%33 = OpLabel +OpBranch %27 +%27 = OpLabel +%26 = OpIAdd %9 %25 %15 +OpStore %4 %26 +OpBranch %24 +%28 = OpLabel +OpReturn +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +/* +#version 430 +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + if (i == 1) + B[i] = A[i]; + else if (i == 2) + A[i] = B[i]; + else + A[i] = 0; + } +} + +After running the pass with multiple splits enabled (via register threshold of +1) we expect the equivalent of: + +#version 430 +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + if (i == 1) + B[i] = A[i]; + else if (i == 2) + else + } + for (int i = 0; i < 10; i++) { + if (i == 1) + else if (i == 2) + A[i] = B[i]; + else + } + for (int i = 0; i < 10; i++) { + if (i == 1) + else if (i == 2) + else + A[i] = 0; + } + +} + +*/ +TEST_F(FissionClassTest, FissionControlFlow2) { + // clang-format off + // With opt::LocalMultiStoreElimPass + const std::string source = R"(OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical 
GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "i" + OpName %4 "B" + OpName %5 "A" + %6 = OpTypeVoid + %7 = OpTypeFunction %6 + %8 = OpTypeInt 32 1 + %9 = OpTypePointer Function %8 + %10 = OpConstant %8 0 + %11 = OpConstant %8 10 + %12 = OpTypeBool + %13 = OpConstant %8 1 + %14 = OpTypeFloat 32 + %15 = OpTypeInt 32 0 + %16 = OpConstant %15 10 + %17 = OpTypeArray %14 %16 + %18 = OpTypePointer Function %17 + %19 = OpTypePointer Function %14 + %20 = OpConstant %8 2 + %21 = OpConstant %14 0 + %2 = OpFunction %6 None %7 + %22 = OpLabel + %3 = OpVariable %9 Function + %4 = OpVariable %18 Function + %5 = OpVariable %18 Function + OpStore %3 %10 + OpBranch %23 + %23 = OpLabel + %24 = OpPhi %8 %10 %22 %25 %26 + OpLoopMerge %27 %26 None + OpBranch %28 + %28 = OpLabel + %29 = OpSLessThan %12 %24 %11 + OpBranchConditional %29 %30 %27 + %30 = OpLabel + %31 = OpIEqual %12 %24 %13 + OpSelectionMerge %32 None + OpBranchConditional %31 %33 %34 + %33 = OpLabel + %35 = OpAccessChain %19 %5 %24 + %36 = OpLoad %14 %35 + %37 = OpAccessChain %19 %4 %24 + OpStore %37 %36 + OpBranch %32 + %34 = OpLabel + %38 = OpIEqual %12 %24 %20 + OpSelectionMerge %39 None + OpBranchConditional %38 %40 %41 + %40 = OpLabel + %42 = OpAccessChain %19 %4 %24 + %43 = OpLoad %14 %42 + %44 = OpAccessChain %19 %5 %24 + OpStore %44 %43 + OpBranch %39 + %41 = OpLabel + %45 = OpAccessChain %19 %5 %24 + OpStore %45 %21 + OpBranch %39 + %39 = OpLabel + OpBranch %32 + %32 = OpLabel + OpBranch %26 + %26 = OpLabel + %25 = OpIAdd %8 %24 %13 + OpStore %3 %25 + OpBranch %23 + %27 = OpLabel + OpReturn + OpFunctionEnd + )"; + + const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = 
OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpConstant %8 1 +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypePointer Function %17 +%19 = OpTypePointer Function %14 +%20 = OpConstant %8 2 +%21 = OpConstant %14 0 +%2 = OpFunction %6 None %7 +%22 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %18 Function +%5 = OpVariable %18 Function +OpStore %3 %10 +OpBranch %46 +%46 = OpLabel +%47 = OpPhi %8 %10 %22 %67 %66 +OpLoopMerge %68 %66 None +OpBranch %48 +%48 = OpLabel +%49 = OpSLessThan %12 %47 %11 +OpBranchConditional %49 %50 %68 +%50 = OpLabel +%51 = OpIEqual %12 %47 %13 +OpSelectionMerge %65 None +OpBranchConditional %51 %61 %52 +%52 = OpLabel +%53 = OpIEqual %12 %47 %20 +OpSelectionMerge %60 None +OpBranchConditional %53 %56 %54 +%54 = OpLabel +OpBranch %60 +%56 = OpLabel +OpBranch %60 +%60 = OpLabel +OpBranch %65 +%61 = OpLabel +%62 = OpAccessChain %19 %5 %47 +%63 = OpLoad %14 %62 +%64 = OpAccessChain %19 %4 %47 +OpStore %64 %63 +OpBranch %65 +%65 = OpLabel +OpBranch %66 +%66 = OpLabel +%67 = OpIAdd %8 %47 %13 +OpStore %3 %67 +OpBranch %46 +%68 = OpLabel +OpBranch %69 +%69 = OpLabel +%70 = OpPhi %8 %10 %68 %87 %86 +OpLoopMerge %88 %86 None +OpBranch %71 +%71 = OpLabel +%72 = OpSLessThan %12 %70 %11 +OpBranchConditional %72 %73 %88 +%73 = OpLabel +%74 = OpIEqual %12 %70 %13 +OpSelectionMerge %85 None +OpBranchConditional %74 %84 %75 +%75 = OpLabel +%76 = OpIEqual %12 %70 %20 +OpSelectionMerge %83 None +OpBranchConditional %76 %79 %77 +%77 = OpLabel +OpBranch %83 +%79 = OpLabel +%80 = OpAccessChain %19 %4 %70 +%81 = OpLoad %14 %80 +%82 = OpAccessChain %19 %5 %70 +OpStore %82 %81 +OpBranch %83 +%83 = OpLabel +OpBranch %85 +%84 = OpLabel +OpBranch %85 +%85 = OpLabel +OpBranch %86 +%86 = OpLabel +%87 = OpIAdd %8 %70 %13 +OpStore %3 %87 +OpBranch %69 +%88 = OpLabel +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %8 %10 %88 %25 %26 
+OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %12 %24 %11 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +%31 = OpIEqual %12 %24 %13 +OpSelectionMerge %32 None +OpBranchConditional %31 %33 %34 +%33 = OpLabel +OpBranch %32 +%34 = OpLabel +%38 = OpIEqual %12 %24 %20 +OpSelectionMerge %39 None +OpBranchConditional %38 %40 %41 +%40 = OpLabel +OpBranch %39 +%41 = OpLabel +%45 = OpAccessChain %19 %5 %24 +OpStore %45 %21 +OpBranch %39 +%39 = OpLabel +OpBranch %32 +%32 = OpLabel +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %8 %24 %13 +OpStore %3 %25 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true, 1); +} + +/* +#version 430 +layout(location=0) flat in int condition; +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + B[i] = A[i]; + memoryBarrier(); + A[i] = B[i]; + } +} + +This should not be split due to the memory barrier. 
+*/ +TEST_F(FissionClassTest, FissionBarrier) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" %3 +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %4 "i" +OpName %5 "B" +OpName %6 "A" +OpName %3 "condition" +OpDecorate %3 Flat +OpDecorate %3 Location 0 +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypePointer Function %17 +%19 = OpTypePointer Function %14 +%20 = OpConstant %15 1 +%21 = OpConstant %15 4048 +%22 = OpConstant %9 1 +%23 = OpTypePointer Input %9 +%3 = OpVariable %23 Input +%2 = OpFunction %7 None %8 +%24 = OpLabel +%4 = OpVariable %10 Function +%5 = OpVariable %18 Function +%6 = OpVariable %18 Function +OpStore %4 %11 +OpBranch %25 +%25 = OpLabel +%26 = OpPhi %9 %11 %24 %27 %28 +OpLoopMerge %29 %28 None +OpBranch %30 +%30 = OpLabel +%31 = OpSLessThan %13 %26 %12 +OpBranchConditional %31 %32 %29 +%32 = OpLabel +%33 = OpAccessChain %19 %6 %26 +%34 = OpLoad %14 %33 +%35 = OpAccessChain %19 %5 %26 +OpStore %35 %34 +OpMemoryBarrier %20 %21 +%36 = OpAccessChain %19 %5 %26 +%37 = OpLoad %14 %36 +%38 = OpAccessChain %19 %6 %26 +OpStore %38 %37 +OpBranch %28 +%28 = OpLabel +%27 = OpIAdd %9 %26 %22 +OpStore %4 %27 +OpBranch %25 +%29 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + 
SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + B[i] = A[i]; + if ( i== 1) + break; + A[i] = B[i]; + } +} + +This should not be split due to the break. +*/ +TEST_F(FissionClassTest, FissionBreak) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpStore %3 %10 +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %20 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%29 = OpAccessChain %18 %5 %22 +%30 = OpLoad %13 %29 +%31 = OpAccessChain %18 %4 %22 +OpStore %31 %30 +%32 = OpIEqual %12 %22 %19 +OpSelectionMerge %33 None +OpBranchConditional %32 %34 %33 +%34 = OpLabel +OpBranch %25 +%33 = OpLabel +%35 = OpAccessChain %18 %4 %22 +%36 = OpLoad %13 %35 +%37 = OpAccessChain %18 %5 %22 +OpStore %37 %36 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpStore %3 %23 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* 
module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; i++) { + B[i] = A[i]; + if ( i== 1) + continue; + A[i] = B[i]; + } +} + +This loop should be split into: + + for (int i = 0; i < 10; i++) { + B[i] = A[i]; + if ( i== 1) + continue; + } + for (int i = 0; i < 10; i++) { + if ( i== 1) + continue; + A[i] = B[i]; + } +The continue block in the first loop is left to DCE. +} + + +*/ +TEST_F(FissionClassTest, FissionContinue) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpStore %3 %10 +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %20 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%29 = OpAccessChain %18 %5 %22 +%30 = OpLoad %13 %29 +%31 = OpAccessChain %18 %4 %22 +OpStore %31 %30 +%32 = OpIEqual %12 %22 %19 +OpSelectionMerge %33 None +OpBranchConditional %32 %34 %33 +%34 = OpLabel +OpBranch %24 +%33 = OpLabel +%35 = OpAccessChain 
%18 %4 %22 +%36 = OpLoad %13 %35 +%37 = OpAccessChain %18 %5 %22 +OpStore %37 %36 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpStore %3 %23 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + +const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpStore %3 %10 +OpBranch %38 +%38 = OpLabel +%39 = OpPhi %8 %10 %20 %53 %52 +OpLoopMerge %54 %52 None +OpBranch %40 +%40 = OpLabel +%41 = OpSLessThan %12 %39 %11 +OpBranchConditional %41 %42 %54 +%42 = OpLabel +%43 = OpAccessChain %18 %5 %39 +%44 = OpLoad %13 %43 +%45 = OpAccessChain %18 %4 %39 +OpStore %45 %44 +%46 = OpIEqual %12 %39 %19 +OpSelectionMerge %47 None +OpBranchConditional %46 %51 %47 +%47 = OpLabel +OpBranch %52 +%51 = OpLabel +OpBranch %52 +%52 = OpLabel +%53 = OpIAdd %8 %39 %19 +OpStore %3 %53 +OpBranch %38 +%54 = OpLabel +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %54 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +%32 = OpIEqual %12 %22 %19 +OpSelectionMerge %33 None +OpBranchConditional %32 %34 %33 +%34 = OpLabel +OpBranch %24 +%33 = OpLabel +%35 = OpAccessChain %18 %4 %22 +%36 = OpLoad %13 %35 +%37 = OpAccessChain %18 %5 %22 +OpStore %37 %36 +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 
+OpStore %3 %23 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +/* +#version 430 +void main(void) { + float A[10]; + float B[10]; + int i = 0; + do { + B[i] = A[i]; + A[i] = B[i]; + ++i; + } while (i < 10); +} + + +Check that this is split into: + int i = 0; + do { + B[i] = A[i]; + ++i; + } while (i < 10); + + i = 0; + do { + A[i] = B[i]; + ++i; + } while (i < 10); + + +*/ +TEST_F(FissionClassTest, FissionDoWhile) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpTypeFloat 32 +%12 = OpTypeInt 32 0 +%13 = OpConstant %12 10 +%14 = OpTypeArray %11 %13 +%15 = OpTypePointer Function %14 +%16 = OpTypePointer Function %11 +%17 = OpConstant %8 1 +%18 = OpConstant %8 10 +%19 = OpTypeBool +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %15 Function +%5 = OpVariable %15 Function +OpStore %3 %10 +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %20 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpAccessChain %16 %5 %22 +%28 = OpLoad %11 %27 +%29 = OpAccessChain %16 %4 %22 +OpStore %29 %28 +%30 = OpAccessChain %16 %4 %22 +%31 = OpLoad %11 %30 +%32 = OpAccessChain %16 
%5 %22 +OpStore %32 %31 +%23 = OpIAdd %8 %22 %17 +OpStore %3 %23 +OpBranch %24 +%24 = OpLabel +%33 = OpSLessThan %19 %23 %18 +OpBranchConditional %33 %21 %25 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + +const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpTypeFloat 32 +%12 = OpTypeInt 32 0 +%13 = OpConstant %12 10 +%14 = OpTypeArray %11 %13 +%15 = OpTypePointer Function %14 +%16 = OpTypePointer Function %11 +%17 = OpConstant %8 1 +%18 = OpConstant %8 10 +%19 = OpTypeBool +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %15 Function +%5 = OpVariable %15 Function +OpStore %3 %10 +OpBranch %34 +%34 = OpLabel +%35 = OpPhi %8 %10 %20 %43 %44 +OpLoopMerge %46 %44 None +OpBranch %36 +%36 = OpLabel +%37 = OpAccessChain %16 %5 %35 +%38 = OpLoad %11 %37 +%39 = OpAccessChain %16 %4 %35 +OpStore %39 %38 +%43 = OpIAdd %8 %35 %17 +OpStore %3 %43 +OpBranch %44 +%44 = OpLabel +%45 = OpSLessThan %19 %43 %18 +OpBranchConditional %45 %34 %46 +%46 = OpLabel +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %46 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%30 = OpAccessChain %16 %4 %22 +%31 = OpLoad %11 %30 +%32 = OpAccessChain %16 %5 %22 +OpStore %32 %31 +%23 = OpIAdd %8 %22 %17 +OpStore %3 %23 +OpBranch %24 +%24 = OpLabel +%33 = OpSLessThan %19 %23 %18 +OpBranchConditional %33 %21 %25 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed 
for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +/* + +#version 430 +void main(void) { + float A[10][10]; + float B[10][10]; + for (int j = 0; j < 10; ++j) { + for (int i = 0; i < 10; ++i) { + B[i][j] = A[i][i]; + A[i][i] = B[i][j + 1]; + } + } +} + + +This loop can't be split because the load B[i][j + 1] is dependent on the store +B[i][j]. + +*/ +TEST_F(FissionClassTest, FissionNestedDependency) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "j" +OpName %4 "i" +OpName %5 "B" +OpName %6 "A" +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypeArray %17 %16 +%19 = OpTypePointer Function %18 +%20 = OpTypePointer Function %14 +%21 = OpConstant %9 1 +%2 = OpFunction %7 None %8 +%22 = OpLabel +%3 = OpVariable %10 Function +%4 = OpVariable %10 Function +%5 = OpVariable %19 Function +%6 = OpVariable %19 Function +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %9 %11 %22 %25 %26 +OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %13 %24 %12 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +OpBranch %31 +%31 = OpLabel +%32 = OpPhi %9 %11 %30 %33 %34 +OpLoopMerge %35 %34 None +OpBranch %36 +%36 = OpLabel +%37 = OpSLessThan %13 %32 %12 +OpBranchConditional %37 %38 %35 +%38 = OpLabel +%39 = OpAccessChain %20 %6 %32 %32 +%40 = OpLoad %14 %39 +%41 = OpAccessChain %20 %5 %32 %24 +OpStore %41 %40 +%42 = OpIAdd %9 %24 %21 +%43 = OpAccessChain %20 %5 %32 %42 +%44 = OpLoad %14 
%43 +%45 = OpAccessChain %20 %6 %32 %32 +OpStore %45 %44 +OpBranch %34 +%34 = OpLabel +%33 = OpIAdd %9 %32 %21 +OpBranch %31 +%35 = OpLabel +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %9 %24 %21 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +void main(void) { + float A[10][10]; + float B[10][10]; + for (int j = 0; j < 10; ++j) { + for (int i = 0; i < 10; ++i) { + B[i][i] = A[i][j]; + A[i][j+1] = B[i][i]; + } + } +} + +This loop should not be split as the load A[i][j+1] would be reading a value +written in the store A[i][j] which would be hit before A[i][j+1] if the loops +where split but would not get hit before the read currently. 
+ +*/ +TEST_F(FissionClassTest, FissionNestedDependency2) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "j" +OpName %4 "i" +OpName %5 "B" +OpName %6 "A" +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypeArray %17 %16 +%19 = OpTypePointer Function %18 +%20 = OpTypePointer Function %14 +%21 = OpConstant %9 1 +%2 = OpFunction %7 None %8 +%22 = OpLabel +%3 = OpVariable %10 Function +%4 = OpVariable %10 Function +%5 = OpVariable %19 Function +%6 = OpVariable %19 Function +OpStore %3 %11 +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %9 %11 %22 %25 %26 +OpLoopMerge %27 %26 None +OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %13 %24 %12 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +OpStore %4 %11 +OpBranch %31 +%31 = OpLabel +%32 = OpPhi %9 %11 %30 %33 %34 +OpLoopMerge %35 %34 None +OpBranch %36 +%36 = OpLabel +%37 = OpSLessThan %13 %32 %12 +OpBranchConditional %37 %38 %35 +%38 = OpLabel +%39 = OpAccessChain %20 %6 %32 %24 +%40 = OpLoad %14 %39 +%41 = OpAccessChain %20 %5 %32 %32 +OpStore %41 %40 +%42 = OpIAdd %9 %24 %21 +%43 = OpAccessChain %20 %5 %32 %32 +%44 = OpLoad %14 %43 +%45 = OpAccessChain %20 %6 %32 %42 +OpStore %45 %44 +OpBranch %34 +%34 = OpLabel +%33 = OpIAdd %9 %32 %21 +OpStore %4 %33 +OpBranch %31 +%35 = OpLabel +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %9 %24 %21 +OpStore %3 %25 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + 
SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +void main(void) { + float A[10][10]; + float B[10][10]; + for (int j = 0; j < 10; ++j) { + for (int i = 0; i < 10; ++i) { + B[i][j] = A[i][j]; + A[i][j] = B[i][j]; + } + for (int i = 0; i < 10; ++i) { + B[i][j] = A[i][j]; + A[i][j] = B[i][j]; + } + } +} + + + +Should be split into: + +for (int j = 0; j < 10; ++j) { + for (int i = 0; i < 10; ++i) + B[i][j] = A[i][j]; + for (int i = 0; i < 10; ++i) + A[i][j] = B[i][j]; + for (int i = 0; i < 10; ++i) + B[i][j] = A[i][j]; + for (int i = 0; i < 10; ++i) + A[i][j] = B[i][j]; +*/ +TEST_F(FissionClassTest, FissionMultipleLoopsNested) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "j" + OpName %4 "i" + OpName %5 "B" + OpName %6 "A" + OpName %7 "i" + %8 = OpTypeVoid + %9 = OpTypeFunction %8 + %10 = OpTypeInt 32 1 + %11 = OpTypePointer Function %10 + %12 = OpConstant %10 0 + %13 = OpConstant %10 10 + %14 = OpTypeBool + %15 = OpTypeFloat 32 + %16 = OpTypeInt 32 0 + %17 = OpConstant %16 10 + %18 = OpTypeArray %15 %17 + %19 = OpTypeArray %18 %17 + %20 = OpTypePointer Function %19 + %21 = OpTypePointer Function %15 + %22 = OpConstant %10 1 + %2 = OpFunction %8 None %9 + %23 = OpLabel + %3 = OpVariable %11 Function + %4 = OpVariable %11 Function + %5 = OpVariable %20 Function + %6 = OpVariable %20 Function + %7 = OpVariable %11 Function + OpStore %3 %12 + OpBranch %24 + %24 = OpLabel + %25 = OpPhi %10 %12 %23 %26 %27 + OpLoopMerge %28 %27 None 
+ OpBranch %29 + %29 = OpLabel + %30 = OpSLessThan %14 %25 %13 + OpBranchConditional %30 %31 %28 + %31 = OpLabel + OpStore %4 %12 + OpBranch %32 + %32 = OpLabel + %33 = OpPhi %10 %12 %31 %34 %35 + OpLoopMerge %36 %35 None + OpBranch %37 + %37 = OpLabel + %38 = OpSLessThan %14 %33 %13 + OpBranchConditional %38 %39 %36 + %39 = OpLabel + %40 = OpAccessChain %21 %6 %33 %25 + %41 = OpLoad %15 %40 + %42 = OpAccessChain %21 %5 %33 %25 + OpStore %42 %41 + %43 = OpAccessChain %21 %5 %33 %25 + %44 = OpLoad %15 %43 + %45 = OpAccessChain %21 %6 %33 %25 + OpStore %45 %44 + OpBranch %35 + %35 = OpLabel + %34 = OpIAdd %10 %33 %22 + OpStore %4 %34 + OpBranch %32 + %36 = OpLabel + OpStore %7 %12 + OpBranch %46 + %46 = OpLabel + %47 = OpPhi %10 %12 %36 %48 %49 + OpLoopMerge %50 %49 None + OpBranch %51 + %51 = OpLabel + %52 = OpSLessThan %14 %47 %13 + OpBranchConditional %52 %53 %50 + %53 = OpLabel + %54 = OpAccessChain %21 %6 %47 %25 + %55 = OpLoad %15 %54 + %56 = OpAccessChain %21 %5 %47 %25 + OpStore %56 %55 + %57 = OpAccessChain %21 %5 %47 %25 + %58 = OpLoad %15 %57 + %59 = OpAccessChain %21 %6 %47 %25 + OpStore %59 %58 + OpBranch %49 + %49 = OpLabel + %48 = OpIAdd %10 %47 %22 + OpStore %7 %48 + OpBranch %46 + %50 = OpLabel + OpBranch %27 + %27 = OpLabel + %26 = OpIAdd %10 %25 %22 + OpStore %3 %26 + OpBranch %24 + %28 = OpLabel + OpReturn + OpFunctionEnd +)"; + +const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "j" +OpName %4 "i" +OpName %5 "B" +OpName %6 "A" +OpName %7 "i" +%8 = OpTypeVoid +%9 = OpTypeFunction %8 +%10 = OpTypeInt 32 1 +%11 = OpTypePointer Function %10 +%12 = OpConstant %10 0 +%13 = OpConstant %10 10 +%14 = OpTypeBool +%15 = OpTypeFloat 32 +%16 = OpTypeInt 32 0 +%17 = OpConstant %16 10 +%18 = OpTypeArray %15 %17 +%19 = OpTypeArray %18 %17 +%20 = OpTypePointer Function %19 +%21 = 
OpTypePointer Function %15 +%22 = OpConstant %10 1 +%2 = OpFunction %8 None %9 +%23 = OpLabel +%3 = OpVariable %11 Function +%4 = OpVariable %11 Function +%5 = OpVariable %20 Function +%6 = OpVariable %20 Function +%7 = OpVariable %11 Function +OpStore %3 %12 +OpBranch %24 +%24 = OpLabel +%25 = OpPhi %10 %12 %23 %26 %27 +OpLoopMerge %28 %27 None +OpBranch %29 +%29 = OpLabel +%30 = OpSLessThan %14 %25 %13 +OpBranchConditional %30 %31 %28 +%31 = OpLabel +OpStore %4 %12 +OpBranch %60 +%60 = OpLabel +%61 = OpPhi %10 %12 %31 %72 %71 +OpLoopMerge %73 %71 None +OpBranch %62 +%62 = OpLabel +%63 = OpSLessThan %14 %61 %13 +OpBranchConditional %63 %64 %73 +%64 = OpLabel +%65 = OpAccessChain %21 %6 %61 %25 +%66 = OpLoad %15 %65 +%67 = OpAccessChain %21 %5 %61 %25 +OpStore %67 %66 +OpBranch %71 +%71 = OpLabel +%72 = OpIAdd %10 %61 %22 +OpStore %4 %72 +OpBranch %60 +%73 = OpLabel +OpBranch %32 +%32 = OpLabel +%33 = OpPhi %10 %12 %73 %34 %35 +OpLoopMerge %36 %35 None +OpBranch %37 +%37 = OpLabel +%38 = OpSLessThan %14 %33 %13 +OpBranchConditional %38 %39 %36 +%39 = OpLabel +%43 = OpAccessChain %21 %5 %33 %25 +%44 = OpLoad %15 %43 +%45 = OpAccessChain %21 %6 %33 %25 +OpStore %45 %44 +OpBranch %35 +%35 = OpLabel +%34 = OpIAdd %10 %33 %22 +OpStore %4 %34 +OpBranch %32 +%36 = OpLabel +OpStore %7 %12 +OpBranch %74 +%74 = OpLabel +%75 = OpPhi %10 %12 %36 %86 %85 +OpLoopMerge %87 %85 None +OpBranch %76 +%76 = OpLabel +%77 = OpSLessThan %14 %75 %13 +OpBranchConditional %77 %78 %87 +%78 = OpLabel +%79 = OpAccessChain %21 %6 %75 %25 +%80 = OpLoad %15 %79 +%81 = OpAccessChain %21 %5 %75 %25 +OpStore %81 %80 +OpBranch %85 +%85 = OpLabel +%86 = OpIAdd %10 %75 %22 +OpStore %7 %86 +OpBranch %74 +%87 = OpLabel +OpBranch %46 +%46 = OpLabel +%47 = OpPhi %10 %12 %87 %48 %49 +OpLoopMerge %50 %49 None +OpBranch %51 +%51 = OpLabel +%52 = OpSLessThan %14 %47 %13 +OpBranchConditional %52 %53 %50 +%53 = OpLabel +%57 = OpAccessChain %21 %5 %47 %25 +%58 = OpLoad %15 %57 +%59 = OpAccessChain %21 %6 %47 %25 
+OpStore %59 %58 +OpBranch %49 +%49 = OpLabel +%48 = OpIAdd %10 %47 %22 +OpStore %7 %48 +OpBranch %46 +%50 = OpLabel +OpBranch %27 +%27 = OpLabel +%26 = OpIAdd %10 %25 %22 +OpStore %3 %26 +OpBranch %24 +%28 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + const ir::Function* function = spvtest::GetFunction(module, 2); + ir::LoopDescriptor& pre_pass_descriptor = + *context->GetLoopDescriptor(function); + EXPECT_EQ(pre_pass_descriptor.NumLoops(), 3u); + EXPECT_EQ(pre_pass_descriptor.pre_begin()->NumImmediateChildren(), 2u); + + // Test that the pass transforms the ir into the expected output. + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); + + // Test that the loop descriptor is correctly maintained and updated by the + // pass. 
+ opt::LoopFissionPass loop_fission{}; + loop_fission.Process(context.get()); + + function = spvtest::GetFunction(module, 2); + ir::LoopDescriptor& post_pass_descriptor = + *context->GetLoopDescriptor(function); + EXPECT_EQ(post_pass_descriptor.NumLoops(), 5u); + EXPECT_EQ(post_pass_descriptor.pre_begin()->NumImmediateChildren(), 4u); +} + +/* +#version 430 +void main(void) { + float A[10][10]; + float B[10][10]; + for (int i = 0; i < 10; ++i) { + B[i][i] = A[i][i]; + A[i][i] = B[i][i]; + } + for (int i = 0; i < 10; ++i) { + B[i][i] = A[i][i]; + A[i][i] = B[i][i] + } +} + + + +Should be split into: + + for (int i = 0; i < 10; ++i) + B[i][i] = A[i][i]; + for (int i = 0; i < 10; ++i) + A[i][i] = B[i][i]; + for (int i = 0; i < 10; ++i) + B[i][i] = A[i][i]; + for (int i = 0; i < 10; ++i) + A[i][i] = B[i][i]; +*/ +TEST_F(FissionClassTest, FissionMultipleLoops) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "i" + OpName %4 "B" + OpName %5 "A" + OpName %6 "i" + %7 = OpTypeVoid + %8 = OpTypeFunction %7 + %9 = OpTypeInt 32 1 + %10 = OpTypePointer Function %9 + %11 = OpConstant %9 0 + %12 = OpConstant %9 10 + %13 = OpTypeBool + %14 = OpTypeFloat 32 + %15 = OpTypeInt 32 0 + %16 = OpConstant %15 10 + %17 = OpTypeArray %14 %16 + %18 = OpTypePointer Function %17 + %19 = OpTypePointer Function %14 + %20 = OpConstant %9 1 + %2 = OpFunction %7 None %8 + %21 = OpLabel + %3 = OpVariable %10 Function + %4 = OpVariable %18 Function + %5 = OpVariable %18 Function + %6 = OpVariable %10 Function + OpStore %3 %11 + OpBranch %22 + %22 = OpLabel + %23 = OpPhi %9 %11 %21 %24 %25 + OpLoopMerge %26 %25 None + OpBranch %27 + %27 = OpLabel + %28 = OpSLessThan %13 %23 %12 + OpBranchConditional %28 %29 %26 + %29 = OpLabel + %30 = OpAccessChain 
%19 %5 %23 + %31 = OpLoad %14 %30 + %32 = OpAccessChain %19 %4 %23 + OpStore %32 %31 + %33 = OpAccessChain %19 %4 %23 + %34 = OpLoad %14 %33 + %35 = OpAccessChain %19 %5 %23 + OpStore %35 %34 + OpBranch %25 + %25 = OpLabel + %24 = OpIAdd %9 %23 %20 + OpStore %3 %24 + OpBranch %22 + %26 = OpLabel + OpStore %6 %11 + OpBranch %36 + %36 = OpLabel + %37 = OpPhi %9 %11 %26 %38 %39 + OpLoopMerge %40 %39 None + OpBranch %41 + %41 = OpLabel + %42 = OpSLessThan %13 %37 %12 + OpBranchConditional %42 %43 %40 + %43 = OpLabel + %44 = OpAccessChain %19 %5 %37 + %45 = OpLoad %14 %44 + %46 = OpAccessChain %19 %4 %37 + OpStore %46 %45 + %47 = OpAccessChain %19 %4 %37 + %48 = OpLoad %14 %47 + %49 = OpAccessChain %19 %5 %37 + OpStore %49 %48 + OpBranch %39 + %39 = OpLabel + %38 = OpIAdd %9 %37 %20 + OpStore %6 %38 + OpBranch %36 + %40 = OpLabel + OpReturn + OpFunctionEnd +)"; + +const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +OpName %6 "i" +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypePointer Function %9 +%11 = OpConstant %9 0 +%12 = OpConstant %9 10 +%13 = OpTypeBool +%14 = OpTypeFloat 32 +%15 = OpTypeInt 32 0 +%16 = OpConstant %15 10 +%17 = OpTypeArray %14 %16 +%18 = OpTypePointer Function %17 +%19 = OpTypePointer Function %14 +%20 = OpConstant %9 1 +%2 = OpFunction %7 None %8 +%21 = OpLabel +%3 = OpVariable %10 Function +%4 = OpVariable %18 Function +%5 = OpVariable %18 Function +%6 = OpVariable %10 Function +OpStore %3 %11 +OpBranch %64 +%64 = OpLabel +%65 = OpPhi %9 %11 %21 %76 %75 +OpLoopMerge %77 %75 None +OpBranch %66 +%66 = OpLabel +%67 = OpSLessThan %13 %65 %12 +OpBranchConditional %67 %68 %77 +%68 = OpLabel +%69 = OpAccessChain %19 %5 %65 +%70 = OpLoad %14 %69 +%71 = OpAccessChain %19 %4 %65 +OpStore %71 %70 +OpBranch %75 
+%75 = OpLabel +%76 = OpIAdd %9 %65 %20 +OpStore %3 %76 +OpBranch %64 +%77 = OpLabel +OpBranch %22 +%22 = OpLabel +%23 = OpPhi %9 %11 %77 %24 %25 +OpLoopMerge %26 %25 None +OpBranch %27 +%27 = OpLabel +%28 = OpSLessThan %13 %23 %12 +OpBranchConditional %28 %29 %26 +%29 = OpLabel +%33 = OpAccessChain %19 %4 %23 +%34 = OpLoad %14 %33 +%35 = OpAccessChain %19 %5 %23 +OpStore %35 %34 +OpBranch %25 +%25 = OpLabel +%24 = OpIAdd %9 %23 %20 +OpStore %3 %24 +OpBranch %22 +%26 = OpLabel +OpStore %6 %11 +OpBranch %50 +%50 = OpLabel +%51 = OpPhi %9 %11 %26 %62 %61 +OpLoopMerge %63 %61 None +OpBranch %52 +%52 = OpLabel +%53 = OpSLessThan %13 %51 %12 +OpBranchConditional %53 %54 %63 +%54 = OpLabel +%55 = OpAccessChain %19 %5 %51 +%56 = OpLoad %14 %55 +%57 = OpAccessChain %19 %4 %51 +OpStore %57 %56 +OpBranch %61 +%61 = OpLabel +%62 = OpIAdd %9 %51 %20 +OpStore %6 %62 +OpBranch %50 +%63 = OpLabel +OpBranch %36 +%36 = OpLabel +%37 = OpPhi %9 %11 %63 %38 %39 +OpLoopMerge %40 %39 None +OpBranch %41 +%41 = OpLabel +%42 = OpSLessThan %13 %37 %12 +OpBranchConditional %42 %43 %40 +%43 = OpLabel +%47 = OpAccessChain %19 %4 %37 +%48 = OpLoad %14 %47 +%49 = OpAccessChain %19 %5 %37 +OpStore %49 %48 +OpBranch %39 +%39 = OpLabel +%38 = OpIAdd %9 %37 %20 +OpStore %6 %38 +OpBranch %36 +%40 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); + + const ir::Function* function = spvtest::GetFunction(module, 2); + ir::LoopDescriptor& pre_pass_descriptor = + *context->GetLoopDescriptor(function); + EXPECT_EQ(pre_pass_descriptor.NumLoops(), 2u); + 
EXPECT_EQ(pre_pass_descriptor.pre_begin()->NumImmediateChildren(), 0u); + + // Test that the pass transforms the ir into the expected output. + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); + + // Test that the loop descriptor is correctly maintained and updated by the + // pass. + opt::LoopFissionPass loop_fission{}; + loop_fission.Process(context.get()); + + function = spvtest::GetFunction(module, 2); + ir::LoopDescriptor& post_pass_descriptor = + *context->GetLoopDescriptor(function); + EXPECT_EQ(post_pass_descriptor.NumLoops(), 4u); + EXPECT_EQ(post_pass_descriptor.pre_begin()->NumImmediateChildren(), 0u); +} + +/* +#version 430 +int foo() { return 1; } +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; ++i) { + B[i] = A[i]; + foo(); + A[i] = B[i]; + } +} + +This should not be split as it has a function call in it so we can't determine +if it has side effects. +*/ +TEST_F(FissionClassTest, FissionFunctionCall) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "foo(" +OpName %4 "i" +OpName %5 "B" +OpName %6 "A" +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%9 = OpTypeInt 32 1 +%10 = OpTypeFunction %9 +%11 = OpConstant %9 1 +%12 = OpTypePointer Function %9 +%13 = OpConstant %9 0 +%14 = OpConstant %9 10 +%15 = OpTypeBool +%16 = OpTypeFloat 32 +%17 = OpTypeInt 32 0 +%18 = OpConstant %17 10 +%19 = OpTypeArray %16 %18 +%20 = OpTypePointer Function %19 +%21 = OpTypePointer Function %16 +%2 = OpFunction %7 None %8 +%22 = OpLabel +%4 = OpVariable %12 Function +%5 = OpVariable %20 Function +%6 = OpVariable %20 Function +OpStore %4 %13 +OpBranch %23 +%23 = OpLabel +%24 = OpPhi %9 %13 %22 %25 %26 +OpLoopMerge %27 %26 None 
+OpBranch %28 +%28 = OpLabel +%29 = OpSLessThan %15 %24 %14 +OpBranchConditional %29 %30 %27 +%30 = OpLabel +%31 = OpAccessChain %21 %6 %24 +%32 = OpLoad %16 %31 +%33 = OpAccessChain %21 %5 %24 +OpStore %33 %32 +%34 = OpFunctionCall %9 %3 +%35 = OpAccessChain %21 %5 %24 +%36 = OpLoad %16 %35 +%37 = OpAccessChain %21 %6 %24 +OpStore %37 %36 +OpBranch %26 +%26 = OpLabel +%25 = OpIAdd %9 %24 %11 +OpStore %4 %25 +OpBranch %23 +%27 = OpLabel +OpReturn +OpFunctionEnd +%3 = OpFunction %9 None %10 +%38 = OpLabel +OpReturnValue %11 +OpFunctionEnd +)"; + + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, source, true); +} + +/* +#version 430 +void main(void) { + float A[10]; + float B[10]; + for (int i = 0; i < 10; ++i) { + switch (i) { + case 1: + B[i] = A[i]; + break; + default: + A[i] = B[i]; + } + } +} + +This should be split into: + for (int i = 0; i < 10; ++i) { + switch (i) { + case 1: + break; + default: + A[i] = B[i]; + } + } + + for (int i = 0; i < 10; ++i) { + switch (i) { + case 1: + B[i] = A[i]; + break; + default: + break; + } + } + +*/ +TEST_F(FissionClassTest, FissionSwitchStatement) { + // clang-format off + // With opt::LocalMultiStoreElimPass +const std::string source = R"(OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %2 "main" + OpExecutionMode %2 OriginUpperLeft + OpSource GLSL 430 + OpName %2 "main" + OpName %3 "i" + OpName %4 "B" + OpName %5 "A" + %6 = OpTypeVoid + %7 = OpTypeFunction %6 + %8 = OpTypeInt 32 1 + %9 = OpTypePointer Function %8 + %10 = OpConstant %8 0 + %11 = OpConstant %8 10 + %12 = OpTypeBool + %13 = 
OpTypeFloat 32 + %14 = OpTypeInt 32 0 + %15 = OpConstant %14 10 + %16 = OpTypeArray %13 %15 + %17 = OpTypePointer Function %16 + %18 = OpTypePointer Function %13 + %19 = OpConstant %8 1 + %2 = OpFunction %6 None %7 + %20 = OpLabel + %3 = OpVariable %9 Function + %4 = OpVariable %17 Function + %5 = OpVariable %17 Function + OpStore %3 %10 + OpBranch %21 + %21 = OpLabel + %22 = OpPhi %8 %10 %20 %23 %24 + OpLoopMerge %25 %24 None + OpBranch %26 + %26 = OpLabel + %27 = OpSLessThan %12 %22 %11 + OpBranchConditional %27 %28 %25 + %28 = OpLabel + OpSelectionMerge %29 None + OpSwitch %22 %30 1 %31 + %30 = OpLabel + %32 = OpAccessChain %18 %4 %22 + %33 = OpLoad %13 %32 + %34 = OpAccessChain %18 %5 %22 + OpStore %34 %33 + OpBranch %29 + %31 = OpLabel + %35 = OpAccessChain %18 %5 %22 + %36 = OpLoad %13 %35 + %37 = OpAccessChain %18 %4 %22 + OpStore %37 %36 + OpBranch %29 + %29 = OpLabel + OpBranch %24 + %24 = OpLabel + %23 = OpIAdd %8 %22 %19 + OpStore %3 %23 + OpBranch %21 + %25 = OpLabel + OpReturn + OpFunctionEnd +)"; + +const std::string expected = R"(OpCapability Shader +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %2 "main" +OpExecutionMode %2 OriginUpperLeft +OpSource GLSL 430 +OpName %2 "main" +OpName %3 "i" +OpName %4 "B" +OpName %5 "A" +%6 = OpTypeVoid +%7 = OpTypeFunction %6 +%8 = OpTypeInt 32 1 +%9 = OpTypePointer Function %8 +%10 = OpConstant %8 0 +%11 = OpConstant %8 10 +%12 = OpTypeBool +%13 = OpTypeFloat 32 +%14 = OpTypeInt 32 0 +%15 = OpConstant %14 10 +%16 = OpTypeArray %13 %15 +%17 = OpTypePointer Function %16 +%18 = OpTypePointer Function %13 +%19 = OpConstant %8 1 +%2 = OpFunction %6 None %7 +%20 = OpLabel +%3 = OpVariable %9 Function +%4 = OpVariable %17 Function +%5 = OpVariable %17 Function +OpStore %3 %10 +OpBranch %38 +%38 = OpLabel +%39 = OpPhi %8 %10 %20 %53 %52 +OpLoopMerge %54 %52 None +OpBranch %40 +%40 = OpLabel +%41 = OpSLessThan %12 %39 %11 +OpBranchConditional %41 %42 %54 +%42 = OpLabel 
+OpSelectionMerge %51 None +OpSwitch %39 %47 1 %43 +%43 = OpLabel +OpBranch %51 +%47 = OpLabel +%48 = OpAccessChain %18 %4 %39 +%49 = OpLoad %13 %48 +%50 = OpAccessChain %18 %5 %39 +OpStore %50 %49 +OpBranch %51 +%51 = OpLabel +OpBranch %52 +%52 = OpLabel +%53 = OpIAdd %8 %39 %19 +OpStore %3 %53 +OpBranch %38 +%54 = OpLabel +OpBranch %21 +%21 = OpLabel +%22 = OpPhi %8 %10 %54 %23 %24 +OpLoopMerge %25 %24 None +OpBranch %26 +%26 = OpLabel +%27 = OpSLessThan %12 %22 %11 +OpBranchConditional %27 %28 %25 +%28 = OpLabel +OpSelectionMerge %29 None +OpSwitch %22 %30 1 %31 +%30 = OpLabel +OpBranch %29 +%31 = OpLabel +%35 = OpAccessChain %18 %5 %22 +%36 = OpLoad %13 %35 +%37 = OpAccessChain %18 %4 %22 +OpStore %37 %36 +OpBranch %29 +%29 = OpLabel +OpBranch %24 +%24 = OpLabel +%23 = OpIAdd %8 %22 %19 +OpStore %3 %23 +OpBranch %21 +%25 = OpLabel +OpReturn +OpFunctionEnd +)"; + // clang-format on + std::unique_ptr<ir::IRContext> context = + BuildModule(SPV_ENV_UNIVERSAL_1_1, nullptr, source, + SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + ir::Module* module = context->module(); + EXPECT_NE(nullptr, module) << "Assembling failed for shader:\n" + << source << std::endl; + + SetDisassembleOptions(SPV_BINARY_TO_TEXT_OPTION_NO_HEADER); + SinglePassRunAndCheck<opt::LoopFissionPass>(source, expected, true); +} + +} // namespace diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index fec58526..db069c0b 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -176,6 +176,10 @@ Options (in lexicographical order): --local-redundancy-elimination Looks for instructions in the same basic block that compute the same value, and deletes the redundant ones. + --loop-fission + Splits any top level loops in which the register pressure has exceeded + a given threshold. The threshold must follow the use of this flag and + must be a positive integer value. 
--loop-unroll Fully unrolls loops marked with the Unroll flag --loop-unroll-partial @@ -403,6 +407,20 @@ OptStatus ParseOconfigFlag(const char* prog_name, const char* opt_flag, in_file, out_file, nullptr, &skip_validator); } +OptStatus ParseLoopFissionArg(int argc, const char** argv, int argi, + Optimizer* optimizer) { + if (argi < argc) { + int register_threshold_to_split = atoi(argv[argi]); + optimizer->RegisterPass(CreateLoopFissionPass( + static_cast<size_t>(register_threshold_to_split))); + return {OPT_CONTINUE, 0}; + } + fprintf( + stderr, + "error: --loop-fission must be followed by a positive integer value\n"); + return {OPT_STOP, 1}; +} + OptStatus ParseLoopUnrollPartialArg(int argc, const char** argv, int argi, Optimizer* optimizer) { if (argi < argc) { @@ -557,6 +575,11 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer, optimizer->RegisterPass(CreateSSARewritePass()); } else if (0 == strcmp(cur_arg, "--copy-propagate-arrays")) { optimizer->RegisterPass(CreateCopyPropagateArraysPass()); + } else if (0 == strcmp(cur_arg, "--loop-fission")) { + OptStatus status = ParseLoopFissionArg(argc, argv, ++argi, optimizer); + if (status.action != OPT_CONTINUE) { + return status; + } } else if (0 == strcmp(cur_arg, "--loop-unroll")) { optimizer->RegisterPass(CreateLoopUnrollPass(true)); } else if (0 == strcmp(cur_arg, "--vector-dce")) { |