diff options
author | Michael Kruse <llvm@meinersbur.de> | 2019-03-19 03:18:21 +0000 |
---|---|---|
committer | Michael Kruse <llvm@meinersbur.de> | 2019-03-19 03:18:21 +0000 |
commit | 89251edefcb46f0b5e0caf2bb47f38d115e12fa4 (patch) | |
tree | 7e54729d60c01c687dc8e5764cb26cbf0e01a581 /polly/include | |
parent | b9b05100c567d67b237484be950ddf73fbeea797 (diff) | |
download | llvm-project-89251edefcb46f0b5e0caf2bb47f38d115e12fa4.tar.gz |
[CodeGen] LLVM OpenMP Backend.
The ParallelLoopGenerator class has been changed so that the GNU OpenMP
specific code is removed, which allows it to be used as the super class
in a template method pattern. The code has therefore been reorganized,
and one may not use the ParallelLoopGenerator directly anymore; instead,
specific implementations have to be provided. These implementations
contain the library-specific code. As such, the "GOMP" (code completely
taken from the existing backend) and "KMP" variants were created.
For "check-polly", equivalents of all tests that involved "GOMP" were
added; they test the new functionality, such as static scheduling and
different chunk sizes. "docs/UsingPollyWithClang.rst" shows how the
alternative backend may be used.
Patch by Michael Halkenhäuser <michaelhalk@web.de>
Differential Revision: https://reviews.llvm.org/D59100
llvm-svn: 356434
Diffstat (limited to 'polly/include')
-rw-r--r-- | polly/include/polly/CodeGen/LoopGenerators.h | 77 | ||||
-rw-r--r-- | polly/include/polly/CodeGen/LoopGeneratorsGOMP.h | 83 | ||||
-rw-r--r-- | polly/include/polly/CodeGen/LoopGeneratorsKMP.h | 152 |
3 files changed, 276 insertions, 36 deletions
diff --git a/polly/include/polly/CodeGen/LoopGenerators.h b/polly/include/polly/CodeGen/LoopGenerators.h index f41edc50619a..39ff3a78e044 100644 --- a/polly/include/polly/CodeGen/LoopGenerators.h +++ b/polly/include/polly/CodeGen/LoopGenerators.h @@ -28,6 +28,21 @@ class BasicBlock; namespace polly { using namespace llvm; +/// General scheduling types of parallel OpenMP for loops. +/// Initialization values taken from OpenMP's enum in kmp.h: sched_type. +/// Currently, only 'static' scheduling may change from chunked to non-chunked. +enum class OMPGeneralSchedulingType { + StaticChunked = 33, + StaticNonChunked = 34, + Dynamic = 35, + Guided = 36, + Runtime = 37 +}; + +extern int PollyNumThreads; +extern OMPGeneralSchedulingType PollyScheduling; +extern int PollyChunkSize; + /// Create a scalar do/for-style loop. /// /// @param LowerBound The starting value of the induction variable. @@ -132,7 +147,7 @@ public: SetVector<Value *> &Values, ValueMapT &VMap, BasicBlock::iterator *LoopBody); -private: +protected: /// The IR builder we use to create instructions. PollyIRBuilder &Builder; @@ -149,38 +164,6 @@ private: Module *M; public: - /// The functions below can be used if one does not want to generate a - /// specific OpenMP parallel loop, but generate individual parts of it - /// (e.g., the subfunction definition). - - /// Create a runtime library call to spawn the worker threads. - /// - /// @param SubFn The subfunction which holds the loop body. - /// @param SubFnParam The parameter for the subfunction (basically the struct - /// filled with the outside values). - /// @param LB The lower bound for the loop we parallelize. - /// @param UB The upper bound for the loop we parallelize. - /// @param Stride The stride of the loop we parallelize. - void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, - Value *UB, Value *Stride); - - /// Create a runtime library call to join the worker threads. 
- void createCallJoinThreads(); - - /// Create a runtime library call to get the next work item. - /// - /// @param LBPtr A pointer value to store the work item begin in. - /// @param UBPtr A pointer value to store the work item end in. - /// - /// @returns A true value if the work item is not empty. - Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr); - - /// Create a runtime library call to allow cleanup of the thread. - /// - /// @note This function is called right before the thread will exit the - /// subfunction and only if the runtime system depends on it. - void createCallCleanupThread(); - /// Create a struct for all @p Values and store them in there. /// /// @param Values The values which should be stored in the struct. @@ -198,8 +181,30 @@ public: Value *Struct, ValueMapT &VMap); /// Create the definition of the parallel subfunction. + /// + /// @return A pointer to the subfunction. Function *createSubFnDefinition(); + /// Create the runtime library calls for spawn and join of the worker threads. + /// Additionally, places a call to the specified subfunction. + /// + /// @param SubFn The subfunction which holds the loop body. + /// @param SubFnParam The parameter for the subfunction (basically the struct + /// filled with the outside values). + /// @param LB The lower bound for the loop we parallelize. + /// @param UB The upper bound for the loop we parallelize. + /// @param Stride The stride of the loop we parallelize. + virtual void deployParallelExecution(Value *SubFn, Value *SubFnParam, + Value *LB, Value *UB, Value *Stride) = 0; + + /// Prepare the definition of the parallel subfunction. + /// Creates the argument list and names them (as well as the subfunction). + /// + /// @param F A pointer to the (parallel) subfunction's parent function. + /// + /// @return The pointer to the (parallel) subfunction. + virtual Function *prepareSubFnDefinition(Function *F) const = 0; + /// Create the parallel subfunction. 
/// /// @param Stride The induction variable increment. @@ -211,9 +216,9 @@ public: /// @param SubFn The newly created subfunction is returned here. /// /// @return The newly created induction variable. - Value *createSubFn(Value *Stride, AllocaInst *Struct, - SetVector<Value *> UsedValues, ValueMapT &VMap, - Function **SubFn); + virtual std::tuple<Value *, Function *> + createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues, + ValueMapT &VMap) = 0; }; } // end namespace polly #endif diff --git a/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h b/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h new file mode 100644 index 000000000000..641d0dd08929 --- /dev/null +++ b/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h @@ -0,0 +1,83 @@ +//===- LoopGeneratorsGOMP.h - IR helper to create loops ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains functions to create scalar and OpenMP parallel loops +// as LLVM-IR. +// +//===----------------------------------------------------------------------===// +#ifndef POLLY_LOOP_GENERATORS_GOMP_H +#define POLLY_LOOP_GENERATORS_GOMP_H + +#include "polly/CodeGen/IRBuilder.h" +#include "polly/CodeGen/LoopGenerators.h" +#include "polly/Support/ScopHelper.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/ValueMap.h" + +namespace llvm { +class Value; +class Pass; +class BasicBlock; +} // namespace llvm + +namespace polly { +using namespace llvm; + +/// This ParallelLoopGenerator subclass handles the generation of parallelized +/// code, utilizing the GNU OpenMP library. +class ParallelLoopGeneratorGOMP : public ParallelLoopGenerator { +public: + /// Create a parallel loop generator for the current function. 
+ ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, LoopInfo &LI, + DominatorTree &DT, const DataLayout &DL) + : ParallelLoopGenerator(Builder, LI, DT, DL) {} + + // The functions below may be used if one does not want to generate a + // specific OpenMP parallel loop, but generate individual parts of it + // (e.g. the subfunction definition). + + /// Create a runtime library call to spawn the worker threads. + /// + /// @param SubFn The subfunction which holds the loop body. + /// @param SubFnParam The parameter for the subfunction (basically the struct + /// filled with the outside values). + /// @param LB The lower bound for the loop we parallelize. + /// @param UB The upper bound for the loop we parallelize. + /// @param Stride The stride of the loop we parallelize. + void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, + Value *UB, Value *Stride); + + void deployParallelExecution(Value *SubFn, Value *SubFnParam, Value *LB, + Value *UB, Value *Stride) override; + + virtual Function *prepareSubFnDefinition(Function *F) const override; + + std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct, + SetVector<Value *> UsedValues, + ValueMapT &VMap) override; + + /// Create a runtime library call to join the worker threads. + void createCallJoinThreads(); + + /// Create a runtime library call to get the next work item. + /// + /// @param LBPtr A pointer value to store the work item begin in. + /// @param UBPtr A pointer value to store the work item end in. + /// + /// @returns A true value if the work item is not empty. + Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr); + + /// Create a runtime library call to allow cleanup of the thread. + /// + /// @note This function is called right before the thread will exit the + /// subfunction and only if the runtime system depends on it. 
+ void createCallCleanupThread(); +}; +} // end namespace polly +#endif diff --git a/polly/include/polly/CodeGen/LoopGeneratorsKMP.h b/polly/include/polly/CodeGen/LoopGeneratorsKMP.h new file mode 100644 index 000000000000..9adcd56b159f --- /dev/null +++ b/polly/include/polly/CodeGen/LoopGeneratorsKMP.h @@ -0,0 +1,152 @@ +//===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains functions to create scalar and OpenMP parallel loops +// as LLVM-IR. +// +//===----------------------------------------------------------------------===// +#ifndef POLLY_LOOP_GENERATORS_KMP_H +#define POLLY_LOOP_GENERATORS_KMP_H + +#include "polly/CodeGen/IRBuilder.h" +#include "polly/CodeGen/LoopGenerators.h" +#include "polly/Support/ScopHelper.h" + +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/ValueMap.h" + +namespace llvm { +class Value; +class Pass; +class BasicBlock; +} // namespace llvm + +namespace polly { +using namespace llvm; + +/// This ParallelLoopGenerator subclass handles the generation of parallelized +/// code, utilizing the LLVM OpenMP library. +class ParallelLoopGeneratorKMP : public ParallelLoopGenerator { +public: + /// Create a parallel loop generator for the current function. + ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI, + DominatorTree &DT, const DataLayout &DL) + : ParallelLoopGenerator(Builder, LI, DT, DL) { + SourceLocationInfo = createSourceLocation(); + } + +protected: + /// The source location struct of this loop. 
+ /// ident_t = type { i32, i32, i32, i32, i8* } + GlobalValue *SourceLocationInfo; + + /// Convert the combination of given chunk size and scheduling type (which + /// might have been set via the command line) into the corresponding + /// scheduling type. This may result (e.g.) in a 'change' from + /// "static chunked" scheduling to "static non-chunked" (regarding the + /// provided and returned scheduling types). + /// + /// @param ChunkSize The chunk size, set via command line or its default. + /// @param Scheduling The scheduling, set via command line or its default. + /// + /// @return The corresponding OMPGeneralSchedulingType. + OMPGeneralSchedulingType + getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const; + + /// Returns True if 'LongType' is 64bit wide, otherwise: False. + bool is64BitArch(); + +public: + // The functions below may be used if one does not want to generate a + // specific OpenMP parallel loop, but generate individual parts of it + // (e.g. the subfunction definition). + + /// Create a runtime library call to spawn the worker threads. + /// + /// @param SubFn The subfunction which holds the loop body. + /// @param SubFnParam The parameter for the subfunction (basically the struct + /// filled with the outside values). + /// @param LB The lower bound for the loop we parallelize. + /// @param UB The upper bound for the loop we parallelize. + /// @param Stride The stride of the loop we parallelize. + void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, + Value *UB, Value *Stride); + + void deployParallelExecution(Value *SubFn, Value *SubFnParam, Value *LB, + Value *UB, Value *Stride) override; + + virtual Function *prepareSubFnDefinition(Function *F) const override; + + std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct, + SetVector<Value *> UsedValues, + ValueMapT &VMap) override; + + /// Create a runtime library call to get the current global thread number. 
+ /// + /// @return A Value ref which holds the current global thread number. + Value *createCallGlobalThreadNum(); + + /// Create a runtime library call to request a number of threads. + /// Which will be used in the next OpenMP section (by the next fork). + /// + /// @param GlobalThreadID The global thread ID. + /// @param NumThreads The number of threads to use. + void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads); + + /// Create a runtime library call to prepare the OpenMP runtime. + /// For dynamically scheduled loops, saving the loop arguments. + /// + /// @param GlobalThreadID The global thread ID. + /// @param LB The loop's lower bound. + /// @param UB The loop's upper bound. + /// @param Inc The loop increment. + /// @param ChunkSize The chunk size of the parallel loop. + void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB, + Value *Inc, Value *ChunkSize); + + /// Create a runtime library call to retrieve the next (dynamically) + /// allocated chunk of work for this thread. + /// + /// @param GlobalThreadID The global thread ID. + /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is + /// the last chunk of work, or 0 otherwise. + /// @param LBPtr Pointer to the lower bound for the next chunk. + /// @param UBPtr Pointer to the upper bound for the next chunk. + /// @param StridePtr Pointer to the stride for the next chunk. + /// + /// @return A Value which holds 1 if there is work to be done, 0 otherwise. + Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr, + Value *LBPtr, Value *UBPtr, Value *StridePtr); + + /// Create a runtime library call to prepare the OpenMP runtime. + /// For statically scheduled loops, saving the loop arguments. + /// + /// @param GlobalThreadID The global thread ID. + /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is + /// the last chunk of work, or 0 otherwise. + /// @param LBPtr Pointer to the lower bound for the next chunk. 
+ /// @param UBPtr Pointer to the upper bound for the next chunk. + /// @param StridePtr Pointer to the stride for the next chunk. + /// @param ChunkSize The chunk size of the parallel loop. + void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr, + Value *LBPtr, Value *UBPtr, Value *StridePtr, + Value *ChunkSize); + + /// Create a runtime library call to mark the end of + /// a statically scheduled loop. + /// + /// @param GlobalThreadID The global thread ID. + void createCallStaticFini(Value *GlobalThreadID); + + /// Create the current source location. + /// + /// TODO: Generates only(!) dummy values. + GlobalVariable *createSourceLocation(); +}; +} // end namespace polly +#endif |