aboutsummaryrefslogtreecommitdiff
path: root/polly/include
diff options
context:
space:
mode:
authorMichael Kruse <llvm@meinersbur.de>2019-03-19 03:18:21 +0000
committerMichael Kruse <llvm@meinersbur.de>2019-03-19 03:18:21 +0000
commit89251edefcb46f0b5e0caf2bb47f38d115e12fa4 (patch)
tree7e54729d60c01c687dc8e5764cb26cbf0e01a581 /polly/include
parentb9b05100c567d67b237484be950ddf73fbeea797 (diff)
downloadllvm-project-89251edefcb46f0b5e0caf2bb47f38d115e12fa4.tar.gz
[CodeGen] LLVM OpenMP Backend.
The ParallelLoopGenerator class has been changed so that the GNU OpenMP-specific code is removed, allowing it to be used as a superclass in a template pattern. The code has therefore been reorganized, and ParallelLoopGenerator can no longer be used directly; instead, specific implementations have to be provided. These implementations contain the library-specific code. Accordingly, the "GOMP" variant (code taken completely from the existing backend) and the "KMP" variant were created. For "check-polly", equivalents of all tests that involved "GOMP" were added; they test the new functionality, such as static scheduling and different chunk sizes. "docs/UsingPollyWithClang.rst" shows how the alternative backend may be used. Patch by Michael Halkenhäuser <michaelhalk@web.de> Differential Revision: https://reviews.llvm.org/D59100 llvm-svn: 356434
Diffstat (limited to 'polly/include')
-rw-r--r--polly/include/polly/CodeGen/LoopGenerators.h77
-rw-r--r--polly/include/polly/CodeGen/LoopGeneratorsGOMP.h83
-rw-r--r--polly/include/polly/CodeGen/LoopGeneratorsKMP.h152
3 files changed, 276 insertions, 36 deletions
diff --git a/polly/include/polly/CodeGen/LoopGenerators.h b/polly/include/polly/CodeGen/LoopGenerators.h
index f41edc50619a..39ff3a78e044 100644
--- a/polly/include/polly/CodeGen/LoopGenerators.h
+++ b/polly/include/polly/CodeGen/LoopGenerators.h
@@ -28,6 +28,21 @@ class BasicBlock;
namespace polly {
using namespace llvm;
+/// General scheduling types of parallel OpenMP for loops.
+/// Initialization values taken from OpenMP's enum in kmp.h: sched_type.
+/// Currently, only 'static' scheduling may change from chunked to non-chunked.
+enum class OMPGeneralSchedulingType {
+ StaticChunked = 33,
+ StaticNonChunked = 34,
+ Dynamic = 35,
+ Guided = 36,
+ Runtime = 37
+};
+
+extern int PollyNumThreads;
+extern OMPGeneralSchedulingType PollyScheduling;
+extern int PollyChunkSize;
+
/// Create a scalar do/for-style loop.
///
/// @param LowerBound The starting value of the induction variable.
@@ -132,7 +147,7 @@ public:
SetVector<Value *> &Values, ValueMapT &VMap,
BasicBlock::iterator *LoopBody);
-private:
+protected:
/// The IR builder we use to create instructions.
PollyIRBuilder &Builder;
@@ -149,38 +164,6 @@ private:
Module *M;
public:
- /// The functions below can be used if one does not want to generate a
- /// specific OpenMP parallel loop, but generate individual parts of it
- /// (e.g., the subfunction definition).
-
- /// Create a runtime library call to spawn the worker threads.
- ///
- /// @param SubFn The subfunction which holds the loop body.
- /// @param SubFnParam The parameter for the subfunction (basically the struct
- /// filled with the outside values).
- /// @param LB The lower bound for the loop we parallelize.
- /// @param UB The upper bound for the loop we parallelize.
- /// @param Stride The stride of the loop we parallelize.
- void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
- Value *UB, Value *Stride);
-
- /// Create a runtime library call to join the worker threads.
- void createCallJoinThreads();
-
- /// Create a runtime library call to get the next work item.
- ///
- /// @param LBPtr A pointer value to store the work item begin in.
- /// @param UBPtr A pointer value to store the work item end in.
- ///
- /// @returns A true value if the work item is not empty.
- Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);
-
- /// Create a runtime library call to allow cleanup of the thread.
- ///
- /// @note This function is called right before the thread will exit the
- /// subfunction and only if the runtime system depends on it.
- void createCallCleanupThread();
-
/// Create a struct for all @p Values and store them in there.
///
/// @param Values The values which should be stored in the struct.
@@ -198,8 +181,30 @@ public:
Value *Struct, ValueMapT &VMap);
/// Create the definition of the parallel subfunction.
+ ///
+ /// @return A pointer to the subfunction.
Function *createSubFnDefinition();
+ /// Create the runtime library calls for spawn and join of the worker threads.
+ /// Additionally, places a call to the specified subfunction.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ virtual void deployParallelExecution(Value *SubFn, Value *SubFnParam,
+ Value *LB, Value *UB, Value *Stride) = 0;
+
+ /// Prepare the definition of the parallel subfunction.
+ /// Creates the argument list and names them (as well as the subfunction).
+ ///
+ /// @param F A pointer to the (parallel) subfunction's parent function.
+ ///
+ /// @return The pointer to the (parallel) subfunction.
+ virtual Function *prepareSubFnDefinition(Function *F) const = 0;
+
/// Create the parallel subfunction.
///
/// @param Stride The induction variable increment.
@@ -211,9 +216,9 @@ public:
/// @param SubFn The newly created subfunction is returned here.
///
/// @return The newly created induction variable.
- Value *createSubFn(Value *Stride, AllocaInst *Struct,
- SetVector<Value *> UsedValues, ValueMapT &VMap,
- Function **SubFn);
+ virtual std::tuple<Value *, Function *>
+ createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues,
+ ValueMapT &VMap) = 0;
};
} // end namespace polly
#endif
diff --git a/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h b/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
new file mode 100644
index 000000000000..641d0dd08929
--- /dev/null
+++ b/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
@@ -0,0 +1,83 @@
+//===- LoopGeneratorsGOMP.h - IR helper to create loops ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions to create scalar and OpenMP parallel loops
+// as LLVM-IR.
+//
+//===----------------------------------------------------------------------===//
+#ifndef POLLY_LOOP_GENERATORS_GOMP_H
+#define POLLY_LOOP_GENERATORS_GOMP_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/CodeGen/LoopGenerators.h"
+#include "polly/Support/ScopHelper.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/ValueMap.h"
+
+namespace llvm {
+class Value;
+class Pass;
+class BasicBlock;
+} // namespace llvm
+
+namespace polly {
+using namespace llvm;
+
+/// This ParallelLoopGenerator subclass handles the generation of parallelized
+/// code, utilizing the GNU OpenMP library.
+class ParallelLoopGeneratorGOMP : public ParallelLoopGenerator {
+public:
+ /// Create a parallel loop generator for the current function.
+ ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, LoopInfo &LI,
+ DominatorTree &DT, const DataLayout &DL)
+ : ParallelLoopGenerator(Builder, LI, DT, DL) {}
+
+ // The functions below may be used if one does not want to generate a
+ // specific OpenMP parallel loop, but generate individual parts of it
+ // (e.g. the subfunction definition).
+
+ /// Create a runtime library call to spawn the worker threads.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride);
+
+ void deployParallelExecution(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride) override;
+
+ virtual Function *prepareSubFnDefinition(Function *F) const override;
+
+ std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
+ SetVector<Value *> UsedValues,
+ ValueMapT &VMap) override;
+
+ /// Create a runtime library call to join the worker threads.
+ void createCallJoinThreads();
+
+ /// Create a runtime library call to get the next work item.
+ ///
+ /// @param LBPtr A pointer value to store the work item begin in.
+ /// @param UBPtr A pointer value to store the work item end in.
+ ///
+ /// @returns A true value if the work item is not empty.
+ Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);
+
+ /// Create a runtime library call to allow cleanup of the thread.
+ ///
+ /// @note This function is called right before the thread will exit the
+ /// subfunction and only if the runtime system depends on it.
+ void createCallCleanupThread();
+};
+} // end namespace polly
+#endif
diff --git a/polly/include/polly/CodeGen/LoopGeneratorsKMP.h b/polly/include/polly/CodeGen/LoopGeneratorsKMP.h
new file mode 100644
index 000000000000..9adcd56b159f
--- /dev/null
+++ b/polly/include/polly/CodeGen/LoopGeneratorsKMP.h
@@ -0,0 +1,152 @@
+//===- LoopGeneratorsKMP.h - IR helper to create loops ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions to create scalar and OpenMP parallel loops
+// as LLVM-IR.
+//
+//===----------------------------------------------------------------------===//
+#ifndef POLLY_LOOP_GENERATORS_KMP_H
+#define POLLY_LOOP_GENERATORS_KMP_H
+
+#include "polly/CodeGen/IRBuilder.h"
+#include "polly/CodeGen/LoopGenerators.h"
+#include "polly/Support/ScopHelper.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/ValueMap.h"
+
+namespace llvm {
+class Value;
+class Pass;
+class BasicBlock;
+} // namespace llvm
+
+namespace polly {
+using namespace llvm;
+
+/// This ParallelLoopGenerator subclass handles the generation of parallelized
+/// code, utilizing the LLVM OpenMP library.
+class ParallelLoopGeneratorKMP : public ParallelLoopGenerator {
+public:
+ /// Create a parallel loop generator for the current function.
+ ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
+ DominatorTree &DT, const DataLayout &DL)
+ : ParallelLoopGenerator(Builder, LI, DT, DL) {
+ SourceLocationInfo = createSourceLocation();
+ }
+
+protected:
+ /// The source location struct of this loop.
+ /// ident_t = type { i32, i32, i32, i32, i8* }
+ GlobalValue *SourceLocationInfo;
+
+ /// Convert the combination of given chunk size and scheduling type (which
+ /// might have been set via the command line) into the corresponding
+ /// scheduling type. This may result (e.g.) in a 'change' from
+ /// "static chunked" scheduling to "static non-chunked" (regarding the
+ /// provided and returned scheduling types).
+ ///
+ /// @param ChunkSize The chunk size, set via command line or its default.
+ /// @param Scheduling The scheduling, set via command line or its default.
+ ///
+ /// @return The corresponding OMPGeneralSchedulingType.
+ OMPGeneralSchedulingType
+ getSchedType(int ChunkSize, OMPGeneralSchedulingType Scheduling) const;
+
+ /// Returns true if 'LongType' is 64 bits wide, false otherwise.
+ bool is64BitArch();
+
+public:
+ // The functions below may be used if one does not want to generate a
+ // specific OpenMP parallel loop, but generate individual parts of it
+ // (e.g. the subfunction definition).
+
+ /// Create a runtime library call to spawn the worker threads.
+ ///
+ /// @param SubFn The subfunction which holds the loop body.
+ /// @param SubFnParam The parameter for the subfunction (basically the struct
+ /// filled with the outside values).
+ /// @param LB The lower bound for the loop we parallelize.
+ /// @param UB The upper bound for the loop we parallelize.
+ /// @param Stride The stride of the loop we parallelize.
+ void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride);
+
+ void deployParallelExecution(Value *SubFn, Value *SubFnParam, Value *LB,
+ Value *UB, Value *Stride) override;
+
+ virtual Function *prepareSubFnDefinition(Function *F) const override;
+
+ std::tuple<Value *, Function *> createSubFn(Value *Stride, AllocaInst *Struct,
+ SetVector<Value *> UsedValues,
+ ValueMapT &VMap) override;
+
+ /// Create a runtime library call to get the current global thread number.
+ ///
+ /// @return A Value ref which holds the current global thread number.
+ Value *createCallGlobalThreadNum();
+
+ /// Create a runtime library call to request a number of threads,
+ /// which will be used in the next OpenMP section (by the next fork).
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param NumThreads The number of threads to use.
+ void createCallPushNumThreads(Value *GlobalThreadID, Value *NumThreads);
+
+ /// Create a runtime library call to prepare the OpenMP runtime for a
+ /// dynamically scheduled loop, saving the loop arguments.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param LB The loop's lower bound.
+ /// @param UB The loop's upper bound.
+ /// @param Inc The loop increment.
+ /// @param ChunkSize The chunk size of the parallel loop.
+ void createCallDispatchInit(Value *GlobalThreadID, Value *LB, Value *UB,
+ Value *Inc, Value *ChunkSize);
+
+ /// Create a runtime library call to retrieve the next (dynamically)
+ /// allocated chunk of work for this thread.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is
+ /// the last chunk of work, or 0 otherwise.
+ /// @param LBPtr Pointer to the lower bound for the next chunk.
+ /// @param UBPtr Pointer to the upper bound for the next chunk.
+ /// @param StridePtr Pointer to the stride for the next chunk.
+ ///
+ /// @return A Value which holds 1 if there is work to be done, 0 otherwise.
+ Value *createCallDispatchNext(Value *GlobalThreadID, Value *IsLastPtr,
+ Value *LBPtr, Value *UBPtr, Value *StridePtr);
+
+ /// Create a runtime library call to prepare the OpenMP runtime for a
+ /// statically scheduled loop, saving the loop arguments.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ /// @param IsLastPtr Pointer to a flag, which is set to 1 if this is
+ /// the last chunk of work, or 0 otherwise.
+ /// @param LBPtr Pointer to the lower bound for the next chunk.
+ /// @param UBPtr Pointer to the upper bound for the next chunk.
+ /// @param StridePtr Pointer to the stride for the next chunk.
+ /// @param ChunkSize The chunk size of the parallel loop.
+ void createCallStaticInit(Value *GlobalThreadID, Value *IsLastPtr,
+ Value *LBPtr, Value *UBPtr, Value *StridePtr,
+ Value *ChunkSize);
+
+ /// Create a runtime library call to mark the end of
+ /// a statically scheduled loop.
+ ///
+ /// @param GlobalThreadID The global thread ID.
+ void createCallStaticFini(Value *GlobalThreadID);
+
+ /// Create the current source location.
+ ///
+ /// TODO: Generates only(!) dummy values.
+ GlobalVariable *createSourceLocation();
+};
+} // end namespace polly
+#endif