1 files changed, 262 insertions, 0 deletions
diff --git a/renderscript-toolkit/src/main/cpp/TaskProcessor.h b/renderscript-toolkit/src/main/cpp/TaskProcessor.h
new file mode 100644
index 0000000..0c59e25
--- /dev/null
+++ b/renderscript-toolkit/src/main/cpp/TaskProcessor.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
+#define ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
+
+// #include <android-base/thread_annotations.h>
+
+#include <atomic>
+#include <condition_variable>
+#include <cstddef>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+namespace renderscript {
+
+/**
+ * Description of the data to be processed for one Toolkit method call, e.g. one blur or one
+ * blend operation.
+ *
+ * The data to be processed is a 2D array of cells. Each cell is a vector of 1 to 4 unsigned bytes.
+ * The most typical configuration is a 2D array of uchar4 used to represent RGBA images.
+ *
+ * This is a base class. There will be a subclass for each Toolkit op.
+ *
+ * Typical usage of a derived class would look like:
+ *    BlurTask task(in, out, sizeX, sizeY, vectorSize, etc);
+ *    processor->doTask(&task);
+ *
+ * The TaskProcessor should call setTiling() and setUsesSimd() once, before calling processTile().
+ * Other classes should not call setTiling(), setUsesSimd(), and processTile().
+ */
+class Task {
+   protected:
+    /**
+     * Number of cells in the X direction.
+     */
+    const size_t mSizeX;
+    /**
+     * Number of cells in the Y direction.
+     */
+    const size_t mSizeY;
+    /**
+     * Number of elements in a vector (cell). From 1-4.
+     */
+    const size_t mVectorSize;
+    /**
+     * Whether the task prefers the processData call to represent the work to be done as
+     * one line rather than a rectangle. This would be the case for work that don't involve
+     * vertical neighbors, e.g. blend or histogram. A task would prefer this to minimize the
+     * number of SIMD calls to make, i.e. have one call that covers all the rows.
+     *
+     * This setting will be used only when a tile covers the entire width of the data to be
+     * processed.
+     */
+    const bool mPrefersDataAsOneRow;
+    /**
+     * Whether the processor we're working on supports SIMD operations.
+     */
+    bool mUsesSimd = false;
+
+   private:
+    /**
+     * If not null, we'll process a subset of the whole 2D array. This specifies the restriction.
+     */
+    const struct Restriction* mRestriction;
+
+    /**
+     * We'll divide the work into rectangular tiles. See setTiling().
+     */
+
+    /**
+     * Size of a tile in the X direction, as a number of cells.
+     */
+    size_t mCellsPerTileX = 0;
+    /**
+     * Size of a tile in the Y direction, as a number of cells.
+     */
+    size_t mCellsPerTileY = 0;
+    /**
+     * Number of tiles per row of the restricted area we're working on.
+     */
+    size_t mTilesPerRow = 0;
+    /**
+     * Number of tiles per column of the restricted area we're working on.
+     */
+    size_t mTilesPerColumn = 0;
+
+   public:
+    /**
+     * Construct a task.
+     *
+     * sizeX and sizeY should be greater than 0. vectorSize should be between 1 and 4.
+     * The restriction should outlive this instance. The Toolkit validates the
+     * arguments so we won't do that again here.
+     */
+    Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow,
+         const Restriction* restriction)
+        : mSizeX{sizeX},
+          mSizeY{sizeY},
+          mVectorSize{vectorSize},
+          mPrefersDataAsOneRow{prefersDataAsOneRow},
+          mRestriction{restriction} {}
+    virtual ~Task() {}
+
+    void setUsesSimd(bool uses) { mUsesSimd = uses; }
+
+    /**
+     * Divide the work into a number of tiles that can be distributed to the various threads.
+     * A tile will be a rectangular region. To be robust, we'll want to handle regular cases
+     * like 400x300 but also unusual ones like 1x120000, 120000x1, 1x1.
+     *
+     * We have a target size for the tiles, which corresponds roughly to how much data a thread
+     * will want to process before checking for more work. If the target is set too low, we'll spend
+     * more time in synchronization. If it's too large, some cores may not be used as efficiently.
+     *
+     * This method returns the number of tiles.
+     *
+     * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000.
+     */
+    int setTiling(unsigned int targetTileSizeInBytes);
+
+    /**
+     * This is called by the TaskProcessor to instruct the task to process a tile.
+     *
+     * @param threadIndex The index of the thread that's processing the tile.
+     * @param tileIndex The index of the tile to process.
+     */
+    void processTile(unsigned int threadIndex, size_t tileIndex);
+
+   private:
+    /**
+     * Call to the derived class to process the data bounded by the rectangle specified
+     * by (startX, startY) and (endX, endY). The end values are EXCLUDED. This rectangle
+     * will be contained with the restriction, if one is provided.
+     */
+    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
+                             size_t endY) = 0;
+};
+
+/**
+ * There's one instance of the task processor for the Toolkit. This class owns the thread pool,
+ * and dispatches the tiles of work to the threads.
+ */
+class TaskProcessor {
+    /**
+     * Does this processor support SIMD-like instructions?
+     */
+    const bool mUsesSimd;
+    /**
+     * The number of separate threads we'll spawn. It's one less than the number of threads that
+     * do the work as the client thread that starts the work will also be used.
+     */
+    const unsigned int mNumberOfPoolThreads;
+    /**
+     * Ensures that only one task is done at a time.
+     */
+    std::mutex mTaskMutex;
+    /**
+     * Ensures consistent access to the shared queue state.
+     */
+    std::mutex mQueueMutex;
+    /**
+     * The thread pool workers.
+     */
+    std::vector<std::thread> mPoolThreads;
+    /**
+     * The task being processed, if any. We only do one task at a time. We could create a queue
+     * of tasks but using a mTaskMutex is sufficient for now.
+     */
+    Task* mCurrentTask /*GUARDED_BY(mTaskMutex)*/ = nullptr;
+    /**
+     * Signals that the mPoolThreads should terminate.
+     */
+    bool mStopThreads /*GUARDED_BY(mQueueMutex)*/ = false;
+    /**
+     * Signaled when work is available or the mPoolThreads need to shut down. mStopThreads is used
+     * to distinguish between the two.
+     */
+    std::condition_variable mWorkAvailableOrStop;
+    /**
+     * Signaled when the work for the task is finished.
+     */
+    std::condition_variable mWorkIsFinished;
+    /**
+     * A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts
+     * working on a new tile, it uses this count to identify which tile to work on. The tile
+     * number is sufficient to determine the boundaries of the data to process.
+     *
+     * The number of tiles left to process.
+     */
+    int mTilesNotYetStarted /*GUARDED_BY(mQueueMutex)*/ = 0;
+    /**
+     * The number of tiles currently being processed. Must not be greater than
+     * mNumberOfPoolThreads + 1.
+     */
+    int mTilesInProcess /*GUARDED_BY(mQueueMutex)*/ = 0;
+
+    /**
+     * Determines how we'll tile the work and signals the thread pool of available work.
+     *
+     * @param task The task to be performed.
+     */
+    void startWork(Task* task) /*REQUIRES(mTaskMutex)*/;
+
+    /**
+     * Tells the thread to start processing work off the queue.
+     *
+     * The flag is used for prevent the main thread from blocking forever if the work is
+     * so trivial that the worker threads complete the work before the main thread calls this
+     * method.
+     *
+     * @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will referred by.
+     * @param returnWhenNoWork If there's no work, return immediately.
+     */
+    void processTilesOfWork(int threadIndex, bool returnWhenNoWork);
+
+    /**
+     * Wait for the pool workers to complete the work on the current task.
+     */
+    void waitForPoolWorkersToComplete();
+
+   public:
+    /**
+     * Create the processor.
+     *
+     * @param numThreads The total number of threads to use. If 0, we'll decided based on system
+     * properties.
+     */
+    explicit TaskProcessor(unsigned int numThreads = 0);
+
+    ~TaskProcessor();
+
+    /**
+     * Do the specified task. Returns only after the task has been completed.
+     */
+    void doTask(Task* task);
+
+    /**
+     * Some Tasks need to allocate temporary storage for each worker thread.
+     * This provides the number of threads.
+     */
+    unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; }
+};
+
+}  // namespace renderscript
+
+#endif  // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H