summaryrefslogtreecommitdiff
path: root/cpu_ref/rsCpuCore.h
blob: c2a08640036f72b679cea47f9ba6146147a27431 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef RSD_CPU_CORE_H
#define RSD_CPU_CORE_H

#include "rsd_cpu.h"
#include "rsSignal.h"
#include "rsContext.h"
#include "rsCppUtils.h"
#include "rsElement.h"
#include "rsScriptC.h"
#include "rsCpuCoreRuntime.h"

namespace android {
namespace renderscript {

// Whether the CPU we're running on supports SIMD instructions.
// This is only a declaration; the variable is defined in another translation
// unit.
extern bool gArchUseSIMD;

// Signatures of the entry points found in RenderScript code, spelled as alias
// declarations.  Parameter names are kept purely as documentation.

// Simple reduce kernel: reads from inBuf, writes to outBuf, and receives a length.
using ReduceFunc_t = void (*)(const uint8_t *inBuf, uint8_t *outBuf, uint32_t len);

// General reduce: expanded accumulator function, called with the driver info,
// two x coordinates, and the accumulator to update.
using ReduceNewAccumulatorFunc_t =
        void (*)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);

// General reduce: combiner taking a writable accumulator and a read-only one.
using ReduceNewCombinerFunc_t = void (*)(uint8_t *accum, const uint8_t *other);

// General reduce: initializer for a single accumulator.
using ReduceNewInitializerFunc_t = void (*)(uint8_t *accum);

// General reduce: outconverter from an accumulator to the output.
using ReduceNewOutConverterFunc_t = void (*)(uint8_t *out, const uint8_t *accum);

// Foreach kernel: called with the driver info, two x coordinates, and the
// output stride.
using ForEachFunc_t =
        void (*)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);

// Invokable function taking an opaque parameter block.
using InvokeFunc_t = void (*)(void *params);

// Parameterless function used for script init or destructor entry points.
using InitOrDtorFunc_t = void (*)(void);

// Parameterless root function returning an int.
using RootFunc_t = int (*)(void);

// Groups the four function pointers of a general reduce kernel together with
// the size of its accumulator.
struct ReduceNewDescription {
    ReduceNewAccumulatorFunc_t  accumFunc;  // expanded accumulator function
    ReduceNewInitializerFunc_t  initFunc;   // user initializer function
    ReduceNewCombinerFunc_t     combFunc;   // user combiner function
    ReduceNewOutConverterFunc_t outFunc;    // user outconverter function
    size_t                      accumSize;  // accumulator datum size, in bytes
};

// Driver-internal callback type used to execute a kernel: it receives an
// opaque user pointer and an index.
using WorkerCallback_t = void (*)(void *usr, uint32_t idx);

// Forward declarations so the structs below can hold pointers to these classes.
// RsdCpuReferenceImpl is defined later in this header; RsdCpuScriptImpl is
// defined elsewhere.
class RsdCpuScriptImpl;
class RsdCpuReferenceImpl;

// Groups a context pointer, a script pointer, and a CPU script implementation
// pointer in one record.  RsdCpuReferenceImpl keeps one instance as mTlsStruct.
// NOTE(review): presumably published through thread-local storage (see
// RsdCpuReferenceImpl::setTLS) -- confirm in the implementation file.
struct ScriptTLSStruct {
    android::renderscript::Context * mContext;
    const android::renderscript::Script * mScript;
    RsdCpuScriptImpl *mImpl;
};

// MTLaunchStructCommon holds the information about a multithreaded kernel
// launch that is shared by every launch kind.  MTLaunchStructForEach,
// MTLaunchStructReduce, and MTLaunchStructReduceNew derive from it.
struct MTLaunchStructCommon {
    // Driver instance and script implementation associated with the launch
    RsdCpuReferenceImpl *rs;
    RsdCpuScriptImpl *script;

    uint32_t mSliceSize;
    // NOTE(review): volatile by itself does not make updates atomic;
    // presumably this is modified through atomic operations in the
    // implementation file -- confirm.
    volatile int mSliceNum;
    bool isThreadable;

    // Boundary information about the launch
    RsLaunchDimensions start;
    RsLaunchDimensions end;
    // Points to MTLaunchStructForEach::fep::dim or
    // MTLaunchStructReduce::inputDim or
    // MTLaunchStructReduceNew::redp::dim.
    RsLaunchDimensions *dimPtr;
};

// Launch information for a multithreaded foreach kernel.
struct MTLaunchStructForEach : public MTLaunchStructCommon {
    // Driver info structure
    RsExpandKernelDriverInfo fep;

    // Function pointer for the foreach kernel
    ForEachFunc_t kernel;
    // Input allocations; the array has room for RS_KERNEL_INPUT_LIMIT entries.
    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
    // NOTE(review): sized by the *input* limit, although
    // RsdCpuReferenceImpl::launchForEach takes a single output allocation --
    // confirm this is intended.
    Allocation *aout[RS_KERNEL_INPUT_LIMIT];
};

// Launch information for a multithreaded simple reduce kernel.
struct MTLaunchStructReduce : public MTLaunchStructCommon {
    // Function pointer for the simple reduce kernel
    ReduceFunc_t kernel;
    // Read-only input bytes and writable output bytes
    const uint8_t *inBuf;
    uint8_t *outBuf;
    // Input dimensions; MTLaunchStructCommon::dimPtr may point at this member.
    RsLaunchDimensions inputDim;
};

// Launch information for a multithreaded general reduce kernel.
struct MTLaunchStructReduceNew : public MTLaunchStructCommon {
    // Driver info structure
    RsExpandKernelDriverInfo redp;

    // Input allocations; the array has room for RS_KERNEL_INPUT_LIMIT entries.
    const Allocation *ains[RS_KERNEL_INPUT_LIMIT];

    // The same four function pointers that ReduceNewDescription carries.
    ReduceNewAccumulatorFunc_t accumFunc;
    ReduceNewInitializerFunc_t initFunc;
    ReduceNewCombinerFunc_t combFunc;
    ReduceNewOutConverterFunc_t outFunc;

    size_t accumSize;  // accumulator datum size in bytes

    size_t accumStride;  // stride between accumulators in accumAlloc (below)

    // These fields are used for managing accumulator data items in a
    // multithreaded execution.
    //
    // Let the number of threads be N.
    // Let Outc be true iff there is an outconverter.
    //
    // accumAlloc is a pointer to a single allocation of (N - !Outc)
    // accumulators.  (If there is no outconverter, then the output
    // allocation acts as an accumulator.)  It is created at kernel
    // launch time.  Within that allocation, the distance between the
    // start of adjacent accumulators is accumStride bytes -- this
    // might be the same as accumSize, or it might be larger, if we
    // are attempting to avoid false sharing.
    //
    // accumCount is an atomic counter of how many accumulators have
    // been grabbed by threads.  It is initialized to zero at kernel
    // launch time.  See accumPtr for further description.
    // NOTE(review): the field is declared as a plain uint32_t, so the
    // atomicity must come from how the implementation accesses it --
    // confirm in the implementation file.
    //
    // accumPtr is a pointer to an array of N pointers to accumulators.
    // The array is created at kernel launch time, and each element is
    // initialized to nullptr.  When a particular thread goes to work,
    // that thread obtains its accumulator from its entry in this
    // array.  If the entry is nullptr, that thread needs to obtain an
    // accumulator, and initialize its entry in the array accordingly.
    // It does so via atomic access (fetch-and-add) to accumCount.
    // - If Outc, then the fetched value is used as an index into
    //   accumAlloc.
    // - If !Outc, then
    //   - If the fetched value is zero, then this thread gets the
    //     output allocation for its accumulator.
    //   - If the fetched value is nonzero, then (fetched value - 1)
    //     is used as an index into accumAlloc.
    uint8_t *accumAlloc;
    uint8_t **accumPtr;
    uint32_t accumCount;

    // Logging control
    bool logReduceAccum;
};

// CPU reference implementation of the RsdCpuReference interface.
//
// Declares the kernel launch entry points (foreach, simple reduce, general
// reduce), the script / intrinsic / script-group factories, and the
// bookkeeping for the helper threads (see Workers).  Member functions without
// an inline body are defined outside this header.
class RsdCpuReferenceImpl : public RsdCpuReference {
public:
    ~RsdCpuReferenceImpl() override;
    // explicit: a Context pointer must never be converted implicitly into a
    // driver object.
    explicit RsdCpuReferenceImpl(Context *);

    void lockMutex();
    void unlockMutex();

    // Takes the version numbers plus the symbol and script lookup callbacks;
    // returns a bool status.
    bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
    void setPriority(int32_t priority) override;
    // Virtual so that a subclass can replace how the callback is dispatched.
    virtual void launchThreads(WorkerCallback_t cbk, void *data);
    // Static thread entry point taking an opaque pointer argument.
    static void * helperThreadProc(void *vrsc);
    RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);

    // Returns the context pointer stored in mRSC.
    Context * getContext() const {return mRSC;}
    // Returns the worker count plus one.
    // NOTE(review): the extra one presumably stands for the launching
    // thread -- confirm in the implementation file.
    uint32_t getThreadCount() const {
        return mWorkers.mCount + 1;
    }

    // Launch foreach kernel
    void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
                       const RsScriptCall *sc, MTLaunchStructForEach *mtls);

    // Launch a simple reduce kernel
    void launchReduce(const Allocation *ain, Allocation *aout,
                      MTLaunchStructReduce *mtls);

    // Launch a general reduce kernel
    void launchReduceNew(const Allocation ** ains, uint32_t inLen, Allocation *aout,
                         MTLaunchStructReduceNew *mtls);

    CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
                             uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
    CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
    void* createScriptGroup(const ScriptGroupBase *sg) override;

    const RsdCpuReference::CpuSymbol *symLookup(const char *);

    // Forwards to the script lookup callback, passing the stored context.
    RsdCpuReference::CpuScript *lookupScript(const Script *s) {
        return mScriptLookupFn(mRSC, s);
    }

    // Stores the runtime-selection callback.
    void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
        mSelectRTCallback = pSelectRTCallback;
    }
    // Returns the stored runtime-selection callback.
    RSSelectRTCallback getSelectRTCallback() const {
        return mSelectRTCallback;
    }

    // Copies the given name into mBccPluginName.
    virtual void setBccPluginName(const char *name) {
        mBccPluginName.setTo(name);
    }
    // Returns the stored plugin name as a C string.
    virtual const char *getBccPluginName() const {
        return mBccPluginName.string();
    }
    // Returns the mInKernel flag.
    bool getInKernel() override { return mInKernel; }

    // Set to true if we should embed global variable information in the code.
    void setEmbedGlobalInfo(bool v) override {
        mEmbedGlobalInfo = v;
    }

    // Returns true if we should embed global variable information in the code.
    bool getEmbedGlobalInfo() const override {
        return mEmbedGlobalInfo;
    }

    // Set to true if we should skip constant (immutable) global variables when
    // potentially embedding information about globals.
    void setEmbedGlobalInfoSkipConstant(bool v) override {
        mEmbedGlobalInfoSkipConstant = v;
    }

    // Returns true if we should skip constant (immutable) global variables when
    // potentially embedding information about globals.
    bool getEmbedGlobalInfoSkipConstant() const override {
        return mEmbedGlobalInfoSkipConstant;
    }

protected:
    Context *mRSC;
    uint32_t version_major;
    uint32_t version_minor;
    //bool mHasGraphics;
    bool mInKernel;  // Is a parallel kernel execution underway?

    // Bookkeeping for the helper threads.
    struct Workers {
        // NOTE(review): volatile alone does not make these counters atomic;
        // presumably they are updated through atomic operations in the
        // implementation file -- confirm.
        volatile int mRunningCount;
        volatile int mLaunchCount;
        uint32_t mCount;               // worker count; see getThreadCount()
        pthread_t *mThreadId;
        pid_t *mNativeThreadId;
        Signal mCompleteSignal;
        Signal *mLaunchSignals;
        // Callback and opaque data for the current launch
        WorkerCallback_t mLaunchCallback;
        void *mLaunchData;
    };
    Workers mWorkers;
    bool mExit;
    sym_lookup_t mSymLookupFn;
    script_lookup_t mScriptLookupFn;

    ScriptTLSStruct mTlsStruct;

    RSSelectRTCallback mSelectRTCallback;
    String8 mBccPluginName;

    // Specifies whether we should embed global variable information in the
    // code via special RS variables that can be examined later by the driver.
    // Defaults to true.
    bool mEmbedGlobalInfo;

    // Specifies whether we should skip constant (immutable) global variables
    // when potentially embedding information about globals.
    // Defaults to true.
    bool mEmbedGlobalInfoSkipConstant;

    long mPageSize;

    // Launch a general reduce kernel
    void launchReduceNewSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
                               MTLaunchStructReduceNew *mtls);
    void launchReduceNewParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
                                 MTLaunchStructReduceNew *mtls);
};


}
}

#endif