diff options
-rw-r--r-- | cpp/RenderScript.cpp | 8 | ||||
-rw-r--r-- | cpp/rsCppStructs.h | 5 | ||||
-rw-r--r-- | cpu_ref/rsCpuCore.cpp | 12 | ||||
-rw-r--r-- | rs.h | 2 | ||||
-rw-r--r-- | rsContext.cpp | 53 | ||||
-rw-r--r-- | rsContext.h | 6 | ||||
-rw-r--r-- | rsg_generator.c | 23 | ||||
-rw-r--r-- | tests/latency/latency.cpp | 29 |
8 files changed, 104 insertions, 34 deletions
diff --git a/cpp/RenderScript.cpp b/cpp/RenderScript.cpp index 4ce4c9ba..98ab3807 100644 --- a/cpp/RenderScript.cpp +++ b/cpp/RenderScript.cpp @@ -54,18 +54,18 @@ RS::~RS() { mDev = NULL; } -bool RS::init(bool forceCpu) { - return RS::init(RS_VERSION, forceCpu); +bool RS::init(bool forceCpu, bool synchronous) { + return RS::init(RS_VERSION, forceCpu, synchronous); } -bool RS::init(int targetApi, bool forceCpu) { +bool RS::init(int targetApi, bool forceCpu, bool synchronous) { mDev = rsDeviceCreate(); if (mDev == 0) { ALOGE("Device creation failed"); return false; } - mContext = rsContextCreate(mDev, 0, targetApi, forceCpu); + mContext = rsContextCreate(mDev, 0, targetApi, forceCpu, synchronous); if (mContext == 0) { ALOGE("Context creation failed"); return false; diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h index a430c351..a3818163 100644 --- a/cpp/rsCppStructs.h +++ b/cpp/rsCppStructs.h @@ -43,8 +43,7 @@ class RS : public android::LightRefBase<RS> { RS(); virtual ~RS(); - bool init() { return init(false); } - bool init(bool forceCpu); + bool init(bool forceCpu = false, bool synchronous = false); void setErrorHandler(ErrorHandlerFunc_t func); ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; } @@ -59,7 +58,7 @@ class RS : public android::LightRefBase<RS> { void finish(); private: - bool init(int targetApi, bool forceCpu); + bool init(int targetApi, bool forceCpu, bool synchronous); static void * threadProc(void *); static bool gInitialized; diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp index 5ea28d46..e22b730c 100644 --- a/cpu_ref/rsCpuCore.cpp +++ b/cpu_ref/rsCpuCore.cpp @@ -132,6 +132,16 @@ void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) { void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) { mWorkers.mLaunchData = data; mWorkers.mLaunchCallback = cbk; + + // fast path for very small launches + MTLaunchStruct *mtls = (MTLaunchStruct *)data; + if (mtls && mtls->fep.dimY <= 1 && mtls->xEnd <= mtls->xStart + mtls->mSliceSize) { + if (mWorkers.mLaunchCallback) { + mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); + } + return; + } + android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { mWorkers.mLaunchSignals[ct].set(); @@ -140,7 +150,7 @@ void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) { // We use the calling thread as one of the workers so we can start without // the delay of the thread wakeup. if (mWorkers.mLaunchCallback) { - mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); + mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); } while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { @@ -52,7 +52,7 @@ RsDevice rsDeviceCreate(); void rsDeviceDestroy(RsDevice dev); void rsDeviceSetConfig(RsDevice dev, RsDeviceParam p, int32_t value); RsContext rsContextCreate(RsDevice dev, uint32_t version, uint32_t sdkVersion); -RsContext rsContextCreate(RsDevice dev, uint32_t version, uint32_t sdkVersion, bool forceCpu); +RsContext rsContextCreate(RsDevice dev, uint32_t version, uint32_t sdkVersion, bool forceCpu, bool synchronous); RsContext rsContextCreateGL(RsDevice dev, uint32_t version, uint32_t sdkVersion, RsSurfaceConfig sc, uint32_t dpi); diff --git a/rsContext.cpp b/rsContext.cpp index 221d5723..80021d57 100644 --- a/rsContext.cpp +++ b/rsContext.cpp @@ -250,7 +250,9 @@ void * Context::threadProc(void *vrsc) { Context *rsc = static_cast<Context *>(vrsc); #ifndef ANDROID_RS_SERIALIZE rsc->mNativeThreadId = gettid(); - setpriority(PRIO_PROCESS, rsc->mNativeThreadId, ANDROID_PRIORITY_DISPLAY); + if (!rsc->isSynchronous()) { + setpriority(PRIO_PROCESS, rsc->mNativeThreadId, ANDROID_PRIORITY_DISPLAY); + } rsc->mThreadPriority = ANDROID_PRIORITY_DISPLAY; #endif //ANDROID_RS_SERIALIZE rsc->props.mLogTimes = getProp("debug.rs.profile") != 0; @@ -318,6 +320,11 @@ void * Context::threadProc(void *vrsc) { } rsc->mRunning = true; + + if (rsc->isSynchronous()) { + return NULL; + } + if (!rsc->mIsGraphicsContext) { while (!rsc->mExit) { rsc->mIO.playCoreCommands(rsc, -1); @@ -442,17 +449,15 @@ Context::Context() { mIsContextLite = false; memset(&watchdog, 0, sizeof(watchdog)); mForceCpu = false; -} - -Context * Context::createContext(Device *dev, const RsSurfaceConfig *sc) { - return createContext(dev, sc, false); + mSynchronous = false; } Context * Context::createContext(Device *dev, const RsSurfaceConfig *sc, - bool forceCpu) { + bool forceCpu, bool synchronous) { Context * rsc = new Context(); rsc->mForceCpu = forceCpu; + rsc->mSynchronous = synchronous; if (!rsc->initContext(dev, sc)) { delete rsc; @@ -500,22 +505,25 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { timerInit(); timerSet(RS_TIMER_INTERNAL); + if (mSynchronous) { + threadProc(this); + } else { + status = pthread_create(&mThreadId, &threadAttr, threadProc, this); + if (status) { + ALOGE("Failed to start rs context thread."); + return false; + } + while (!mRunning && (mError == RS_ERROR_NONE)) { + usleep(100); + } - status = pthread_create(&mThreadId, &threadAttr, threadProc, this); - if (status) { - ALOGE("Failed to start rs context thread."); - return false; - } - while (!mRunning && (mError == RS_ERROR_NONE)) { - usleep(100); - } + if (mError != RS_ERROR_NONE) { + ALOGE("Errors during thread init"); + return false; + } - if (mError != RS_ERROR_NONE) { - ALOGE("Errors during thread init"); - return false; + pthread_attr_destroy(&threadAttr); } - - pthread_attr_destroy(&threadAttr); return true; } @@ -817,14 +825,15 @@ void rsi_ContextDeinitToClient(Context *rsc) { RsContext rsContextCreate(RsDevice vdev, uint32_t version, uint32_t sdkVersion) { - return rsContextCreate(vdev, version, sdkVersion, false); + return rsContextCreate(vdev, version, sdkVersion, false, false); } RsContext rsContextCreate(RsDevice vdev, uint32_t version, - uint32_t sdkVersion, bool forceCpu) { + uint32_t sdkVersion, bool forceCpu, + bool synchronous) { ALOGV("rsContextCreate dev=%p", vdev); Device * dev = static_cast<Device *>(vdev); - Context *rsc = Context::createContext(dev, NULL, forceCpu); + Context *rsc = Context::createContext(dev, NULL, forceCpu, synchronous); if (rsc) { rsc->setTargetSdkVersion(sdkVersion); } diff --git a/rsContext.h b/rsContext.h index 61218dab..bc0c63ef 100644 --- a/rsContext.h +++ b/rsContext.h @@ -67,8 +67,7 @@ public: }; Hal mHal; - static Context * createContext(Device *, const RsSurfaceConfig *sc); - static Context * createContext(Device *, const RsSurfaceConfig *sc, bool forceCpu); + static Context * createContext(Device *, const RsSurfaceConfig *sc, bool forceCpu = false, bool synchronous = false); static Context * createContextLite(); ~Context(); @@ -104,6 +103,8 @@ public: ScriptCState mScriptC; FBOCache mFBOCache; + bool isSynchronous() {return mSynchronous;} + void swapBuffers(); void setRootScript(Script *); void setProgramRaster(ProgramRaster *); @@ -247,6 +248,7 @@ private: Context(); bool initContext(Device *, const RsSurfaceConfig *sc); + bool mSynchronous; bool initGLThread(); void deinitEGL(); diff --git a/rsg_generator.c b/rsg_generator.c index c404c9ca..7022bcbd 100644 --- a/rsg_generator.c +++ b/rsg_generator.c @@ -224,6 +224,29 @@ void printApiCpp(FILE *f) { } fprintf(f, ");\n"); } else { + // handle synchronous path + fprintf(f, " if (((Context *)rsc)->isSynchronous()) {\n"); + fprintf(f, " "); + if (api->ret.typeName[0]) { + fprintf(f, "return "); + } + fprintf(f, "rsi_%s(", api->name); + if (!api->nocontext) { + fprintf(f, "(Context *)rsc"); + } + for (ct2=0; ct2 < api->paramCount; ct2++) { + const VarType *vt = &api->params[ct2]; + if (ct2 > 0 || !api->nocontext) { + fprintf(f, ", "); + } + fprintf(f, "%s", vt->name); + } + fprintf(f, ");\n"); + if (!api->ret.typeName[0]) { + fprintf(f, " return;"); + } + fprintf(f, " }\n\n"); + fprintf(f, " ThreadIO *io = &((Context *)rsc)->mIO;\n"); fprintf(f, " const uint32_t size = sizeof(RS_CMD_%s);\n", api->name); if (hasInlineDataPointers(api)) { diff --git a/tests/latency/latency.cpp b/tests/latency/latency.cpp index 124fb20a..86d1a291 100644 --- a/tests/latency/latency.cpp +++ b/tests/latency/latency.cpp @@ -11,6 +11,7 @@ int main(int argc, char** argv) int iters = 100; int numElems = 1000; bool forceCpu = false; + bool synchronous = false; if (argc >= 2) { iters = atoi(argv[1]); @@ -36,14 +37,23 @@ int main(int argc, char** argv) forceCpu = true; } + if (argc >= 5) { + int temp = atoi(argv[4]); + if (temp != 0) + synchronous = true; + } + if (forceCpu) printf("forcing CPU\n"); + if (synchronous) + printf("forcing synchronous\n"); + printf("numElems = %d\n", numElems); sp<RS> rs = new RS(); - bool r = rs->init(forceCpu); // force CPU execution + bool r = rs->init(forceCpu, synchronous); sp<const Element> e = Element::U32(rs); @@ -51,6 +61,8 @@ int main(int argc, char** argv) tb.setX(numElems); sp<const Type> t = tb.create(); + uint32_t *buf = new uint32_t[numElems]; + sp<Allocation> ain = Allocation::createTyped(rs, t); sp<Allocation> aout = Allocation::createTyped(rs, t); @@ -72,6 +84,21 @@ int main(int argc, char** argv) printf("elapsed time : %lld microseconds\n", elapsed); printf("time per iter: %f microseconds\n", (double)elapsed / iters); + gettimeofday(&start, NULL); + + for (int i = 0; i < iters; i++) { + ain->copy1DFrom(buf); + sc->forEach_root(ain, aout); + aout->copy1DTo(buf); + } + + rs->finish(); + + gettimeofday(&stop, NULL); + elapsed = (stop.tv_sec * 1000000) - (start.tv_sec * 1000000) + (stop.tv_usec - start.tv_usec); + printf("elapsed time with copy : %lld microseconds\n", elapsed); + printf("time per iter with copy: %f microseconds\n", (double)elapsed / iters); + sc.clear(); t.clear(); e.clear(); |