summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cpp/RenderScript.cpp8
-rw-r--r--cpp/rsCppStructs.h5
-rw-r--r--cpu_ref/rsCpuCore.cpp12
-rw-r--r--rs.h2
-rw-r--r--rsContext.cpp53
-rw-r--r--rsContext.h6
-rw-r--r--rsg_generator.c23
-rw-r--r--tests/latency/latency.cpp29
8 files changed, 104 insertions, 34 deletions
diff --git a/cpp/RenderScript.cpp b/cpp/RenderScript.cpp
index 4ce4c9ba..98ab3807 100644
--- a/cpp/RenderScript.cpp
+++ b/cpp/RenderScript.cpp
@@ -54,18 +54,18 @@ RS::~RS() {
mDev = NULL;
}
-bool RS::init(bool forceCpu) {
- return RS::init(RS_VERSION, forceCpu);
+bool RS::init(bool forceCpu, bool synchronous) {
+ return RS::init(RS_VERSION, forceCpu, synchronous);
}
-bool RS::init(int targetApi, bool forceCpu) {
+bool RS::init(int targetApi, bool forceCpu, bool synchronous) {
mDev = rsDeviceCreate();
if (mDev == 0) {
ALOGE("Device creation failed");
return false;
}
- mContext = rsContextCreate(mDev, 0, targetApi, forceCpu);
+ mContext = rsContextCreate(mDev, 0, targetApi, forceCpu, synchronous);
if (mContext == 0) {
ALOGE("Context creation failed");
return false;
diff --git a/cpp/rsCppStructs.h b/cpp/rsCppStructs.h
index a430c351..a3818163 100644
--- a/cpp/rsCppStructs.h
+++ b/cpp/rsCppStructs.h
@@ -43,8 +43,7 @@ class RS : public android::LightRefBase<RS> {
RS();
virtual ~RS();
- bool init() { return init(false); }
- bool init(bool forceCpu);
+ bool init(bool forceCpu = false, bool synchronous = false);
void setErrorHandler(ErrorHandlerFunc_t func);
ErrorHandlerFunc_t getErrorHandler() { return mErrorFunc; }
@@ -59,7 +58,7 @@ class RS : public android::LightRefBase<RS> {
void finish();
private:
- bool init(int targetApi, bool forceCpu);
+ bool init(int targetApi, bool forceCpu, bool synchronous);
static void * threadProc(void *);
static bool gInitialized;
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 5ea28d46..e22b730c 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -132,6 +132,16 @@ void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) {
void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
mWorkers.mLaunchData = data;
mWorkers.mLaunchCallback = cbk;
+
+ // fast path for very small launches
+ MTLaunchStruct *mtls = (MTLaunchStruct *)data;
+ if (mtls && mtls->fep.dimY <= 1 && mtls->xEnd <= mtls->xStart + mtls->mSliceSize) {
+ if (mWorkers.mLaunchCallback) {
+ mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
+ }
+ return;
+ }
+
android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount);
for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
mWorkers.mLaunchSignals[ct].set();
@@ -140,7 +150,7 @@ void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) {
// We use the calling thread as one of the workers so we can start without
// the delay of the thread wakeup.
if (mWorkers.mLaunchCallback) {
- mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
+ mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0);
}
while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) {
diff --git a/rs.h b/rs.h
index e6d3b12b..afe55341 100644
--- a/rs.h
+++ b/rs.h
@@ -52,7 +52,7 @@ RsDevice rsDeviceCreate();
void rsDeviceDestroy(RsDevice dev);
void rsDeviceSetConfig(RsDevice dev, RsDeviceParam p, int32_t value);
RsContext rsContextCreate(RsDevice dev, uint32_t version, uint32_t sdkVersion);
-RsContext rsContextCreate(RsDevice dev, uint32_t version, uint32_t sdkVersion, bool forceCpu);
+RsContext rsContextCreate(RsDevice dev, uint32_t version, uint32_t sdkVersion, bool forceCpu, bool synchronous);
RsContext rsContextCreateGL(RsDevice dev, uint32_t version, uint32_t sdkVersion,
RsSurfaceConfig sc, uint32_t dpi);
diff --git a/rsContext.cpp b/rsContext.cpp
index 221d5723..80021d57 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -250,7 +250,9 @@ void * Context::threadProc(void *vrsc) {
Context *rsc = static_cast<Context *>(vrsc);
#ifndef ANDROID_RS_SERIALIZE
rsc->mNativeThreadId = gettid();
- setpriority(PRIO_PROCESS, rsc->mNativeThreadId, ANDROID_PRIORITY_DISPLAY);
+ if (!rsc->isSynchronous()) {
+ setpriority(PRIO_PROCESS, rsc->mNativeThreadId, ANDROID_PRIORITY_DISPLAY);
+ }
rsc->mThreadPriority = ANDROID_PRIORITY_DISPLAY;
#endif //ANDROID_RS_SERIALIZE
rsc->props.mLogTimes = getProp("debug.rs.profile") != 0;
@@ -318,6 +320,11 @@ void * Context::threadProc(void *vrsc) {
}
rsc->mRunning = true;
+
+ if (rsc->isSynchronous()) {
+ return NULL;
+ }
+
if (!rsc->mIsGraphicsContext) {
while (!rsc->mExit) {
rsc->mIO.playCoreCommands(rsc, -1);
@@ -442,17 +449,15 @@ Context::Context() {
mIsContextLite = false;
memset(&watchdog, 0, sizeof(watchdog));
mForceCpu = false;
-}
-
-Context * Context::createContext(Device *dev, const RsSurfaceConfig *sc) {
- return createContext(dev, sc, false);
+ mSynchronous = false;
}
Context * Context::createContext(Device *dev, const RsSurfaceConfig *sc,
- bool forceCpu) {
+ bool forceCpu, bool synchronous) {
Context * rsc = new Context();
rsc->mForceCpu = forceCpu;
+ rsc->mSynchronous = synchronous;
if (!rsc->initContext(dev, sc)) {
delete rsc;
@@ -500,22 +505,25 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) {
timerInit();
timerSet(RS_TIMER_INTERNAL);
+ if (mSynchronous) {
+ threadProc(this);
+ } else {
+ status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
+ if (status) {
+ ALOGE("Failed to start rs context thread.");
+ return false;
+ }
+ while (!mRunning && (mError == RS_ERROR_NONE)) {
+ usleep(100);
+ }
- status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
- if (status) {
- ALOGE("Failed to start rs context thread.");
- return false;
- }
- while (!mRunning && (mError == RS_ERROR_NONE)) {
- usleep(100);
- }
+ if (mError != RS_ERROR_NONE) {
+ ALOGE("Errors during thread init");
+ return false;
+ }
- if (mError != RS_ERROR_NONE) {
- ALOGE("Errors during thread init");
- return false;
+ pthread_attr_destroy(&threadAttr);
}
-
- pthread_attr_destroy(&threadAttr);
return true;
}
@@ -817,14 +825,15 @@ void rsi_ContextDeinitToClient(Context *rsc) {
RsContext rsContextCreate(RsDevice vdev, uint32_t version,
uint32_t sdkVersion) {
- return rsContextCreate(vdev, version, sdkVersion, false);
+ return rsContextCreate(vdev, version, sdkVersion, false, false);
}
RsContext rsContextCreate(RsDevice vdev, uint32_t version,
- uint32_t sdkVersion, bool forceCpu) {
+ uint32_t sdkVersion, bool forceCpu,
+ bool synchronous) {
ALOGV("rsContextCreate dev=%p", vdev);
Device * dev = static_cast<Device *>(vdev);
- Context *rsc = Context::createContext(dev, NULL, forceCpu);
+ Context *rsc = Context::createContext(dev, NULL, forceCpu, synchronous);
if (rsc) {
rsc->setTargetSdkVersion(sdkVersion);
}
diff --git a/rsContext.h b/rsContext.h
index 61218dab..bc0c63ef 100644
--- a/rsContext.h
+++ b/rsContext.h
@@ -67,8 +67,7 @@ public:
};
Hal mHal;
- static Context * createContext(Device *, const RsSurfaceConfig *sc);
- static Context * createContext(Device *, const RsSurfaceConfig *sc, bool forceCpu);
+ static Context * createContext(Device *, const RsSurfaceConfig *sc, bool forceCpu = false, bool synchronous = false);
static Context * createContextLite();
~Context();
@@ -104,6 +103,8 @@ public:
ScriptCState mScriptC;
FBOCache mFBOCache;
+ bool isSynchronous() {return mSynchronous;}
+
void swapBuffers();
void setRootScript(Script *);
void setProgramRaster(ProgramRaster *);
@@ -247,6 +248,7 @@ private:
Context();
bool initContext(Device *, const RsSurfaceConfig *sc);
+ bool mSynchronous;
bool initGLThread();
void deinitEGL();
diff --git a/rsg_generator.c b/rsg_generator.c
index c404c9ca..7022bcbd 100644
--- a/rsg_generator.c
+++ b/rsg_generator.c
@@ -224,6 +224,29 @@ void printApiCpp(FILE *f) {
}
fprintf(f, ");\n");
} else {
+ // handle synchronous path
+ fprintf(f, " if (((Context *)rsc)->isSynchronous()) {\n");
+ fprintf(f, " ");
+ if (api->ret.typeName[0]) {
+ fprintf(f, "return ");
+ }
+ fprintf(f, "rsi_%s(", api->name);
+ if (!api->nocontext) {
+ fprintf(f, "(Context *)rsc");
+ }
+ for (ct2=0; ct2 < api->paramCount; ct2++) {
+ const VarType *vt = &api->params[ct2];
+ if (ct2 > 0 || !api->nocontext) {
+ fprintf(f, ", ");
+ }
+ fprintf(f, "%s", vt->name);
+ }
+ fprintf(f, ");\n");
+ if (!api->ret.typeName[0]) {
+ fprintf(f, " return;");
+ }
+ fprintf(f, " }\n\n");
+
fprintf(f, " ThreadIO *io = &((Context *)rsc)->mIO;\n");
fprintf(f, " const uint32_t size = sizeof(RS_CMD_%s);\n", api->name);
if (hasInlineDataPointers(api)) {
diff --git a/tests/latency/latency.cpp b/tests/latency/latency.cpp
index 124fb20a..86d1a291 100644
--- a/tests/latency/latency.cpp
+++ b/tests/latency/latency.cpp
@@ -11,6 +11,7 @@ int main(int argc, char** argv)
int iters = 100;
int numElems = 1000;
bool forceCpu = false;
+ bool synchronous = false;
if (argc >= 2) {
iters = atoi(argv[1]);
@@ -36,14 +37,23 @@ int main(int argc, char** argv)
forceCpu = true;
}
+ if (argc >= 5) {
+ int temp = atoi(argv[4]);
+ if (temp != 0)
+ synchronous = true;
+ }
+
if (forceCpu)
printf("forcing CPU\n");
+ if (synchronous)
+ printf("forcing synchronous\n");
+
printf("numElems = %d\n", numElems);
sp<RS> rs = new RS();
- bool r = rs->init(forceCpu); // force CPU execution
+ bool r = rs->init(forceCpu, synchronous);
sp<const Element> e = Element::U32(rs);
@@ -51,6 +61,8 @@ int main(int argc, char** argv)
tb.setX(numElems);
sp<const Type> t = tb.create();
+ uint32_t *buf = new uint32_t[numElems];
+
sp<Allocation> ain = Allocation::createTyped(rs, t);
sp<Allocation> aout = Allocation::createTyped(rs, t);
@@ -72,6 +84,21 @@ int main(int argc, char** argv)
printf("elapsed time : %lld microseconds\n", elapsed);
printf("time per iter: %f microseconds\n", (double)elapsed / iters);
+ gettimeofday(&start, NULL);
+
+ for (int i = 0; i < iters; i++) {
+ ain->copy1DFrom(buf);
+ sc->forEach_root(ain, aout);
+ aout->copy1DTo(buf);
+ }
+
+ rs->finish();
+
+ gettimeofday(&stop, NULL);
+ elapsed = (stop.tv_sec * 1000000) - (start.tv_sec * 1000000) + (stop.tv_usec - start.tv_usec);
+ printf("elapsed time with copy : %lld microseconds\n", elapsed);
+ printf("time per iter with copy: %f microseconds\n", (double)elapsed / iters);
+
sc.clear();
t.clear();
e.clear();