diff options
46 files changed, 3155 insertions, 2147 deletions
@@ -21,21 +21,12 @@ LOCAL_SRC_FILES:= \ driver/rsdFrameBuffer.cpp \ driver/rsdFrameBufferObj.cpp \ driver/rsdGL.cpp \ - driver/rsdIntrinsics.cpp \ - driver/rsdIntrinsicBlend.cpp \ - driver/rsdIntrinsicBlur.cpp \ - driver/rsdIntrinsicConvolve3x3.cpp \ - driver/rsdIntrinsicConvolve5x5.cpp \ - driver/rsdIntrinsicLUT.cpp \ - driver/rsdIntrinsicColorMatrix.cpp \ - driver/rsdIntrinsicYuvToRGB.cpp \ driver/rsdMesh.cpp \ driver/rsdMeshObj.cpp \ driver/rsdPath.cpp \ driver/rsdProgram.cpp \ driver/rsdProgramRaster.cpp \ driver/rsdProgramStore.cpp \ - driver/rsdRuntimeMath.cpp \ driver/rsdRuntimeStubs.cpp \ driver/rsdSampler.cpp \ driver/rsdScriptGroup.cpp \ @@ -43,13 +34,8 @@ LOCAL_SRC_FILES:= \ driver/rsdShaderCache.cpp \ driver/rsdVertexArray.cpp -ifeq ($(ARCH_ARM_HAVE_NEON),true) - LOCAL_CFLAGS += -DARCH_ARM_HAVE_NEON - LOCAL_SRC_FILES+= \ - driver/rsdIntrinsics_Convolve.S -endif -LOCAL_SHARED_LIBRARIES += libRS +LOCAL_SHARED_LIBRARIES += libRS libRSCpuRef LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2 LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libui libgui libsync @@ -258,3 +244,6 @@ LOCAL_STATIC_LIBRARIES := libcutils libutils LOCAL_LDLIBS := -lpthread include $(BUILD_HOST_STATIC_LIBRARY) + +include $(call all-makefiles-under,$(LOCAL_PATH)) + diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk new file mode 100644 index 00000000..062a916f --- /dev/null +++ b/cpu_ref/Android.mk @@ -0,0 +1,51 @@ + +LOCAL_PATH:=$(call my-dir) + +rs_base_CFLAGS := -Werror -Wall -Wno-unused-parameter -Wno-unused-variable +ifeq ($(TARGET_BUILD_PDK), true) + rs_base_CFLAGS += -D__RS_PDK__ +endif + +ifneq ($(OVERRIDE_RS_DRIVER),) + rs_base_CFLAGS += -DOVERRIDE_RS_DRIVER=$(OVERRIDE_RS_DRIVER) +endif + +include $(CLEAR_VARS) +LOCAL_CLANG := true +LOCAL_MODULE := libRSCpuRef + +LOCAL_SRC_FILES:= \ + rsCpuCore.cpp \ + rsCpuScript.cpp \ + rsCpuRuntimeMath.cpp \ + rsCpuRuntimeStubs.cpp \ + rsCpuScriptGroup.cpp \ + rsCpuIntrinsic.cpp \ + rsCpuIntrinsicBlend.cpp \ + 
rsCpuIntrinsicBlur.cpp \ + rsCpuIntrinsicColorMatrix.cpp \ + rsCpuIntrinsicConvolve3x3.cpp \ + rsCpuIntrinsicConvolve5x5.cpp \ + rsCpuIntrinsicLUT.cpp \ + rsCpuIntrinsicYuvToRGB.cpp + +ifeq ($(ARCH_ARM_HAVE_NEON),true) + LOCAL_CFLAGS += -DARCH_ARM_HAVE_NEON + LOCAL_SRC_FILES+= \ + rsCpuIntrinsics_neon.S +endif + +LOCAL_SHARED_LIBRARIES += libRS libcutils libutils libsync +LOCAL_SHARED_LIBRARIES += libbcc libbcinfo + +LOCAL_C_INCLUDES += frameworks/compile/libbcc/include +LOCAL_C_INCLUDES += frameworks/rs + +LOCAL_CFLAGS += $(rs_base_CFLAGS) + +LOCAL_LDLIBS := -lpthread -ldl +LOCAL_MODULE_TAGS := optional + +include $(BUILD_SHARED_LIBRARY) + + diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp new file mode 100644 index 00000000..29539da0 --- /dev/null +++ b/cpu_ref/rsCpuCore.cpp @@ -0,0 +1,477 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "rsCpuCore.h" +#include "rsCpuScript.h" +#include "rsCpuScriptGroup.h" + +#include <malloc.h> +#include "rsContext.h" + +#include <sys/types.h> +#include <sys/resource.h> +#include <sched.h> +#include <cutils/properties.h> +#include <sys/syscall.h> +#include <string.h> +#include "utils/StopWatch.h" + +using namespace android; +using namespace android::renderscript; + +typedef void (*outer_foreach_t)( + const android::renderscript::RsForEachStubParamStruct *, + uint32_t x1, uint32_t x2, + uint32_t instep, uint32_t outstep); + + +static pthread_key_t gThreadTLSKey = 0; +static uint32_t gThreadTLSKeyCount = 0; +static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER; + +RsdCpuReference::~RsdCpuReference() { +} + +RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major, + uint32_t version_minor, sym_lookup_t lfn, + script_lookup_t slfn) { + + RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc); + if (!cpu) { + return NULL; + } + if (!cpu->init(version_major, version_minor, lfn, slfn)) { + delete cpu; + return NULL; + } + return cpu; +} + + +Context * RsdCpuReference::getTlsContext() { + ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); + return tls->mContext; +} + +const Script * RsdCpuReference::getTlsScript() { + ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); + return tls->mScript; +} + + +//////////////////////////////////////////////////////////// +/// + +RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) { + mRSC = rsc; + + version_major = 0; + version_minor = 0; + mInForEach = false; + memset(&mWorkers, 0, sizeof(mWorkers)); + memset(&mTlsStruct, 0, sizeof(mTlsStruct)); + mExit = false; + +} + + +void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) { + RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc; + + + uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount); + + //ALOGV("RS helperThread starting %p idx=%i", dc, idx); + + 
dc->mWorkers.mLaunchSignals[idx].init(); + dc->mWorkers.mNativeThreadId[idx] = gettid(); + + memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct)); + int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct); + if (status) { + ALOGE("pthread_setspecific %i", status); + } + +#if 0 + typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; + cpu_set_t cpuset; + memset(&cpuset, 0, sizeof(cpuset)); + cpuset.bits[idx / 64] |= 1ULL << (idx % 64); + int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], + sizeof(cpuset), &cpuset); + ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); +#endif + + while (!dc->mExit) { + dc->mWorkers.mLaunchSignals[idx].wait(); + if (dc->mWorkers.mLaunchCallback) { + // idx +1 is used because the calling thread is always worker 0. + dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1); + } + android_atomic_dec(&dc->mWorkers.mRunningCount); + dc->mWorkers.mCompleteSignal.set(); + } + + //ALOGV("RS helperThread exited %p idx=%i", dc, idx); + return NULL; +} + +void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) { + mWorkers.mLaunchData = data; + mWorkers.mLaunchCallback = cbk; + android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); + for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { + mWorkers.mLaunchSignals[ct].set(); + } + + // We use the calling thread as one of the workers so we can start without + // the delay of the thread wakeup. 
+ if (mWorkers.mLaunchCallback) { + mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); + } + + while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { + mWorkers.mCompleteSignal.wait(); + } +} + + +void RsdCpuReferenceImpl::lockMutex() { + pthread_mutex_lock(&gInitMutex); +} + +void RsdCpuReferenceImpl::unlockMutex() { + pthread_mutex_unlock(&gInitMutex); +} + +bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor, + sym_lookup_t lfn, script_lookup_t slfn) { + + mSymLookupFn = lfn; + mScriptLookupFn = slfn; + + lockMutex(); + if (!gThreadTLSKeyCount) { + int status = pthread_key_create(&gThreadTLSKey, NULL); + if (status) { + ALOGE("Failed to init thread tls key."); + unlockMutex(); + return false; + } + } + gThreadTLSKeyCount++; + unlockMutex(); + + mTlsStruct.mContext = mRSC; + mTlsStruct.mScript = NULL; + int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct); + if (status) { + ALOGE("pthread_setspecific %i", status); + } + + int cpu = sysconf(_SC_NPROCESSORS_ONLN); + if(mRSC->props.mDebugMaxThreads) { + cpu = mRSC->props.mDebugMaxThreads; + } + if (cpu < 2) { + mWorkers.mCount = 0; + return true; + } + + // Subtract one from the cpu count because we also use the command thread as a worker. 
+ mWorkers.mCount = (uint32_t)(cpu - 1); + + ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount); + + mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); + mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); + mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; + mWorkers.mLaunchCallback = NULL; + + mWorkers.mCompleteSignal.init(); + + android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); + android_atomic_release_store(0, &mWorkers.mLaunchCount); + + pthread_attr_t threadAttr; + status = pthread_attr_init(&threadAttr); + if (status) { + ALOGE("Failed to init thread attribute."); + return false; + } + + for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { + status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); + if (status) { + mWorkers.mCount = ct; + ALOGE("Created fewer than expected number of RS threads."); + break; + } + } + while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { + usleep(100); + } + + pthread_attr_destroy(&threadAttr); + return true; +} + + +void RsdCpuReferenceImpl::setPriority(int32_t priority) { + for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { + setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority); + } +} + +RsdCpuReferenceImpl::~RsdCpuReferenceImpl() { + mExit = true; + mWorkers.mLaunchData = NULL; + mWorkers.mLaunchCallback = NULL; + android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); + for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { + mWorkers.mLaunchSignals[ct].set(); + } + void *res; + for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { + pthread_join(mWorkers.mThreadId[ct], &res); + } + rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0); + + // Global structure cleanup. 
+ lockMutex(); + --gThreadTLSKeyCount; + if (!gThreadTLSKeyCount) { + pthread_key_delete(gThreadTLSKey); + } + unlockMutex(); + +} + +typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); + +static void wc_xy(void *usr, uint32_t idx) { + MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + RsForEachStubParamStruct p; + memcpy(&p, &mtls->fep, sizeof(p)); + p.lid = idx; + uint32_t sig = mtls->sig; + + outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + while (1) { + uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); + uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; + uint32_t yEnd = yStart + mtls->mSliceSize; + yEnd = rsMin(yEnd, mtls->yEnd); + if (yEnd <= yStart) { + return; + } + + //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); + //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut); + + for (p.y = yStart; p.y < yEnd; p.y++) { + p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) + + (mtls->fep.eStrideOut * mtls->xStart); + p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) + + (mtls->fep.eStrideIn * mtls->xStart); + fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); + } + } +} + +static void wc_x(void *usr, uint32_t idx) { + MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + RsForEachStubParamStruct p; + memcpy(&p, &mtls->fep, sizeof(p)); + p.lid = idx; + uint32_t sig = mtls->sig; + + outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + while (1) { + uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); + uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; + uint32_t xEnd = xStart + mtls->mSliceSize; + xEnd = rsMin(xEnd, mtls->xEnd); + if (xEnd <= xStart) { + return; + } + + //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); + //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut); + + p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart); + p.in = mtls->fep.ptrIn + 
(mtls->fep.eStrideIn * xStart); + fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); + } +} + +void RsdCpuReferenceImpl::launchThreads(const Allocation * ain, Allocation * aout, + const RsScriptCall *sc, MTLaunchStruct *mtls) { + + //android::StopWatch kernel_time("kernel time"); + + if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) { + const size_t targetByteChunk = 16 * 1024; + mInForEach = true; + if (mtls->fep.dimY > 1) { + uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4); + uint32_t s2 = 0; + + // This chooses our slice size to rate limit atomic ops to + // one per 16k bytes of reads/writes. + if (mtls->fep.yStrideOut) { + s2 = targetByteChunk / mtls->fep.yStrideOut; + } else { + s2 = targetByteChunk / mtls->fep.yStrideIn; + } + mtls->mSliceSize = rsMin(s1, s2); + + if(mtls->mSliceSize < 1) { + mtls->mSliceSize = 1; + } + + // mtls->mSliceSize = 2; + launchThreads(wc_xy, mtls); + } else { + uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4); + uint32_t s2 = 0; + + // This chooses our slice size to rate limit atomic ops to + // one per 16k bytes of reads/writes. 
+ if (mtls->fep.eStrideOut) { + s2 = targetByteChunk / mtls->fep.eStrideOut; + } else { + s2 = targetByteChunk / mtls->fep.eStrideIn; + } + mtls->mSliceSize = rsMin(s1, s2); + + if(mtls->mSliceSize < 1) { + mtls->mSliceSize = 1; + } + + launchThreads(wc_x, mtls); + } + mInForEach = false; + + //ALOGE("launch 1"); + } else { + RsForEachStubParamStruct p; + memcpy(&p, &mtls->fep, sizeof(p)); + uint32_t sig = mtls->sig; + + //ALOGE("launch 3"); + outer_foreach_t fn = (outer_foreach_t) mtls->kernel; + for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) { + for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) { + for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) { + uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] + + mtls->fep.dimY * p.z + p.y; + p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) + + (mtls->fep.eStrideOut * mtls->xStart); + p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) + + (mtls->fep.eStrideIn * mtls->xStart); + fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); + } + } + } + } +} + +RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) { + //ALOGE("setTls %p", sc); + ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); + rsAssert(tls); + RsdCpuScriptImpl *old = tls->mImpl; + tls->mImpl = sc; + tls->mContext = mRSC; + if (sc) { + tls->mScript = sc->getScript(); + } else { + tls->mScript = NULL; + } + return old; +} + +const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) { + return mSymLookupFn(mRSC, name); +} + + +RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s, + char const *resName, char const *cacheDir, + uint8_t const *bitcode, size_t bitcodeSize, + uint32_t flags) { + + RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s); + if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags)) { + delete i; + return NULL; + } + return i; +} + +extern RsdCpuScriptImpl * 
rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, const Script *s); +extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx, const Script *s); +extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx, const Script *s); +extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx, const Script *s); +extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, const Script *s); +extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx, const Script *s); +extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, const Script *s); + +RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s, + RsScriptIntrinsicID iid, Element *e) { + + RsdCpuScriptImpl *i = NULL; + switch (iid) { + case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3: + i = rsdIntrinsic_Convolve3x3(this, s); + break; + case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX: + i = rsdIntrinsic_ColorMatrix(this, s); + break; + case RS_SCRIPT_INTRINSIC_ID_LUT: + i = rsdIntrinsic_LUT(this, s); + break; + case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5: + i = rsdIntrinsic_Convolve5x5(this, s); + break; + case RS_SCRIPT_INTRINSIC_ID_BLUR: + i = rsdIntrinsic_Blur(this, s); + break; + case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB: + i = rsdIntrinsic_YuvToRGB(this, s); + break; + case RS_SCRIPT_INTRINSIC_ID_BLEND: + i = rsdIntrinsic_Blend(this, s); + break; + + default: + rsAssert(0); + } + + return i; +} + +RsdCpuReference::CpuScriptGroup * RsdCpuReferenceImpl::createScriptGroup(const ScriptGroup *sg) { + CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); + if (!sgi->init()) { + delete sgi; + return NULL; + } + return sgi; +} + + diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h new file mode 100644 index 00000000..48835918 --- /dev/null +++ b/cpu_ref/rsCpuCore.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSD_CPU_CORE_H +#define RSD_CPU_CORE_H + +#include "rsd_cpu.h" +#include "rsSignal.h" +#include "rsContext.h" +#include "rsElement.h" +#include "rsScriptC.h" + +namespace bcc { + class BCCContext; + class RSCompilerDriver; + class RSExecutable; +} + +namespace android { +namespace renderscript { + + +typedef void (* InvokeFunc_t)(void); +typedef void (* ForEachFunc_t)(void); +typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); + +class RsdCpuScriptImpl; +class RsdCpuReferenceImpl; + +typedef struct ScriptTLSStructRec { + android::renderscript::Context * mContext; + const android::renderscript::Script * mScript; + RsdCpuScriptImpl *mImpl; +} ScriptTLSStruct; + +typedef struct { + RsForEachStubParamStruct fep; + + RsdCpuReferenceImpl *rsc; + RsdCpuScriptImpl *script; + + ForEachFunc_t kernel; + uint32_t sig; + const Allocation * ain; + Allocation * aout; + + uint32_t mSliceSize; + volatile int mSliceNum; + bool isThreadable; + + uint32_t xStart; + uint32_t xEnd; + uint32_t yStart; + uint32_t yEnd; + uint32_t zStart; + uint32_t zEnd; + uint32_t arrayStart; + uint32_t arrayEnd; +} MTLaunchStruct; + + + + +class RsdCpuReferenceImpl : public RsdCpuReference { +public: + virtual ~RsdCpuReferenceImpl(); + RsdCpuReferenceImpl(Context *); + + void lockMutex(); + void unlockMutex(); + + bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t); + virtual void setPriority(int32_t priority); + virtual void launchThreads(WorkerCallback_t cbk, void *data); + 
static void * helperThreadProc(void *vrsc); + RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc); + + Context * getContext() {return mRSC;} + + void launchThreads(const Allocation * ain, Allocation * aout, + const RsScriptCall *sc, MTLaunchStruct *mtls); + + virtual CpuScript * createScript(const ScriptC *s, + char const *resName, char const *cacheDir, + uint8_t const *bitcode, size_t bitcodeSize, + uint32_t flags); + virtual CpuScript * createIntrinsic(const Script *s, + RsScriptIntrinsicID iid, Element *e); + virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg); + + const RsdCpuReference::CpuSymbol *symLookup(const char *); + + RsdCpuReference::CpuScript * lookupScript(const Script *s) { + return mScriptLookupFn(mRSC, s); + } + + +protected: + Context *mRSC; + uint32_t version_major; + uint32_t version_minor; + //bool mHasGraphics; + bool mInForEach; + + struct Workers { + volatile int mRunningCount; + volatile int mLaunchCount; + uint32_t mCount; + pthread_t *mThreadId; + pid_t *mNativeThreadId; + Signal mCompleteSignal; + Signal *mLaunchSignals; + WorkerCallback_t mLaunchCallback; + void *mLaunchData; + }; + Workers mWorkers; + bool mExit; + sym_lookup_t mSymLookupFn; + script_lookup_t mScriptLookupFn; + + ScriptTLSStruct mTlsStruct; +}; + + +} +} + +#endif diff --git a/cpu_ref/rsCpuIntrinsic.cpp b/cpu_ref/rsCpuIntrinsic.cpp new file mode 100644 index 00000000..a4eef218 --- /dev/null +++ b/cpu_ref/rsCpuIntrinsic.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "rsCpuIntrinsic.h" + +using namespace android; +using namespace android::renderscript; + +RsdCpuScriptIntrinsic::RsdCpuScriptIntrinsic(RsdCpuReferenceImpl *ctx, const Script *s, + RsScriptIntrinsicID iid) + : RsdCpuScriptImpl(ctx, s) { + + mID = iid; +} + +RsdCpuScriptIntrinsic::~RsdCpuScriptIntrinsic() { +} + +void RsdCpuScriptIntrinsic::invokeFunction(uint32_t slot, const void *params, size_t paramLength) { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::invokeFunction"); +} + +int RsdCpuScriptIntrinsic::invokeRoot() { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::invokeRoot"); + return 0; +} + +void RsdCpuScriptIntrinsic::invokeInit() { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::invokeInit"); +} + +void RsdCpuScriptIntrinsic::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::setGlobalVar"); +} + +void RsdCpuScriptIntrinsic::setGlobalVarWithElemDims(uint32_t slot, const void *data, + size_t dataLength, const Element *e, + const size_t *dims, size_t dimLength) { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::setGlobalVarWithElemDims"); +} + +void RsdCpuScriptIntrinsic::setGlobalBind(uint32_t slot, Allocation *data) { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::setGlobalBind"); +} + +void RsdCpuScriptIntrinsic::setGlobalObj(uint32_t slot, ObjectBase *data) { + mCtx->getContext()->setError(RS_ERROR_FATAL_DRIVER, + "Unexpected RsdCpuScriptIntrinsic::setGlobalObj"); +} + +void RsdCpuScriptIntrinsic::invokeFreeChildren() { +} + + +void RsdCpuScriptIntrinsic::invokeForEach(uint32_t slot, + const Allocation * ain, + 
Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) { + + MTLaunchStruct mtls; + forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); + mtls.script = this; + mtls.fep.slot = slot; + + mtls.kernel = (void (*)())mRootPtr; + mtls.fep.usr = this; + + RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); + mCtx->launchThreads(ain, aout, sc, &mtls); + mCtx->setTLS(oldTLS); +} + +void RsdCpuScriptIntrinsic::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { + + mtls->script = this; + mtls->fep.slot = slot; + mtls->kernel = (void (*)())mRootPtr; + mtls->fep.usr = this; +} + + + diff --git a/cpu_ref/rsCpuIntrinsic.h b/cpu_ref/rsCpuIntrinsic.h new file mode 100644 index 00000000..17561157 --- /dev/null +++ b/cpu_ref/rsCpuIntrinsic.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RSD_CPU_SCRIPT_INTRINSIC_H +#define RSD_CPU_SCRIPT_INTRINSIC_H + +#include "rsCpuScript.h" + + +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsic : public RsdCpuScriptImpl { +public: + virtual void populateScript(Script *) = 0; + + virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength); + virtual int invokeRoot(); + virtual void invokeForEach(uint32_t slot, + const Allocation * ain, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc); + virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls); + virtual void invokeInit(); + virtual void invokeFreeChildren(); + + virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); + virtual void setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, + const Element *e, const size_t *dims, size_t dimLength); + virtual void setGlobalBind(uint32_t slot, Allocation *data); + virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + + virtual ~RsdCpuScriptIntrinsic(); + RsdCpuScriptIntrinsic(RsdCpuReferenceImpl *ctx, const Script *s, RsScriptIntrinsicID iid); + +protected: + RsScriptIntrinsicID mID; + outer_foreach_t mRootPtr; + +}; + + + +} +} + +#endif diff --git a/driver/rsdIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp index c35c3796..57286d55 100644 --- a/driver/rsdIntrinsicBlend.cpp +++ b/cpu_ref/rsCpuIntrinsicBlend.cpp @@ -15,19 +15,32 @@ */ -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" using namespace android; using namespace android::renderscript; -struct ConvolveParams { - float f[4]; +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic { +public: + virtual void populateScript(Script *); + + virtual ~RsdCpuScriptIntrinsicBlend(); + 
RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: + static void kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); }; +} +} + enum { BLEND_CLEAR = 0, @@ -92,10 +105,10 @@ extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t coun //#undef ARCH_ARM_HAVE_NEON -static void ColorMatrix_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - ConvolveParams *cp = (ConvolveParams *)p->usr; +void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr; // instep/outstep can be ignored--sizeof(uchar4) known at compile time uchar4 *out = (uchar4 *)p->out; @@ -442,15 +455,23 @@ static void ColorMatrix_uchar4(const RsForEachStubParamStruct *p, } } -void * rsdIntrinsic_InitBlend(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { - script->mHal.info.exportedVariableCount = 0; - funcs->root = ColorMatrix_uchar4; +RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_BLEND) { - ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); - return cp; + mRootPtr = &kernel; } +RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() { +} + +void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 0; +} + +RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, const Script *s) { + return new RsdCpuScriptIntrinsicBlend(ctx, s); +} + + diff --git a/driver/rsdIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp index b67e8d51..48363d14 100644 --- a/driver/rsdIntrinsicBlur.cpp +++ b/cpu_ref/rsCpuIntrinsicBlur.cpp @@ -14,25 +14,45 
@@ * limitations under the License. */ - -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" using namespace android; using namespace android::renderscript; -struct ConvolveParams { +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicBlur : public RsdCpuScriptIntrinsic { +public: + virtual void populateScript(Script *); + virtual void invokeFreeChildren(); + + virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); + virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + + virtual ~RsdCpuScriptIntrinsicBlur(); + RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: float fp[104]; short ip[104]; float radius; int iradius; ObjectBaseRef<Allocation> alloc; + + static void kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); + void ComputeGaussianWeights(); }; -static void ComputeGaussianWeights(ConvolveParams *cp) { +} +} + + +void RsdCpuScriptIntrinsicBlur::ComputeGaussianWeights() { // Compute gaussian weights for the blur // e is the euler's number float e = 2.718281828459045f; @@ -46,7 +66,7 @@ static void ComputeGaussianWeights(ConvolveParams *cp) { // The larger the radius gets, the more our gaussian blur // will resemble a box blur since with large sigma // the gaussian curve begins to lose its shape - float sigma = 0.4f * cp->radius + 0.6f; + float sigma = 0.4f * radius + 0.6f; // Now compute the coefficients. 
We will store some redundant values to save // some math during the blur calculations precompute some values @@ -56,35 +76,30 @@ static void ComputeGaussianWeights(ConvolveParams *cp) { float normalizeFactor = 0.0f; float floatR = 0.0f; int r; - cp->iradius = (float)ceil(cp->radius) + 0.5f; - for (r = -cp->iradius; r <= cp->iradius; r ++) { + iradius = (float)ceil(radius) + 0.5f; + for (r = -iradius; r <= iradius; r ++) { floatR = (float)r; - cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2); - normalizeFactor += cp->fp[r + cp->iradius]; + fp[r + iradius] = coeff1 * powf(e, floatR * floatR * coeff2); + normalizeFactor += fp[r + iradius]; } //Now we need to normalize the weights because all our coefficients need to add up to one normalizeFactor = 1.0f / normalizeFactor; - for (r = -cp->iradius; r <= cp->iradius; r ++) { - cp->fp[r + cp->iradius] *= normalizeFactor; - cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768); + for (r = -iradius; r <= iradius; r ++) { + fp[r + iradius] *= normalizeFactor; + ip[r + iradius] = (short)(ip[r + iradius] * 32768); } } -static void Blur_Bind(const Context *dc, const Script *script, - void * intrinsicData, uint32_t slot, Allocation *data) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; +void RsdCpuScriptIntrinsicBlur::setGlobalObj(uint32_t slot, ObjectBase *data) { rsAssert(slot == 1); - cp->alloc.set(data); + alloc.set(static_cast<Allocation *>(data)); } -static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData, - uint32_t slot, void *data, size_t dataLength) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; +void RsdCpuScriptIntrinsicBlur::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { rsAssert(slot == 0); - - cp->radius = ((const float *)data)[0]; - ComputeGaussianWeights(cp); + radius = ((const float *)data)[0]; + ComputeGaussianWeights(); } @@ -158,17 +173,17 @@ static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, 
int32_t x, } -static void Blur_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { +void RsdCpuScriptIntrinsicBlur::kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { float buf[4 * 2048]; - ConvolveParams *cp = (ConvolveParams *)p->usr; + RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr; if (!cp->alloc.get()) { ALOGE("Blur executed without input, skipping"); return; } - DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; - const uchar *pin = (const uchar *)din->lod[0].mallocPtr; + const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; + const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; uchar4 *out = (uchar4 *)p->out; uint32_t x1 = xstart; @@ -177,11 +192,11 @@ static void Blur_uchar4(const RsForEachStubParamStruct *p, float4 *fout = (float4 *)buf; int y = p->y; if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) { - const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride; - OneVF(fout, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, x1, x2); + const uchar *pi = pin + (y - cp->iradius) * stride; + OneVF(fout, pi, stride, cp->fp, cp->iradius * 2 + 1, x1, x2); } else { while(x2 > x1) { - OneV(p, fout, x1, y, pin, din->lod[0].stride, cp->fp, cp->iradius); + OneV(p, fout, x1, y, pin, stride, cp->fp, cp->iradius); fout++; x1++; } @@ -208,19 +223,29 @@ static void Blur_uchar4(const RsForEachStubParamStruct *p, } -void * rsdIntrinsic_InitBlur(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { +RsdCpuScriptIntrinsicBlur::RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_BLUR) { + + mRootPtr = &kernel; + radius = 5; + ComputeGaussianWeights(); +} + +RsdCpuScriptIntrinsicBlur::~RsdCpuScriptIntrinsicBlur() { +} + +void 
RsdCpuScriptIntrinsicBlur::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 2; +} + +void RsdCpuScriptIntrinsicBlur::invokeFreeChildren() { + alloc.clear(); +} + - script->mHal.info.exportedVariableCount = 2; - funcs->setVarObj = Blur_Bind; - funcs->setVar = Blur_SetVar; - funcs->root = Blur_uchar4; +RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, const Script *s) { - ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); - cp->radius = 5; - ComputeGaussianWeights(cp); - return cp; + return new RsdCpuScriptIntrinsicBlur(ctx, s); } diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp new file mode 100644 index 00000000..8f3196d7 --- /dev/null +++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" + +using namespace android; +using namespace android::renderscript; + +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicColorMatrix : public RsdCpuScriptIntrinsic { +public: + virtual void populateScript(Script *); + + virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); + + virtual ~RsdCpuScriptIntrinsicColorMatrix(); + RsdCpuScriptIntrinsicColorMatrix(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: + float fp[16]; + short ip[16]; + + static void kernel4x4(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); + static void kernel3x3(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); + static void kernelDot(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); +}; + +} +} + + +void RsdCpuScriptIntrinsicColorMatrix::setGlobalVar(uint32_t slot, const void *data, + size_t dataLength) { + rsAssert(slot == 0); + memcpy (fp, data, dataLength); + for(int ct=0; ct < 16; ct++) { + ip[ct] = (short)(fp[ct] * 255.f + 0.5f); + } + + mRootPtr = &kernel4x4; + if ((ip[3] == 0) && (ip[7] == 0) && (ip[11] == 0) && + (ip[12] == 0) && (ip[13] == 0) && (ip[14] == 0) && (ip[15] == 255)) { + mRootPtr = &kernel3x3; + + if ((ip[0] == ip[1]) && (ip[0] == ip[2]) && + (ip[4] == ip[5]) && (ip[4] == ip[6]) && + (ip[8] == ip[9]) && (ip[8] == ip[10])) { + mRootPtr = &kernelDot; + } + } +} + +extern "C" void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, const short *coef, uint32_t count); +extern "C" void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, const short *coef, uint32_t count); +extern "C" void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, const short *coef, uint32_t count); + +static void One(const RsForEachStubParamStruct *p, uchar4 *out, + const uchar4 *py, 
const float* coeff) { + float4 i = convert_float4(py[0]); + + float4 sum; + sum.x = i.x * coeff[0] + + i.y * coeff[4] + + i.z * coeff[8] + + i.w * coeff[12]; + sum.y = i.x * coeff[1] + + i.y * coeff[5] + + i.z * coeff[9] + + i.w * coeff[13]; + sum.z = i.x * coeff[2] + + i.y * coeff[6] + + i.z * coeff[10] + + i.w * coeff[14]; + sum.w = i.x * coeff[3] + + i.y * coeff[7] + + i.z * coeff[11] + + i.w * coeff[15]; + + sum.x = sum.x < 0 ? 0 : (sum.x > 255 ? 255 : sum.x); + sum.y = sum.y < 0 ? 0 : (sum.y > 255 ? 255 : sum.y); + sum.z = sum.z < 0 ? 0 : (sum.z > 255 ? 255 : sum.z); + sum.w = sum.w < 0 ? 0 : (sum.w > 255 ? 255 : sum.w); + + *out = convert_uchar4(sum); +} + +void RsdCpuScriptIntrinsicColorMatrix::kernel4x4(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr; + uchar4 *out = (uchar4 *)p->out; + uchar4 *in = (uchar4 *)p->in; + uint32_t x1 = xstart; + uint32_t x2 = xend; + + if(x2 > x1) { +#if defined(ARCH_ARM_HAVE_NEON) + int32_t len = (x2 - x1) >> 2; + if(len > 0) { + rsdIntrinsicColorMatrix4x4_K(out, in, cp->ip, len); + x1 += len << 2; + out += len << 2; + in += len << 2; + } +#endif + + while(x1 != x2) { + One(p, out++, in++, cp->fp); + x1++; + } + } +} + +void RsdCpuScriptIntrinsicColorMatrix::kernel3x3(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr; + uchar4 *out = (uchar4 *)p->out; + uchar4 *in = (uchar4 *)p->in; + uint32_t x1 = xstart; + uint32_t x2 = xend; + + if(x2 > x1) { +#if defined(ARCH_ARM_HAVE_NEON) + int32_t len = (x2 - x1) >> 2; + if(len > 0) { + rsdIntrinsicColorMatrix3x3_K(out, in, cp->ip, len); + x1 += len << 2; + out += len << 2; + in += len << 2; + } +#endif + + while(x1 != x2) { + One(p, out++, in++, cp->fp); + x1++; + } + } +} + +void 
RsdCpuScriptIntrinsicColorMatrix::kernelDot(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr; + uchar4 *out = (uchar4 *)p->out; + uchar4 *in = (uchar4 *)p->in; + uint32_t x1 = xstart; + uint32_t x2 = xend; + + if(x2 > x1) { +#if defined(ARCH_ARM_HAVE_NEON) + int32_t len = (x2 - x1) >> 2; + if(len > 0) { + rsdIntrinsicColorMatrixDot_K(out, in, cp->ip, len); + x1 += len << 2; + out += len << 2; + in += len << 2; + } +#endif + + while(x1 != x2) { + One(p, out++, in++, cp->fp); + x1++; + } + } +} + + +RsdCpuScriptIntrinsicColorMatrix::RsdCpuScriptIntrinsicColorMatrix( + RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX) { + + const static float defaultMatrix[] = { + 1.f, 0.f, 0.f, 0.f, + 0.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 1.f + }; + setGlobalVar(0, defaultMatrix, sizeof(defaultMatrix)); +} + +RsdCpuScriptIntrinsicColorMatrix::~RsdCpuScriptIntrinsicColorMatrix() { +} + +void RsdCpuScriptIntrinsicColorMatrix::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 1; +} + +RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx, const Script *s) { + + return new RsdCpuScriptIntrinsicColorMatrix(ctx, s); +} + + + diff --git a/driver/rsdIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp index 55f4360e..18a53119 100644 --- a/driver/rsdIntrinsicConvolve3x3.cpp +++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp @@ -15,40 +15,57 @@ */ -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" using namespace android; using namespace android::renderscript; -struct ConvolveParams { +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicConvolve3x3 : public RsdCpuScriptIntrinsic { 
+public: + virtual void populateScript(Script *); + virtual void invokeFreeChildren(); + + virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); + virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + + virtual ~RsdCpuScriptIntrinsicConvolve3x3(); + RsdCpuScriptIntrinsicConvolve3x3(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: float fp[16]; short ip[16]; ObjectBaseRef<Allocation> alloc; + + static void kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); }; -static void Convolve3x3_Bind(const Context *dc, const Script *script, - void * intrinsicData, uint32_t slot, Allocation *data) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; - rsAssert(slot == 1); - cp->alloc.set(data); +} } -static void Convolve3x3_SetVar(const Context *dc, const Script *script, void * intrinsicData, - uint32_t slot, void *data, size_t dataLength) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; +void RsdCpuScriptIntrinsicConvolve3x3::setGlobalObj(uint32_t slot, ObjectBase *data) { + rsAssert(slot == 1); + alloc.set(static_cast<Allocation *>(data)); +} + +void RsdCpuScriptIntrinsicConvolve3x3::setGlobalVar(uint32_t slot, const void *data, + size_t dataLength) { rsAssert(slot == 0); - memcpy (cp->fp, data, dataLength); + memcpy (&fp, data, dataLength); for(int ct=0; ct < 9; ct++) { - cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f); + ip[ct] = (short)(fp[ct] * 255.f + 0.5f); } } -extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, const void *y1, const void *y2, const short *coef, uint32_t count); +extern "C" void rsdIntrinsicConvolve3x3_K(void *dst, const void *y0, const void *y1, + const void *y2, const short *coef, uint32_t count); static void ConvolveOne(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out, @@ -73,23 +90,23 @@ static void ConvolveOne(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *o *out = o; } -static void 
Convolve3x3_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - ConvolveParams *cp = (ConvolveParams *)p->usr; +void RsdCpuScriptIntrinsicConvolve3x3::kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr; if (!cp->alloc.get()) { ALOGE("Convolve3x3 executed without input, skipping"); return; } - DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; - const uchar *pin = (const uchar *)din->lod[0].mallocPtr; + const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; + const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; uint32_t y1 = rsMin((int32_t)p->y + 1, (int32_t)(p->dimY-1)); uint32_t y2 = rsMax((int32_t)p->y - 1, 0); - const uchar4 *py0 = (const uchar4 *)(pin + din->lod[0].stride * y2); - const uchar4 *py1 = (const uchar4 *)(pin + din->lod[0].stride * p->y); - const uchar4 *py2 = (const uchar4 *)(pin + din->lod[0].stride * y1); + const uchar4 *py0 = (const uchar4 *)(pin + stride * y2); + const uchar4 *py1 = (const uchar4 *)(pin + stride * p->y); + const uchar4 *py2 = (const uchar4 *)(pin + stride * y1); uchar4 *out = (uchar4 *)p->out; uint32_t x1 = xstart; @@ -118,21 +135,32 @@ static void Convolve3x3_uchar4(const RsForEachStubParamStruct *p, } } -void * rsdIntrinsic_InitConvolve3x3(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { +RsdCpuScriptIntrinsicConvolve3x3::RsdCpuScriptIntrinsicConvolve3x3( + RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3) { - script->mHal.info.exportedVariableCount = 2; - funcs->setVarObj = Convolve3x3_Bind; - funcs->setVar = Convolve3x3_SetVar; - funcs->root = Convolve3x3_uchar4; - - ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); + 
mRootPtr = &kernel; for(int ct=0; ct < 9; ct++) { - cp->fp[ct] = 1.f / 9.f; - cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f); + fp[ct] = 1.f / 9.f; + ip[ct] = (short)(fp[ct] * 255.f + 0.5f); } - return cp; +} + +RsdCpuScriptIntrinsicConvolve3x3::~RsdCpuScriptIntrinsicConvolve3x3() { +} + +void RsdCpuScriptIntrinsicConvolve3x3::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 2; +} + +void RsdCpuScriptIntrinsicConvolve3x3::invokeFreeChildren() { + alloc.clear(); +} + + +RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, const Script *s) { + + return new RsdCpuScriptIntrinsicConvolve3x3(ctx, s); } diff --git a/driver/rsdIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp index fc6b029e..2cae2c04 100644 --- a/driver/rsdIntrinsicConvolve5x5.cpp +++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp @@ -15,36 +15,54 @@ */ -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" using namespace android; using namespace android::renderscript; -struct ConvolveParams { +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicConvolve5x5 : public RsdCpuScriptIntrinsic { +public: + virtual void populateScript(Script *); + virtual void invokeFreeChildren(); + + virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); + virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + + virtual ~RsdCpuScriptIntrinsicConvolve5x5(); + RsdCpuScriptIntrinsicConvolve5x5(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: float fp[28]; short ip[28]; ObjectBaseRef<Allocation> alloc; + + + static void kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); + + }; -static void Convolve5x5_Bind(const Context *dc, const Script *script, - void * intrinsicData, uint32_t slot, Allocation *data) { - ConvolveParams *cp = 
(ConvolveParams *)intrinsicData; - rsAssert(slot == 1); - cp->alloc.set(data); +} } -static void Convolve5x5_SetVar(const Context *dc, const Script *script, void * intrinsicData, - uint32_t slot, void *data, size_t dataLength) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; +void RsdCpuScriptIntrinsicConvolve5x5::setGlobalObj(uint32_t slot, ObjectBase *data) { + rsAssert(slot == 1); + alloc.set(static_cast<Allocation *>(data)); +} +void RsdCpuScriptIntrinsicConvolve5x5::setGlobalVar(uint32_t slot, + const void *data, size_t dataLength) { rsAssert(slot == 0); - memcpy (cp->fp, data, dataLength); + memcpy (&fp, data, dataLength); for(int ct=0; ct < 25; ct++) { - cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f); + ip[ct] = (short)(fp[ct] * 255.f + 0.5f); } } @@ -98,16 +116,16 @@ extern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, const void const void *y2, const void *y3, const void *y4, const short *coef, uint32_t count); -static void Convolve5x5_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - ConvolveParams *cp = (ConvolveParams *)p->usr; +void RsdCpuScriptIntrinsicConvolve5x5::kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; if (!cp->alloc.get()) { ALOGE("Convolve5x5 executed without input, skipping"); return; } - DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; - const uchar *pin = (const uchar *)din->lod[0].mallocPtr; + const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; + const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; uint32_t y0 = rsMax((int32_t)p->y-2, 0); uint32_t y1 = rsMax((int32_t)p->y-1, 0); @@ -115,11 +133,11 @@ static void Convolve5x5_uchar4(const RsForEachStubParamStruct *p, uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); uint32_t y4 = 
rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); - const uchar4 *py0 = (const uchar4 *)(pin + din->lod[0].stride * y0); - const uchar4 *py1 = (const uchar4 *)(pin + din->lod[0].stride * y1); - const uchar4 *py2 = (const uchar4 *)(pin + din->lod[0].stride * y2); - const uchar4 *py3 = (const uchar4 *)(pin + din->lod[0].stride * y3); - const uchar4 *py4 = (const uchar4 *)(pin + din->lod[0].stride * y4); + const uchar4 *py0 = (const uchar4 *)(pin + stride * y0); + const uchar4 *py1 = (const uchar4 *)(pin + stride * y1); + const uchar4 *py2 = (const uchar4 *)(pin + stride * y2); + const uchar4 *py3 = (const uchar4 *)(pin + stride * y3); + const uchar4 *py4 = (const uchar4 *)(pin + stride * y4); uchar4 *out = (uchar4 *)p->out; uint32_t x1 = xstart; @@ -147,21 +165,34 @@ static void Convolve5x5_uchar4(const RsForEachStubParamStruct *p, } } -void * rsdIntrinsic_InitConvolve5x5(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { - script->mHal.info.exportedVariableCount = 2; - funcs->setVarObj = Convolve5x5_Bind; - funcs->setVar = Convolve5x5_SetVar; - funcs->root = Convolve5x5_uchar4; +RsdCpuScriptIntrinsicConvolve5x5::RsdCpuScriptIntrinsicConvolve5x5( + RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5) { - ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); - for(int ct=0; ct < 25; ct++) { - cp->fp[ct] = 1.f / 25.f; - cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f); + mRootPtr = &kernel; + for(int ct=0; ct < 9; ct++) { + fp[ct] = 1.f / 25.f; + ip[ct] = (short)(fp[ct] * 255.f + 0.5f); } - return cp; } +RsdCpuScriptIntrinsicConvolve5x5::~RsdCpuScriptIntrinsicConvolve5x5() { +} + +void RsdCpuScriptIntrinsicConvolve5x5::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 2; +} + +void RsdCpuScriptIntrinsicConvolve5x5::invokeFreeChildren() { + alloc.clear(); +} + + +RsdCpuScriptImpl * 
rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx, const Script *s) { + + return new RsdCpuScriptIntrinsicConvolve5x5(ctx, s); +} + + diff --git a/driver/rsdIntrinsicInlines.h b/cpu_ref/rsCpuIntrinsicInlines.h index ab11b4f5..ab11b4f5 100644 --- a/driver/rsdIntrinsicInlines.h +++ b/cpu_ref/rsCpuIntrinsicInlines.h diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp new file mode 100644 index 00000000..188ed2b6 --- /dev/null +++ b/cpu_ref/rsCpuIntrinsicLUT.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" + +using namespace android; +using namespace android::renderscript; + +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicLUT : public RsdCpuScriptIntrinsic { +public: + virtual void populateScript(Script *); + virtual void invokeFreeChildren(); + + virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + + virtual ~RsdCpuScriptIntrinsicLUT(); + RsdCpuScriptIntrinsicLUT(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: + ObjectBaseRef<Allocation> lut; + + static void kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); +}; + +} +} + + +void RsdCpuScriptIntrinsicLUT::setGlobalObj(uint32_t slot, ObjectBase *data) { + rsAssert(slot == 0); + lut.set(static_cast<Allocation *>(data)); +} + + +void RsdCpuScriptIntrinsicLUT::kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr; + + uchar4 *out = (uchar4 *)p->out; + uchar4 *in = (uchar4 *)p->in; + uint32_t x1 = xstart; + uint32_t x2 = xend; + + const uchar *tr = (const uchar *)cp->lut->mHal.drvState.lod[0].mallocPtr; + const uchar *tg = &tr[256]; + const uchar *tb = &tg[256]; + const uchar *ta = &tb[256]; + + while (x1 < x2) { + uchar4 p = *in; + uchar4 o = {tr[p.x], tg[p.y], tb[p.z], ta[p.w]}; + *out = o; + in++; + out++; + x1++; + } +} + +RsdCpuScriptIntrinsicLUT::RsdCpuScriptIntrinsicLUT(RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_LUT) { + + mRootPtr = &kernel; +} + +RsdCpuScriptIntrinsicLUT::~RsdCpuScriptIntrinsicLUT() { +} + +void RsdCpuScriptIntrinsicLUT::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 1; +} + +void RsdCpuScriptIntrinsicLUT::invokeFreeChildren() { + lut.clear(); +} + + +RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl 
*ctx, const Script *s) { + + return new RsdCpuScriptIntrinsicLUT(ctx, s); +} + + diff --git a/driver/rsdIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp index b3fb0596..7b8f7688 100644 --- a/driver/rsdIntrinsicYuvToRGB.cpp +++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp @@ -15,28 +15,46 @@ */ -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" +#include "rsCpuIntrinsic.h" +#include "rsCpuIntrinsicInlines.h" using namespace android; using namespace android::renderscript; -struct YuvParams { +namespace android { +namespace renderscript { + + +class RsdCpuScriptIntrinsicYuvToRGB : public RsdCpuScriptIntrinsic { +public: + virtual void populateScript(Script *); + virtual void invokeFreeChildren(); + + virtual void setGlobalObj(uint32_t slot, ObjectBase *data); + + virtual ~RsdCpuScriptIntrinsicYuvToRGB(); + RsdCpuScriptIntrinsicYuvToRGB(RsdCpuReferenceImpl *ctx, const Script *s); + +protected: ObjectBaseRef<Allocation> alloc; + + static void kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); }; -static void YuvToRGB_Bind(const Context *dc, const Script *script, - void * intrinsicData, uint32_t slot, Allocation *data) { - YuvParams *cp = (YuvParams *)intrinsicData; +} +} + + +void RsdCpuScriptIntrinsicYuvToRGB::setGlobalObj(uint32_t slot, ObjectBase *data) { rsAssert(slot == 0); - cp->alloc.set(data); + alloc.set(static_cast<Allocation *>(data)); } + static uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) { short Y = ((short)y) - 16; short U = ((short)u) - 128; @@ -82,16 +100,16 @@ static short YuvCoeff[] = { extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param); -static void YuvToRGB_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - YuvParams *cp = (YuvParams *)p->usr; +void 
RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr; if (!cp->alloc.get()) { ALOGE("YuvToRGB executed without input, skipping"); return; } - DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; - const uchar *pin = (const uchar *)din->lod[0].mallocPtr; + const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; + const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; const uchar *Y = pin + (p->y * p->dimX); const uchar *uv = pin + (p->dimX * p->dimY); @@ -125,15 +143,27 @@ static void YuvToRGB_uchar4(const RsForEachStubParamStruct *p, } } -void * rsdIntrinsic_InitYuvToRGB(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { +RsdCpuScriptIntrinsicYuvToRGB::RsdCpuScriptIntrinsicYuvToRGB( + RsdCpuReferenceImpl *ctx, const Script *s) + : RsdCpuScriptIntrinsic(ctx, s, RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB) { + + mRootPtr = &kernel; +} + +RsdCpuScriptIntrinsicYuvToRGB::~RsdCpuScriptIntrinsicYuvToRGB() { +} + +void RsdCpuScriptIntrinsicYuvToRGB::populateScript(Script *s) { + s->mHal.info.exportedVariableCount = 1; +} + +void RsdCpuScriptIntrinsicYuvToRGB::invokeFreeChildren() { + alloc.clear(); +} + - script->mHal.info.exportedVariableCount = 1; - funcs->setVarObj = YuvToRGB_Bind; - funcs->root = YuvToRGB_uchar4; - YuvParams *cp = (YuvParams *)calloc(1, sizeof(YuvParams)); - return cp; +RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx, const Script *s) { + return new RsdCpuScriptIntrinsicYuvToRGB(ctx, s); } diff --git a/driver/rsdIntrinsics_Convolve.S b/cpu_ref/rsCpuIntrinsics_neon.S index 04dd8b1c..04dd8b1c 100644 --- a/driver/rsdIntrinsics_Convolve.S +++ b/cpu_ref/rsCpuIntrinsics_neon.S diff --git a/driver/rsdRuntimeMath.cpp b/cpu_ref/rsCpuRuntimeMath.cpp index ba372430..cf2c8a41 100644 --- 
a/driver/rsdRuntimeMath.cpp +++ b/cpu_ref/rsCpuRuntimeMath.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 The Android Open Source Project + * Copyright (C) 2011-2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,8 @@ #include "rsMatrix3x3.h" #include "rsMatrix2x2.h" -#include "rsdCore.h" -#include "rsdRuntime.h" +#include "rsCpuCore.h" +#include "rsCpuScript.h" using namespace android; @@ -375,7 +375,7 @@ static int32_t SC_AtomicMax(volatile int32_t *ptr, int32_t value) { // ::= f # float // ::= d # double -static RsdSymbolTable gSyms[] = { +static RsdCpuReference::CpuSymbol gSyms[] = { { "_Z4acosf", (void *)&acosf, true }, { "_Z5acoshf", (void *)&acoshf, true }, { "_Z4asinf", (void *)&asinf, true }, @@ -532,11 +532,11 @@ static RsdSymbolTable gSyms[] = { { NULL, NULL, false } }; -const RsdSymbolTable * rsdLookupSymbolMath(const char *sym) { - const RsdSymbolTable *syms = gSyms; +const RsdCpuReference::CpuSymbol * RsdCpuScriptImpl::lookupSymbolMath(const char *sym) { + const RsdCpuReference::CpuSymbol *syms = gSyms; - while (syms->mPtr) { - if (!strcmp(syms->mName, sym)) { + while (syms->fnPtr) { + if (!strcmp(syms->name, sym)) { return syms; } syms++; diff --git a/cpu_ref/rsCpuRuntimeStubs.cpp b/cpu_ref/rsCpuRuntimeStubs.cpp new file mode 100644 index 00000000..b87a6398 --- /dev/null +++ b/cpu_ref/rsCpuRuntimeStubs.cpp @@ -0,0 +1,335 @@ +/* + * Copyright (C) 2011-2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "rsContext.h" +#include "rsScriptC.h" +#include "rsMatrix4x4.h" +#include "rsMatrix3x3.h" +#include "rsMatrix2x2.h" +#include "rsRuntime.h" + +#include "utils/Timers.h" +#include "rsCpuCore.h" +#include "rsCpuScript.h" + +#include <time.h> + +using namespace android; +using namespace android::renderscript; + +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef char char2 __attribute__((ext_vector_type(2))); +typedef char char3 __attribute__((ext_vector_type(3))); +typedef char char4 __attribute__((ext_vector_type(4))); +typedef unsigned char uchar2 __attribute__((ext_vector_type(2))); +typedef unsigned char uchar3 __attribute__((ext_vector_type(3))); +typedef unsigned char uchar4 __attribute__((ext_vector_type(4))); +typedef short short2 __attribute__((ext_vector_type(2))); +typedef short short3 __attribute__((ext_vector_type(3))); +typedef short short4 __attribute__((ext_vector_type(4))); +typedef unsigned short ushort2 __attribute__((ext_vector_type(2))); +typedef unsigned short ushort3 __attribute__((ext_vector_type(3))); +typedef unsigned short ushort4 __attribute__((ext_vector_type(4))); +typedef int32_t int2 __attribute__((ext_vector_type(2))); +typedef int32_t int3 __attribute__((ext_vector_type(3))); +typedef int32_t int4 __attribute__((ext_vector_type(4))); +typedef uint32_t uint2 __attribute__((ext_vector_type(2))); +typedef uint32_t uint3 __attribute__((ext_vector_type(3))); +typedef 
uint32_t uint4 __attribute__((ext_vector_type(4)));
+typedef long long long2 __attribute__((ext_vector_type(2)));
+typedef long long long3 __attribute__((ext_vector_type(3)));
+typedef long long long4 __attribute__((ext_vector_type(4)));
+typedef unsigned long long ulong2 __attribute__((ext_vector_type(2)));
+typedef unsigned long long ulong3 __attribute__((ext_vector_type(3)));
+typedef unsigned long long ulong4 __attribute__((ext_vector_type(4)));
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Message routines
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Plain integer division / remainder helpers.
+// NOTE(review): these have external linkage and are not listed in the symbol
+// table defined later in this file; confirm who references them.
+int SC_divsi3(int a, int b) {
+    return a / b;
+}
+
+int SC_modsi3(int a, int b) {
+    return a % b;
+}
+
+unsigned int SC_udivsi3(unsigned int a, unsigned int b) {
+    return a / b;
+}
+
+unsigned int SC_umodsi3(unsigned int a, unsigned int b) {
+    return a % b;
+}
+
+// rsDebug() back ends: each one logs the label `s` followed by the value(s).
+
+// Logs a float both as a decimal and as its raw 32-bit pattern.
+static void SC_debugF(const char *s, float f) {
+    // Copy the bits out with memcpy; reading a float through an int pointer
+    // violates the strict-aliasing rules.
+    uint32_t bits = 0;
+    memcpy(&bits, &f, sizeof(bits));
+    ALOGD("%s %f, 0x%08x", s, f, bits);
+}
+static void SC_debugFv2(const char *s, float f1, float f2) {
+    ALOGD("%s {%f, %f}", s, f1, f2);
+}
+static void SC_debugFv3(const char *s, float f1, float f2, float f3) {
+    ALOGD("%s {%f, %f, %f}", s, f1, f2, f3);
+}
+static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {
+    ALOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
+}
+static void SC_debugF2(const char *s, float2 f) {
+    ALOGD("%s {%f, %f}", s, f.x, f.y);
+}
+static void SC_debugF3(const char *s, float3 f) {
+    ALOGD("%s {%f, %f, %f}", s, f.x, f.y, f.z);
+}
+static void SC_debugF4(const char *s, float4 f) {
+    ALOGD("%s {%f, %f, %f, %f}", s, f.x, f.y, f.z, f.w);
+}
+// Logs a double both as a decimal and as its raw 64-bit pattern.
+static void SC_debugD(const char *s, double d) {
+    // Same aliasing concern as SC_debugF: copy the bits instead of casting
+    // the pointer.
+    unsigned long long bits = 0;
+    memcpy(&bits, &d, sizeof(d));
+    ALOGD("%s %f, 0x%08llx", s, d, bits);
+}
+// Logs a 4x4 matrix one row per log line; element (row r, column c) is read
+// from f[c * 4 + r].
+static void SC_debugFM4v4(const char *s, const float *f) {
+    ALOGD("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]);
+    ALOGD("%s %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]);
+    ALOGD("%s %f, %f, %f, %f", s, f[2], f[6], f[10],
f[14]); + ALOGD("%s %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]); +} +static void SC_debugFM3v3(const char *s, const float *f) { + ALOGD("%s {%f, %f, %f", s, f[0], f[3], f[6]); + ALOGD("%s %f, %f, %f", s, f[1], f[4], f[7]); + ALOGD("%s %f, %f, %f}",s, f[2], f[5], f[8]); +} +static void SC_debugFM2v2(const char *s, const float *f) { + ALOGD("%s {%f, %f", s, f[0], f[2]); + ALOGD("%s %f, %f}",s, f[1], f[3]); +} +static void SC_debugI8(const char *s, char c) { + ALOGD("%s %hhd 0x%hhx", s, c, (unsigned char)c); +} +static void SC_debugC2(const char *s, char2 c) { + ALOGD("%s {%hhd, %hhd} 0x%hhx 0x%hhx", s, c.x, c.y, (unsigned char)c.x, (unsigned char)c.y); +} +static void SC_debugC3(const char *s, char3 c) { + ALOGD("%s {%hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z); +} +static void SC_debugC4(const char *s, char4 c) { + ALOGD("%s {%hhd, %hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z, (unsigned char)c.w); +} +static void SC_debugU8(const char *s, unsigned char c) { + ALOGD("%s %hhu 0x%hhx", s, c, c); +} +static void SC_debugUC2(const char *s, uchar2 c) { + ALOGD("%s {%hhu, %hhu} 0x%hhx 0x%hhx", s, c.x, c.y, c.x, c.y); +} +static void SC_debugUC3(const char *s, uchar3 c) { + ALOGD("%s {%hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.x, c.y, c.z); +} +static void SC_debugUC4(const char *s, uchar4 c) { + ALOGD("%s {%hhu, %hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w); +} +static void SC_debugI16(const char *s, short c) { + ALOGD("%s %hd 0x%hx", s, c, c); +} +static void SC_debugS2(const char *s, short2 c) { + ALOGD("%s {%hd, %hd} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y); +} +static void SC_debugS3(const char *s, short3 c) { + ALOGD("%s {%hd, %hd, %hd} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z); +} +static void SC_debugS4(const char *s, short4 c) { + 
ALOGD("%s {%hd, %hd, %hd, %hd} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w); +} +static void SC_debugU16(const char *s, unsigned short c) { + ALOGD("%s %hu 0x%hx", s, c, c); +} +static void SC_debugUS2(const char *s, ushort2 c) { + ALOGD("%s {%hu, %hu} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y); +} +static void SC_debugUS3(const char *s, ushort3 c) { + ALOGD("%s {%hu, %hu, %hu} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z); +} +static void SC_debugUS4(const char *s, ushort4 c) { + ALOGD("%s {%hu, %hu, %hu, %hu} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w); +} +static void SC_debugI32(const char *s, int32_t i) { + ALOGD("%s %d 0x%x", s, i, i); +} +static void SC_debugI2(const char *s, int2 i) { + ALOGD("%s {%d, %d} 0x%x 0x%x", s, i.x, i.y, i.x, i.y); +} +static void SC_debugI3(const char *s, int3 i) { + ALOGD("%s {%d, %d, %d} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z); +} +static void SC_debugI4(const char *s, int4 i) { + ALOGD("%s {%d, %d, %d, %d} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w); +} +static void SC_debugU32(const char *s, uint32_t i) { + ALOGD("%s %u 0x%x", s, i, i); +} +static void SC_debugUI2(const char *s, uint2 i) { + ALOGD("%s {%u, %u} 0x%x 0x%x", s, i.x, i.y, i.x, i.y); +} +static void SC_debugUI3(const char *s, uint3 i) { + ALOGD("%s {%u, %u, %u} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z); +} +static void SC_debugUI4(const char *s, uint4 i) { + ALOGD("%s {%u, %u, %u, %u} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w); +} +static void SC_debugLL64(const char *s, long long ll) { + ALOGD("%s %lld 0x%llx", s, ll, ll); +} +static void SC_debugL2(const char *s, long2 ll) { + ALOGD("%s {%lld, %lld} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y); +} +static void SC_debugL3(const char *s, long3 ll) { + ALOGD("%s {%lld, %lld, %lld} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z); +} +static void SC_debugL4(const char *s, long4 ll) { + ALOGD("%s 
{%lld, %lld, %lld, %lld} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w); +} +static void SC_debugULL64(const char *s, unsigned long long ll) { + ALOGD("%s %llu 0x%llx", s, ll, ll); +} +static void SC_debugUL2(const char *s, ulong2 ll) { + ALOGD("%s {%llu, %llu} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y); +} +static void SC_debugUL3(const char *s, ulong3 ll) { + ALOGD("%s {%llu, %llu, %llu} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z); +} +static void SC_debugUL4(const char *s, ulong4 ll) { + ALOGD("%s {%llu, %llu, %llu, %llu} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w); +} +static void SC_debugP(const char *s, const void *p) { + ALOGD("%s %p", s, p); +} + + +////////////////////////////////////////////////////////////////////////////// +// Stub implementation +////////////////////////////////////////////////////////////////////////////// + +// llvm name mangling ref +// <builtin-type> ::= v # void +// ::= b # bool +// ::= c # char +// ::= a # signed char +// ::= h # unsigned char +// ::= s # short +// ::= t # unsigned short +// ::= i # int +// ::= j # unsigned int +// ::= l # long +// ::= m # unsigned long +// ::= x # long long, __int64 +// ::= y # unsigned long long, __int64 +// ::= f # float +// ::= d # double + +static RsdCpuReference::CpuSymbol gSyms[] = { + { "memset", (void *)&memset, true }, + { "memcpy", (void *)&memcpy, true }, + + // Debug + { "_Z7rsDebugPKcf", (void *)&SC_debugF, true }, + { "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true }, + { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true }, + { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true }, + { "_Z7rsDebugPKcDv2_f", (void *)&SC_debugF2, true }, + { "_Z7rsDebugPKcDv3_f", (void *)&SC_debugF3, true }, + { "_Z7rsDebugPKcDv4_f", (void *)&SC_debugF4, true }, + { "_Z7rsDebugPKcd", (void *)&SC_debugD, true }, + { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true }, + { "_Z7rsDebugPKcPK12rs_matrix3x3", (void 
*)&SC_debugFM3v3, true }, + { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true }, + { "_Z7rsDebugPKcc", (void *)&SC_debugI8, true }, + { "_Z7rsDebugPKcDv2_c", (void *)&SC_debugC2, true }, + { "_Z7rsDebugPKcDv3_c", (void *)&SC_debugC3, true }, + { "_Z7rsDebugPKcDv4_c", (void *)&SC_debugC4, true }, + { "_Z7rsDebugPKch", (void *)&SC_debugU8, true }, + { "_Z7rsDebugPKcDv2_h", (void *)&SC_debugUC2, true }, + { "_Z7rsDebugPKcDv3_h", (void *)&SC_debugUC3, true }, + { "_Z7rsDebugPKcDv4_h", (void *)&SC_debugUC4, true }, + { "_Z7rsDebugPKcs", (void *)&SC_debugI16, true }, + { "_Z7rsDebugPKcDv2_s", (void *)&SC_debugS2, true }, + { "_Z7rsDebugPKcDv3_s", (void *)&SC_debugS3, true }, + { "_Z7rsDebugPKcDv4_s", (void *)&SC_debugS4, true }, + { "_Z7rsDebugPKct", (void *)&SC_debugU16, true }, + { "_Z7rsDebugPKcDv2_t", (void *)&SC_debugUS2, true }, + { "_Z7rsDebugPKcDv3_t", (void *)&SC_debugUS3, true }, + { "_Z7rsDebugPKcDv4_t", (void *)&SC_debugUS4, true }, + { "_Z7rsDebugPKci", (void *)&SC_debugI32, true }, + { "_Z7rsDebugPKcDv2_i", (void *)&SC_debugI2, true }, + { "_Z7rsDebugPKcDv3_i", (void *)&SC_debugI3, true }, + { "_Z7rsDebugPKcDv4_i", (void *)&SC_debugI4, true }, + { "_Z7rsDebugPKcj", (void *)&SC_debugU32, true }, + { "_Z7rsDebugPKcDv2_j", (void *)&SC_debugUI2, true }, + { "_Z7rsDebugPKcDv3_j", (void *)&SC_debugUI3, true }, + { "_Z7rsDebugPKcDv4_j", (void *)&SC_debugUI4, true }, + // Both "long" and "unsigned long" need to be redirected to their + // 64-bit counterparts, since we have hacked Slang to use 64-bit + // for "long" on Arm (to be similar to Java). 
+ { "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true }, + { "_Z7rsDebugPKcDv2_l", (void *)&SC_debugL2, true }, + { "_Z7rsDebugPKcDv3_l", (void *)&SC_debugL3, true }, + { "_Z7rsDebugPKcDv4_l", (void *)&SC_debugL4, true }, + { "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true }, + { "_Z7rsDebugPKcDv2_m", (void *)&SC_debugUL2, true }, + { "_Z7rsDebugPKcDv3_m", (void *)&SC_debugUL3, true }, + { "_Z7rsDebugPKcDv4_m", (void *)&SC_debugUL4, true }, + { "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true }, + { "_Z7rsDebugPKcDv2_x", (void *)&SC_debugL2, true }, + { "_Z7rsDebugPKcDv3_x", (void *)&SC_debugL3, true }, + { "_Z7rsDebugPKcDv4_x", (void *)&SC_debugL4, true }, + { "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true }, + { "_Z7rsDebugPKcDv2_y", (void *)&SC_debugUL2, true }, + { "_Z7rsDebugPKcDv3_y", (void *)&SC_debugUL3, true }, + { "_Z7rsDebugPKcDv4_y", (void *)&SC_debugUL4, true }, + { "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true }, + + { NULL, NULL, false } +}; + + +void * RsdCpuScriptImpl::lookupRuntimeStub(void* pContext, char const* name) { + RsdCpuScriptImpl *s = (RsdCpuScriptImpl *)pContext; + const RsdCpuReference::CpuSymbol *syms = gSyms; + const RsdCpuReference::CpuSymbol *sym = NULL; + + sym = s->mCtx->symLookup(name); + if (!sym) { + sym = s->lookupSymbolMath(name); + } + if (!sym) { + while (syms->fnPtr) { + if (!strcmp(syms->name, name)) { + sym = syms; + } + syms++; + } + } + + if (sym) { + s->mIsThreadable &= sym->threadable; + return sym->fnPtr; + } + ALOGE("ScriptC sym lookup failed for %s", name); + return NULL; +} + + diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp new file mode 100644 index 00000000..06ce4bb7 --- /dev/null +++ b/cpu_ref/rsCpuScript.cpp @@ -0,0 +1,463 @@ +/* + * Copyright (C) 2011-2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +#include "rsCpuCore.h" + +#include "rsCpuScript.h" +//#include "rsdRuntime.h" +//#include "rsdAllocation.h" +//#include "rsCpuIntrinsics.h" + + +#include "utils/Vector.h" +#include "utils/Timers.h" +#include "utils/StopWatch.h" + + +#include <bcc/BCCContext.h> +#include <bcc/Renderscript/RSCompilerDriver.h> +#include <bcc/Renderscript/RSExecutable.h> +#include <bcc/Renderscript/RSInfo.h> + +namespace android { +namespace renderscript { + + + +RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { + mCtx = ctx; + mScript = s; + + mRoot = NULL; + mRootExpand = NULL; + mInit = NULL; + mFreeChildren = NULL; + + mCompilerContext = NULL; + mCompilerDriver = NULL; + mExecutable = NULL; + + mBoundAllocs = NULL; + mIntrinsicData = NULL; + mIsThreadable = true; +} + + +bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, + uint8_t const *bitcode, size_t bitcodeSize, + uint32_t flags) { + //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); + //ALOGE("rsdScriptInit %p %p", rsc, script); + + mCtx->lockMutex(); + + bcc::RSExecutable *exec; + const bcc::RSInfo *info; + + mCompilerContext = NULL; + mCompilerDriver = NULL; + mExecutable = NULL; + + mCompilerContext = new bcc::BCCContext(); + if (mCompilerContext == NULL) { + ALOGE("bcc: FAILS to create compiler context (out of memory)"); + mCtx->unlockMutex(); + return false; + } + + mCompilerDriver = new bcc::RSCompilerDriver(); + if (mCompilerDriver == NULL) { + ALOGE("bcc: FAILS to create compiler 
driver (out of memory)"); + mCtx->unlockMutex(); + return false; + } + + mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub); + mCompilerDriver->setRSRuntimeLookupContext(this); + + exec = mCompilerDriver->build(*mCompilerContext, cacheDir, resName, + (const char *)bitcode, bitcodeSize, NULL); + + if (exec == NULL) { + ALOGE("bcc: FAILS to prepare executable for '%s'", resName); + mCtx->unlockMutex(); + return false; + } + + mExecutable = exec; + + exec->setThreadable(mIsThreadable); + if (!exec->syncInfo()) { + ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); + } + + mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); + mRootExpand = + reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); + mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); + mFreeChildren = + reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); + + + info = &mExecutable->getInfo(); + if (info->getExportVarNames().size()) { + mBoundAllocs = new Allocation *[info->getExportVarNames().size()]; + memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size()); + } + + mCtx->unlockMutex(); + return true; +} + +void RsdCpuScriptImpl::populateScript(Script *script) { + const bcc::RSInfo *info = &mExecutable->getInfo(); + + // Copy info over to runtime + script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); + script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); + script->mHal.info.exportedPragmaCount = info->getPragmas().size(); + script->mHal.info.exportedPragmaKeyList = + const_cast<const char**>(mExecutable->getPragmaKeys().array()); + script->mHal.info.exportedPragmaValueList = + const_cast<const char**>(mExecutable->getPragmaValues().array()); + + if (mRootExpand) { + script->mHal.info.root = mRootExpand; + } else { + script->mHal.info.root = mRoot; + } +} + +/* +bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) { + 
pthread_mutex_lock(&rsdgInitMutex); + + DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); + if (drv == NULL) { + goto error; + } + s->mHal.drv = drv; + drv->mIntrinsicID = iid; + drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs); + s->mHal.info.isThreadable = true; + + pthread_mutex_unlock(&rsdgInitMutex); + return true; + +error: + pthread_mutex_unlock(&rsdgInitMutex); + return false; +} +*/ + +typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); + +void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout, + const void * usr, uint32_t usrLen, + const RsScriptCall *sc, + MTLaunchStruct *mtls) { + + memset(mtls, 0, sizeof(MTLaunchStruct)); + + if (ain) { + mtls->fep.dimX = ain->getType()->getDimX(); + mtls->fep.dimY = ain->getType()->getDimY(); + mtls->fep.dimZ = ain->getType()->getDimZ(); + //mtls->dimArray = ain->getType()->getDimArray(); + } else if (aout) { + mtls->fep.dimX = aout->getType()->getDimX(); + mtls->fep.dimY = aout->getType()->getDimY(); + mtls->fep.dimZ = aout->getType()->getDimZ(); + //mtls->dimArray = aout->getType()->getDimArray(); + } else { + mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + return; + } + + if (!sc || (sc->xEnd == 0)) { + mtls->xEnd = mtls->fep.dimX; + } else { + rsAssert(sc->xStart < mtls->fep.dimX); + rsAssert(sc->xEnd <= mtls->fep.dimX); + rsAssert(sc->xStart < sc->xEnd); + mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); + mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); + if (mtls->xStart >= mtls->xEnd) return; + } + + if (!sc || (sc->yEnd == 0)) { + mtls->yEnd = mtls->fep.dimY; + } else { + rsAssert(sc->yStart < mtls->fep.dimY); + rsAssert(sc->yEnd <= mtls->fep.dimY); + rsAssert(sc->yStart < sc->yEnd); + mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); + mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); + if (mtls->yStart >= mtls->yEnd) return; + } + + mtls->xEnd = 
rsMax((uint32_t)1, mtls->xEnd); + mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); + mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); + mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); + + rsAssert(!ain || (ain->getType()->getDimZ() == 0)); + + mtls->rsc = mCtx; + mtls->ain = ain; + mtls->aout = aout; + mtls->fep.usr = usr; + mtls->fep.usrLen = usrLen; + mtls->mSliceSize = 1; + mtls->mSliceNum = 0; + + mtls->fep.ptrIn = NULL; + mtls->fep.eStrideIn = 0; + mtls->isThreadable = mIsThreadable; + + if (ain) { + mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr; + mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); + mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride; + } + + mtls->fep.ptrOut = NULL; + mtls->fep.eStrideOut = 0; + if (aout) { + mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; + mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); + mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride; + } +} + + +void RsdCpuScriptImpl::invokeForEach(uint32_t slot, + const Allocation * ain, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) { + + MTLaunchStruct mtls; + forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls); + forEachKernelSetup(slot, &mtls); + + RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); + mCtx->launchThreads(ain, aout, sc, &mtls); + mCtx->setTLS(oldTLS); +} + +void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) { + + mtls->script = this; + mtls->fep.slot = slot; + + rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size()); + mtls->kernel = reinterpret_cast<ForEachFunc_t>( + mExecutable->getExportForeachFuncAddrs()[slot]); + rsAssert(mtls->kernel != NULL); + mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second; +} + +int RsdCpuScriptImpl::invokeRoot() { + RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); + int ret = mRoot(); + mCtx->setTLS(oldTLS); + return ret; +} + +void RsdCpuScriptImpl::invokeInit() { + 
if (mInit) { + mInit(); + } +} + +void RsdCpuScriptImpl::invokeFreeChildren() { + if (mFreeChildren) { + mFreeChildren(); + } +} + +void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, + size_t paramLength) { + //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); + + RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); + reinterpret_cast<void (*)(const void *, uint32_t)>( + mExecutable->getExportFuncAddrs()[slot])(params, paramLength); + mCtx->setTLS(oldTLS); +} + +void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { + //rsAssert(!script->mFieldIsObject[slot]); + //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); + + //if (mIntrinsicID) { + //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); + //return; + //} + + int32_t *destPtr = reinterpret_cast<int32_t *>( + mExecutable->getExportVarAddrs()[slot]); + if (!destPtr) { + //ALOGV("Calling setVar on slot = %i which is null", slot); + return; + } + + memcpy(destPtr, data, dataLength); +} + +void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, + const Element *elem, + const size_t *dims, size_t dimLength) { + + int32_t *destPtr = reinterpret_cast<int32_t *>( + mExecutable->getExportVarAddrs()[slot]); + if (!destPtr) { + //ALOGV("Calling setVar on slot = %i which is null", slot); + return; + } + + // We want to look at dimension in terms of integer components, + // but dimLength is given in terms of bytes. + dimLength /= sizeof(int); + + // Only a single dimension is currently supported. + rsAssert(dimLength == 1); + if (dimLength == 1) { + // First do the increment loop. + size_t stride = elem->getSizeBytes(); + const char *cVal = reinterpret_cast<const char *>(data); + for (size_t i = 0; i < dims[0]; i++) { + elem->incRefs(cVal); + cVal += stride; + } + + // Decrement loop comes after (to prevent race conditions). 
+ char *oldVal = reinterpret_cast<char *>(destPtr); + for (size_t i = 0; i < dims[0]; i++) { + elem->decRefs(oldVal); + oldVal += stride; + } + } + + memcpy(destPtr, data, dataLength); +} + +void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { + + //rsAssert(!script->mFieldIsObject[slot]); + //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); + + int32_t *destPtr = reinterpret_cast<int32_t *>( + mExecutable->getExportVarAddrs()[slot]); + if (!destPtr) { + //ALOGV("Calling setVar on slot = %i which is null", slot); + return; + } + + void *ptr = NULL; + mBoundAllocs[slot] = data; + if(data) { + ptr = data->mHal.drvState.lod[0].mallocPtr; + } + memcpy(destPtr, &ptr, sizeof(void *)); +} + +void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { + + //rsAssert(script->mFieldIsObject[slot]); + //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); + + //if (mIntrinsicID) { + //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc); + //return; + //} + + int32_t *destPtr = reinterpret_cast<int32_t *>( + mExecutable->getExportVarAddrs()[slot]); + if (!destPtr) { + //ALOGV("Calling setVar on slot = %i which is null", slot); + return; + } + + rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data); +} + +RsdCpuScriptImpl::~RsdCpuScriptImpl() { + + if (mExecutable) { + Vector<void *>::const_iterator var_addr_iter = + mExecutable->getExportVarAddrs().begin(); + Vector<void *>::const_iterator var_addr_end = + mExecutable->getExportVarAddrs().end(); + + bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = + mExecutable->getInfo().getObjectSlots().begin(); + bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = + mExecutable->getInfo().getObjectSlots().end(); + + while ((var_addr_iter != var_addr_end) && + (is_object_iter != is_object_end)) { + // The field address can be NULL if the script-side has optimized + // the corresponding global variable away. 
+ ObjectBase **obj_addr = + reinterpret_cast<ObjectBase **>(*var_addr_iter); + if (*is_object_iter) { + if (*var_addr_iter != NULL) { + rsrClearObject(mCtx->getContext(), obj_addr); + } + } + var_addr_iter++; + is_object_iter++; + } + } + + if (mCompilerContext) { + delete mCompilerContext; + } + if (mCompilerDriver) { + delete mCompilerDriver; + } + if (mExecutable) { + delete mExecutable; + } + if (mBoundAllocs) { + delete[] mBoundAllocs; + } +} + +Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { + if (!ptr) { + return NULL; + } + + for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { + Allocation *a = mBoundAllocs[ct]; + if (!a) continue; + if (a->mHal.drvState.lod[0].mallocPtr == ptr) { + return a; + } + } + ALOGE("rsGetAllocation, failed to find %p", ptr); + return NULL; +} + + +} +} diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h new file mode 100644 index 00000000..2197a204 --- /dev/null +++ b/cpu_ref/rsCpuScript.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2011-2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+// NOTE(review): the include guard is still named RSD_BCC_H although this
+// header is rsCpuScript.h; confirm no other header uses the same guard.
+#ifndef RSD_BCC_H
+#define RSD_BCC_H
+
+#include <rs_hal.h>
+#include <rsRuntime.h>
+
+#include "rsCpuCore.h"
+
+// Forward declarations so that this header does not need the bcc headers.
+namespace bcc {
+    class BCCContext;
+    class RSCompilerDriver;
+    class RSExecutable;
+}
+
+namespace android {
+namespace renderscript {
+
+
+
+// CPU reference implementation of a single script. It owns the bcc objects
+// used to build the script and the resulting executable, and calls the
+// executable's entry points and writes its exported variables.
+class RsdCpuScriptImpl : public RsdCpuReferenceImpl::CpuScript {
+public:
+    typedef void (*outer_foreach_t)(
+        const RsForEachStubParamStruct *,
+        uint32_t x1, uint32_t x2,
+        uint32_t instep, uint32_t outstep);
+
+    // Builds the executable from bitcode. Returns false on failure.
+    bool init(char const *resName, char const *cacheDir,
+              uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags);
+    // Copies export counts, pragma lists and the root entry point into the
+    // given Script's mHal.info.
+    virtual void populateScript(Script *);
+
+    // Calls the exported function in `slot` with a packed parameter buffer.
+    virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength);
+    // Calls the script's "root" entry point and returns its result.
+    virtual int invokeRoot();
+    // Runs the forEach kernel in `slot` over ain/aout; sc optionally
+    // restricts the x/y range.
+    virtual void invokeForEach(uint32_t slot,
+                       const Allocation * ain,
+                       Allocation * aout,
+                       const void * usr,
+                       uint32_t usrLen,
+                       const RsScriptCall *sc);
+    // Call the script's "init" / ".rs.dtor" entry points when present.
+    virtual void invokeInit();
+    virtual void invokeFreeChildren();
+
+    // Setters for the exported variable in `slot`. Each one does nothing
+    // when the variable has no address in the executable.
+    virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
+    virtual void setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
+                                  const Element *e, const size_t *dims, size_t dimLength);
+    virtual void setGlobalBind(uint32_t slot, Allocation *data);
+    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
+
+
+    virtual ~RsdCpuScriptImpl();
+    RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s);
+
+    const Script * getScript() {return mScript;}
+
+    // Fill in the launch description used by invokeForEach():
+    // forEachMtlsSetup() sets dimensions, range and data pointers,
+    // forEachKernelSetup() sets the kernel pointer and signature.
+    void forEachMtlsSetup(const Allocation * ain, Allocation * aout,
+                          const void * usr, uint32_t usrLen,
+                          const RsScriptCall *sc, MTLaunchStruct *mtls);
+    virtual void forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls);
+
+
+    // Symbol resolution used while building the script. lookupRuntimeStub()
+    // is the callback handed to the compiler driver; its pContext argument
+    // is the RsdCpuScriptImpl being built.
+    const RsdCpuReference::CpuSymbol * lookupSymbolMath(const char *sym);
+    static void * lookupRuntimeStub(void* pContext, char const* name);
+
+    // Returns the bound allocation whose backing pointer is ptr, or NULL.
+    virtual Allocation * getAllocationForPointer(const void *ptr) const;
+
+
+protected:
+    RsdCpuReferenceImpl *mCtx;      // owning CPU reference context
+    const Script *mScript;          // runtime-side script object
+
+    // Entry points looked up in the executable; NULL when absent.
+    int (*mRoot)();
+    int (*mRootExpand)();
+    void (*mInit)();
+    void (*mFreeChildren)();
+
+    // bcc objects created by init() and deleted by the destructor.
+    bcc::BCCContext *mCompilerContext;
+    bcc::RSCompilerDriver *mCompilerDriver;
+    bcc::RSExecutable *mExecutable;
+
+    // One entry per exported variable, set by setGlobalBind(); NULL when the
+    // script exports no variables.
+    Allocation **mBoundAllocs;
+    void * mIntrinsicData;
+    // Starts out true; cleared when a non-threadable runtime symbol is
+    // resolved for this script.
+    bool mIsThreadable;
+
+};
+
+
+// NOTE(review): this declaration keeps the old rsd-prefixed name; confirm
+// that a definition still exists.
+Allocation * rsdScriptGetAllocationForPointer(
+                        const Context *dc,
+                        const Script *script,
+                        const void *);
+
+
+
+}
+}
+
+#endif
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
new file mode 100644
index 00000000..765057d2
--- /dev/null
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "rsCpuCore.h" +#include "rsCpuScript.h" +#include "rsCpuScriptGroup.h" + +#include <bcc/BCCContext.h> +#include <bcc/Renderscript/RSCompilerDriver.h> +#include <bcc/Renderscript/RSExecutable.h> +#include <bcc/Renderscript/RSInfo.h> + +#include "rsScript.h" +#include "rsScriptGroup.h" +#include "rsCpuScriptGroup.h" +//#include "rsdBcc.h" +//#include "rsdAllocation.h" + +using namespace android; +using namespace android::renderscript; + +CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { + mCtx = ctx; + mSG = sg; +} + +CpuScriptGroupImpl::~CpuScriptGroupImpl() { + +} + +bool CpuScriptGroupImpl::init() { + return true; +} + +void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { +} + +void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { +} + + +typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); + +void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep) { + + + const ScriptList *sl = (const ScriptList *)p->usr; + RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p; + const void *oldUsr = p->usr; + + for(size_t ct=0; ct < sl->count; ct++) { + ScriptGroupRootFunc_t func; + func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; + mp->usr = sl->usrPtrs[ct]; + + mp->ptrIn = NULL; + mp->in = NULL; + mp->ptrOut = NULL; + mp->out = NULL; + + if (sl->ins[ct]) { + mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; + mp->in = mp->ptrIn; + if (sl->inExts[ct]) { + mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y; + } else { + if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) { + mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid; + } + } + } + + if (sl->outs[ct]) { + mp->ptrOut = (uint8_t 
*)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; + mp->out = mp->ptrOut; + if (sl->outExts[ct]) { + mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y; + } else { + if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) { + mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid; + } + } + } + + //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); + func(p, xstart, xend, instep, outstep); + } + //ALOGE("script group root"); + + //ConvolveParams *cp = (ConvolveParams *)p->usr; + + mp->usr = oldUsr; +} + + + +void CpuScriptGroupImpl::execute() { + Vector<Allocation *> ins; + Vector<bool> inExts; + Vector<Allocation *> outs; + Vector<bool> outExts; + Vector<const ScriptKernelID *> kernels; + bool fieldDep = false; + + for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { + ScriptGroup::Node *n = mSG->mNodes[ct]; + Script *s = n->mKernels[0]->mScript; + + //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); + + for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { + if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { + //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); + s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); + } + } + + for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { + const ScriptKernelID *k = n->mKernels[ct2]; + Allocation *ain = NULL; + Allocation *aout = NULL; + bool inExt = false; + bool outExt = false; + + for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { + if (n->mInputs[ct3]->mDstKernel.get() == k) { + ain = n->mInputs[ct3]->mAlloc.get(); + //ALOGE(" link in %p", ain); + } + } + for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { + if (mSG->mInputs[ct3]->mKernel == k) { + ain = mSG->mInputs[ct3]->mAlloc.get(); + inExt = true; + //ALOGE(" io in %p", ain); + } + } + + for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { + 
if (n->mOutputs[ct3]->mSource.get() == k) { + aout = n->mOutputs[ct3]->mAlloc.get(); + if(n->mOutputs[ct3]->mDstField.get() != NULL) { + fieldDep = true; + } + //ALOGE(" link out %p", aout); + } + } + for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { + if (mSG->mOutputs[ct3]->mKernel == k) { + aout = mSG->mOutputs[ct3]->mAlloc.get(); + outExt = true; + //ALOGE(" io out %p", aout); + } + } + + if ((k->mHasKernelOutput == (aout != NULL)) && + (k->mHasKernelInput == (ain != NULL))) { + ins.add(ain); + inExts.add(inExt); + outs.add(aout); + outExts.add(outExt); + kernels.add(k); + } + } + + } + + MTLaunchStruct mtls; + + if(fieldDep) { + for (size_t ct=0; ct < ins.size(); ct++) { + Script *s = kernels[ct]->mScript; + RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); + uint32_t slot = kernels[ct]->mSlot; + + si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); + si->forEachKernelSetup(slot, &mtls); + mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); + } + } else { + ScriptList sl; + sl.ins = ins.array(); + sl.outs = outs.array(); + sl.kernels = kernels.array(); + sl.count = kernels.size(); + + Vector<const void *> usrPtrs; + Vector<const void *> fnPtrs; + Vector<uint32_t> sigs; + for (size_t ct=0; ct < kernels.size(); ct++) { + Script *s = kernels[ct]->mScript; + RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); + + si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); + fnPtrs.add((void *)mtls.kernel); + usrPtrs.add(mtls.fep.usr); + sigs.add(mtls.fep.usrLen); + } + sl.sigs = sigs.array(); + sl.usrPtrs = usrPtrs.array(); + sl.fnPtrs = fnPtrs.array(); + sl.inExts = inExts.array(); + sl.outExts = outExts.array(); + + Script *s = kernels[0]->mScript; + RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); + si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); + mtls.script = NULL; + mtls.kernel = (void (*)())&scriptGroupRoot; + mtls.fep.usr = &sl; + mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); + } +} 
+ +void rsdScriptGroupDestroy(const android::renderscript::Context *rsc, + const android::renderscript::ScriptGroup *sg) { +} + + diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h new file mode 100644 index 00000000..f6fa2ac0 --- /dev/null +++ b/cpu_ref/rsCpuScriptGroup.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2011-2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSD_SCRIPT_GROUP_H +#define RSD_SCRIPT_GROUP_H + +#include <rsd_cpu.h> + +namespace android { +namespace renderscript { + + +class CpuScriptGroupImpl : public RsdCpuReference::CpuScriptGroup { +public: + virtual void setInput(const ScriptKernelID *kid, Allocation *); + virtual void setOutput(const ScriptKernelID *kid, Allocation *); + virtual void execute(); + virtual ~CpuScriptGroupImpl(); + + CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg); + bool init(); + + static void scriptGroupRoot(const RsForEachStubParamStruct *p, + uint32_t xstart, uint32_t xend, + uint32_t instep, uint32_t outstep); + +protected: + struct ScriptList { + size_t count; + Allocation *const* ins; + bool const* inExts; + Allocation *const* outs; + bool const* outExts; + const void *const* usrPtrs; + size_t const *usrSizes; + uint32_t const *sigs; + const void *const* fnPtrs; + + const ScriptKernelID *const* kernels; + }; + ScriptList mSl; + const ScriptGroup *mSG; + RsdCpuReferenceImpl *mCtx; +}; + +} +} + +#endif // RSD_SCRIPT_GROUP_H 
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h new file mode 100644 index 00000000..d96d2d1d --- /dev/null +++ b/cpu_ref/rsd_cpu.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef RSD_CPU_H +#define RSD_CPU_H + +#include "rsAllocation.h" + + +namespace android { +namespace renderscript { + +class ScriptC; +class Script; +class ScriptGroup; +class ScriptKernelID; + + +class RsdCpuReference { +public: + struct CpuSymbol { + const char * name; + void * fnPtr; + bool threadable; + }; + + typedef const CpuSymbol * (* sym_lookup_t)(Context *, const char *name); + + struct CpuTls { + Context *rsc; + const ScriptC * sc; + }; + + class CpuScript { + public: + virtual void populateScript(Script *) = 0; + virtual void invokeFunction(uint32_t slot, const void *params, size_t paramLength) = 0; + virtual int invokeRoot() = 0; + virtual void invokeForEach(uint32_t slot, + const Allocation * ain, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) = 0; + virtual void invokeInit() = 0; + virtual void invokeFreeChildren() = 0; + + virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) = 0; + virtual void setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, + const Element *e, const size_t *dims, size_t dimLength) = 0; + virtual void setGlobalBind(uint32_t slot, Allocation *data) = 0; + virtual void 
setGlobalObj(uint32_t slot, ObjectBase *obj) = 0; + + virtual Allocation * getAllocationForPointer(const void *ptr) const = 0; + virtual ~CpuScript() {} + }; + typedef CpuScript * (* script_lookup_t)(Context *, const Script *s); + + class CpuScriptGroup { + public: + virtual void setInput(const ScriptKernelID *kid, Allocation *) = 0; + virtual void setOutput(const ScriptKernelID *kid, Allocation *) = 0; + virtual void execute() = 0; + virtual ~CpuScriptGroup() {}; + }; + + static Context * getTlsContext(); + static const Script * getTlsScript(); + + static RsdCpuReference * create(Context *c, uint32_t version_major, + uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn); + virtual ~RsdCpuReference(); + virtual void setPriority(int32_t priority) = 0; + + virtual CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir, + uint8_t const *bitcode, size_t bitcodeSize, + uint32_t flags) = 0; + virtual CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) = 0; + virtual CpuScriptGroup * createScriptGroup(const ScriptGroup *sg) = 0; +}; + + +} +} + +#endif diff --git a/driver/rsdAllocation.cpp b/driver/rsdAllocation.cpp index 8956b2ec..928f7779 100644 --- a/driver/rsdAllocation.cpp +++ b/driver/rsdAllocation.cpp @@ -16,7 +16,6 @@ #include "rsdCore.h" -#include "rsdRuntime.h" #include "rsdAllocation.h" #include "rsdFrameBufferObj.h" @@ -80,10 +79,9 @@ GLenum rsdKindToGLFormat(RsDataKind k) { uint8_t *GetOffsetPtr(const android::renderscript::Allocation *alloc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - uint8_t *ptr = (uint8_t *)drv->lod[lod].mallocPtr; - ptr += face * drv->faceOffset; - ptr += yoff * drv->lod[lod].stride; + uint8_t *ptr = (uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr; + ptr += face * alloc->mHal.drvState.faceOffset; + ptr += yoff * alloc->mHal.drvState.lod[lod].stride; ptr += xoff * 
alloc->mHal.state.elementSizeBytes; return ptr; } @@ -160,7 +158,7 @@ static void UploadToTexture(const Context *rsc, const Allocation *alloc) { return; } - if (!drv->lod[0].mallocPtr) { + if (!alloc->mHal.drvState.lod[0].mallocPtr) { return; } @@ -174,10 +172,9 @@ static void UploadToTexture(const Context *rsc, const Allocation *alloc) { Upload2DTexture(rsc, alloc, isFirstUpload); if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_SCRIPT)) { - if (alloc->mHal.drvState.mallocPtrLOD0) { - free(alloc->mHal.drvState.mallocPtrLOD0); - alloc->mHal.drvState.mallocPtrLOD0 = NULL; - drv->lod[0].mallocPtr = NULL; + if (alloc->mHal.drvState.lod[0].mallocPtr) { + free(alloc->mHal.drvState.lod[0].mallocPtr); + alloc->mHal.drvState.lod[0].mallocPtr = NULL; } } rsdGLCheckError(rsc, "UploadToTexture"); @@ -224,54 +221,50 @@ static void UploadToBufferObject(const Context *rsc, const Allocation *alloc) { } RSD_CALL_GL(glBindBuffer, drv->glTarget, drv->bufferID); RSD_CALL_GL(glBufferData, drv->glTarget, alloc->mHal.state.type->getSizeBytes(), - alloc->mHal.drvState.mallocPtrLOD0, GL_DYNAMIC_DRAW); + alloc->mHal.drvState.lod[0].mallocPtr, GL_DYNAMIC_DRAW); RSD_CALL_GL(glBindBuffer, drv->glTarget, 0); rsdGLCheckError(rsc, "UploadToBufferObject"); } static size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc, const Type *type, uint8_t *ptr) { - - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - - drv->lod[0].dimX = type->getDimX(); - drv->lod[0].dimY = type->getDimY(); - drv->lod[0].mallocPtr = 0; - drv->lod[0].stride = drv->lod[0].dimX * type->getElementSizeBytes(); - drv->lodCount = type->getLODCount(); - drv->faceCount = type->getDimFaces(); + alloc->mHal.drvState.lod[0].dimX = type->getDimX(); + alloc->mHal.drvState.lod[0].dimY = type->getDimY(); + alloc->mHal.drvState.lod[0].mallocPtr = 0; + alloc->mHal.drvState.lod[0].stride = alloc->mHal.drvState.lod[0].dimX * type->getElementSizeBytes(); + alloc->mHal.drvState.lodCount = 
type->getLODCount(); + alloc->mHal.drvState.faceCount = type->getDimFaces(); size_t offsets[Allocation::MAX_LOD]; memset(offsets, 0, sizeof(offsets)); - size_t o = drv->lod[0].stride * rsMax(drv->lod[0].dimY, 1u) * rsMax(drv->lod[0].dimZ, 1u); - if(drv->lodCount > 1) { - uint32_t tx = drv->lod[0].dimX; - uint32_t ty = drv->lod[0].dimY; - uint32_t tz = drv->lod[0].dimZ; - for (uint32_t lod=1; lod < drv->lodCount; lod++) { - drv->lod[lod].dimX = tx; - drv->lod[lod].dimY = ty; - drv->lod[lod].dimZ = tz; - drv->lod[lod].stride = tx * type->getElementSizeBytes(); + size_t o = alloc->mHal.drvState.lod[0].stride * rsMax(alloc->mHal.drvState.lod[0].dimY, 1u) * + rsMax(alloc->mHal.drvState.lod[0].dimZ, 1u); + if(alloc->mHal.drvState.lodCount > 1) { + uint32_t tx = alloc->mHal.drvState.lod[0].dimX; + uint32_t ty = alloc->mHal.drvState.lod[0].dimY; + uint32_t tz = alloc->mHal.drvState.lod[0].dimZ; + for (uint32_t lod=1; lod < alloc->mHal.drvState.lodCount; lod++) { + alloc->mHal.drvState.lod[lod].dimX = tx; + alloc->mHal.drvState.lod[lod].dimY = ty; + alloc->mHal.drvState.lod[lod].dimZ = tz; + alloc->mHal.drvState.lod[lod].stride = tx * type->getElementSizeBytes(); offsets[lod] = o; - o += drv->lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u); + o += alloc->mHal.drvState.lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u); if (tx > 1) tx >>= 1; if (ty > 1) ty >>= 1; if (tz > 1) tz >>= 1; } } - drv->faceOffset = o; + alloc->mHal.drvState.faceOffset = o; - drv->lod[0].mallocPtr = ptr; - for (uint32_t lod=1; lod < drv->lodCount; lod++) { - drv->lod[lod].mallocPtr = ptr + offsets[lod]; + alloc->mHal.drvState.lod[0].mallocPtr = ptr; + for (uint32_t lod=1; lod < alloc->mHal.drvState.lodCount; lod++) { + alloc->mHal.drvState.lod[lod].mallocPtr = ptr + offsets[lod]; } - alloc->mHal.drvState.strideLOD0 = drv->lod[0].stride; - alloc->mHal.drvState.mallocPtrLOD0 = ptr; - size_t allocSize = drv->faceOffset; - if(drv->faceCount) { + size_t allocSize = alloc->mHal.drvState.faceOffset; + 
if(alloc->mHal.drvState.faceCount) { allocSize *= 6; } @@ -352,9 +345,9 @@ void rsdAllocationDestroy(const Context *rsc, Allocation *alloc) { drv->renderTargetID = 0; } - if (alloc->mHal.drvState.mallocPtrLOD0) { - free(alloc->mHal.drvState.mallocPtrLOD0); - alloc->mHal.drvState.mallocPtrLOD0 = NULL; + if (alloc->mHal.drvState.lod[0].mallocPtr) { + free(alloc->mHal.drvState.lod[0].mallocPtr); + alloc->mHal.drvState.lod[0].mallocPtr = NULL; } if (drv->readBackFBO != NULL) { delete drv->readBackFBO; @@ -366,9 +359,7 @@ void rsdAllocationDestroy(const Context *rsc, Allocation *alloc) { void rsdAllocationResize(const Context *rsc, const Allocation *alloc, const Type *newType, bool zeroNew) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - - void * oldPtr = drv->lod[0].mallocPtr; + void * oldPtr = alloc->mHal.drvState.lod[0].mallocPtr; // Calculate the object size size_t s = AllocationBuildPointerTable(rsc, alloc, newType, NULL); uint8_t *ptr = (uint8_t *)realloc(oldPtr, s); @@ -383,7 +374,7 @@ void rsdAllocationResize(const Context *rsc, const Allocation *alloc, if (dimX > oldDimX) { uint32_t stride = alloc->mHal.state.elementSizeBytes; - memset(((uint8_t *)alloc->mHal.drvState.mallocPtrLOD0) + stride * oldDimX, + memset(((uint8_t *)alloc->mHal.drvState.lod[0].mallocPtr) + stride * oldDimX, 0, stride * (dimX - oldDimX)); } } @@ -411,8 +402,9 @@ static void rsdAllocationSyncFromFBO(const Context *rsc, const Allocation *alloc drv->readBackFBO->setActive(rsc); // Do the readback - RSD_CALL_GL(glReadPixels, 0, 0, drv->lod[0].dimX, drv->lod[0].dimY, - drv->glFormat, drv->glType, drv->lod[0].mallocPtr); + RSD_CALL_GL(glReadPixels, 0, 0, alloc->mHal.drvState.lod[0].dimX, + alloc->mHal.drvState.lod[0].dimY, + drv->glFormat, drv->glType, alloc->mHal.drvState.lod[0].mallocPtr); // Revert framebuffer to its original lastFbo->setActive(rsc); @@ -482,9 +474,8 @@ static bool IoGetBuffer(const Context *rsc, Allocation *alloc, ANativeWindow *nw 
mapper.lock(drv->wndBuffer->handle, GRALLOC_USAGE_SW_READ_NEVER | GRALLOC_USAGE_SW_WRITE_OFTEN, bounds, &dst); - drv->lod[0].mallocPtr = dst; - alloc->mHal.drvState.mallocPtrLOD0 = dst; - drv->lod[0].stride = drv->wndBuffer->stride * alloc->mHal.state.elementSizeBytes; + alloc->mHal.drvState.lod[0].mallocPtr = dst; + alloc->mHal.drvState.lod[0].stride = drv->wndBuffer->stride * alloc->mHal.state.elementSizeBytes; return true; } @@ -597,7 +588,7 @@ void rsdAllocationData2D(const Context *rsc, const Allocation *alloc, uint32_t eSize = alloc->mHal.state.elementSizeBytes; uint32_t lineSize = eSize * w; - if (drv->lod[0].mallocPtr) { + if (alloc->mHal.drvState.lod[0].mallocPtr) { const uint8_t *src = static_cast<const uint8_t *>(data); uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, lod, face); @@ -608,7 +599,7 @@ void rsdAllocationData2D(const Context *rsc, const Allocation *alloc, } memcpy(dst, src, lineSize); src += lineSize; - dst += drv->lod[lod].stride; + dst += alloc->mHal.drvState.lod[lod].stride; } drv->uploadDeferred = true; } else { @@ -626,8 +617,6 @@ void rsdAllocationData3D(const Context *rsc, const Allocation *alloc, void rsdAllocationRead1D(const Context *rsc, const Allocation *alloc, uint32_t xoff, uint32_t lod, uint32_t count, void *data, size_t sizeBytes) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - const uint32_t eSize = alloc->mHal.state.type->getElementSizeBytes(); const uint8_t * ptr = GetOffsetPtr(alloc, xoff, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); memcpy(data, ptr, count * eSize); @@ -636,19 +625,17 @@ void rsdAllocationRead1D(const Context *rsc, const Allocation *alloc, void rsdAllocationRead2D(const Context *rsc, const Allocation *alloc, uint32_t xoff, uint32_t yoff, uint32_t lod, RsAllocationCubemapFace face, uint32_t w, uint32_t h, void *data, size_t sizeBytes) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - uint32_t eSize = alloc->mHal.state.elementSizeBytes; uint32_t lineSize = eSize * w; - if 
(drv->lod[0].mallocPtr) { + if (alloc->mHal.drvState.lod[0].mallocPtr) { uint8_t *dst = static_cast<uint8_t *>(data); const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, lod, face); for (uint32_t line=yoff; line < (yoff+h); line++) { memcpy(dst, src, lineSize); dst += lineSize; - src += drv->lod[lod].stride; + src += alloc->mHal.drvState.lod[lod].stride; } } else { ALOGE("Add code to readback from non-script memory"); @@ -664,8 +651,7 @@ void rsdAllocationRead3D(const Context *rsc, const Allocation *alloc, void * rsdAllocationLock1D(const android::renderscript::Context *rsc, const android::renderscript::Allocation *alloc) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - return drv->lod[0].mallocPtr; + return alloc->mHal.drvState.lod[0].mallocPtr; } void rsdAllocationUnlock1D(const android::renderscript::Context *rsc, @@ -767,9 +753,8 @@ void rsdAllocationElementData2D(const Context *rsc, const Allocation *alloc, } static void mip565(const Allocation *alloc, int lod, RsAllocationCubemapFace face) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - uint32_t w = drv->lod[lod + 1].dimX; - uint32_t h = drv->lod[lod + 1].dimY; + uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX; + uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY; for (uint32_t y=0; y < h; y++) { uint16_t *oPtr = (uint16_t *)GetOffsetPtr(alloc, 0, y, lod + 1, face); @@ -786,9 +771,8 @@ static void mip565(const Allocation *alloc, int lod, RsAllocationCubemapFace fac } static void mip8888(const Allocation *alloc, int lod, RsAllocationCubemapFace face) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - uint32_t w = drv->lod[lod + 1].dimX; - uint32_t h = drv->lod[lod + 1].dimY; + uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX; + uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY; for (uint32_t y=0; y < h; y++) { uint32_t *oPtr = (uint32_t *)GetOffsetPtr(alloc, 0, y, lod + 1, face); @@ -805,9 +789,8 @@ static void mip8888(const Allocation *alloc, int lod, 
RsAllocationCubemapFace fa } static void mip8(const Allocation *alloc, int lod, RsAllocationCubemapFace face) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - uint32_t w = drv->lod[lod + 1].dimX; - uint32_t h = drv->lod[lod + 1].dimY; + uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX; + uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY; for (uint32_t y=0; y < h; y++) { uint8_t *oPtr = GetOffsetPtr(alloc, 0, y, lod + 1, face); @@ -824,8 +807,7 @@ static void mip8(const Allocation *alloc, int lod, RsAllocationCubemapFace face) } void rsdAllocationGenerateMipmaps(const Context *rsc, const Allocation *alloc) { - DrvAllocation *drv = (DrvAllocation *)alloc->mHal.drv; - if(!drv->lod[0].mallocPtr) { + if(!alloc->mHal.drvState.lod[0].mallocPtr) { return; } uint32_t numFaces = alloc->getType()->getDimFaces() ? 6 : 1; diff --git a/driver/rsdAllocation.h b/driver/rsdAllocation.h index e6488b93..d2ecc9a4 100644 --- a/driver/rsdAllocation.h +++ b/driver/rsdAllocation.h @@ -21,6 +21,8 @@ #include <rsRuntime.h> #include <rsAllocation.h> +#include "../cpu_ref/rsd_cpu.h" + #include <GLES/gl.h> #include <GLES2/gl2.h> @@ -49,19 +51,6 @@ struct DrvAllocation { RsdFrameBufferObj * readBackFBO; ANativeWindow *wnd; ANativeWindowBuffer *wndBuffer; - - struct LodState { - void * mallocPtr; - size_t stride; - uint32_t dimX; - uint32_t dimY; - uint32_t dimZ; - } lod[android::renderscript::Allocation::MAX_LOD]; - size_t faceOffset; - uint32_t lodCount; - uint32_t faceCount; - - }; GLenum rsdTypeToGLType(RsDataType t); diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp index ddcaac81..436b9b2a 100644 --- a/driver/rsdBcc.cpp +++ b/driver/rsdBcc.cpp @@ -14,17 +14,12 @@ * limitations under the License. 
*/ -#include "rsdCore.h" +#include "../cpu_ref/rsd_cpu.h" -#include <bcc/BCCContext.h> -#include <bcc/Renderscript/RSCompilerDriver.h> -#include <bcc/Renderscript/RSExecutable.h> -#include <bcc/Renderscript/RSInfo.h> +#include "rsdCore.h" #include "rsdBcc.h" -#include "rsdRuntime.h" #include "rsdAllocation.h" -#include "rsdIntrinsics.h" #include "rsContext.h" #include "rsElement.h" @@ -38,15 +33,6 @@ using namespace android; using namespace android::renderscript; -static Script * setTLS(Script *sc) { - ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); - rsAssert(tls); - Script *old = tls->mScript; - tls->mScript = sc; - return old; -} - - bool rsdScriptInit(const Context *rsc, ScriptC *script, char const *resName, @@ -54,358 +40,26 @@ bool rsdScriptInit(const Context *rsc, uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) { - //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc); - //ALOGE("rsdScriptInit %p %p", rsc, script); - - pthread_mutex_lock(&rsdgInitMutex); - - bcc::RSExecutable *exec; - const bcc::RSInfo *info; - DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); - if (drv == NULL) { - goto error; - } - script->mHal.drv = drv; - - drv->mCompilerContext = NULL; - drv->mCompilerDriver = NULL; - drv->mExecutable = NULL; - - drv->mCompilerContext = new bcc::BCCContext(); - if (drv->mCompilerContext == NULL) { - ALOGE("bcc: FAILS to create compiler context (out of memory)"); - goto error; - } - - drv->mCompilerDriver = new bcc::RSCompilerDriver(); - if (drv->mCompilerDriver == NULL) { - ALOGE("bcc: FAILS to create compiler driver (out of memory)"); - goto error; - } - - script->mHal.info.isThreadable = true; - - drv->mCompilerDriver->setRSRuntimeLookupFunction(rsdLookupRuntimeStub); - drv->mCompilerDriver->setRSRuntimeLookupContext(script); - - exec = drv->mCompilerDriver->build(*drv->mCompilerContext, - cacheDir, resName, - (const char *)bitcode, 
bitcodeSize, - NULL); - - if (exec == NULL) { - ALOGE("bcc: FAILS to prepare executable for '%s'", resName); - goto error; - } - - drv->mExecutable = exec; - - exec->setThreadable(script->mHal.info.isThreadable); - if (!exec->syncInfo()) { - ALOGW("bcc: FAILS to synchronize the RS info file to the disk"); - } - - drv->mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root")); - drv->mRootExpand = - reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand")); - drv->mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init")); - drv->mFreeChildren = - reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor")); - - info = &drv->mExecutable->getInfo(); - // Copy info over to runtime - script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size(); - script->mHal.info.exportedVariableCount = info->getExportVarNames().size(); - script->mHal.info.exportedPragmaCount = info->getPragmas().size(); - script->mHal.info.exportedPragmaKeyList = - const_cast<const char**>(exec->getPragmaKeys().array()); - script->mHal.info.exportedPragmaValueList = - const_cast<const char**>(exec->getPragmaValues().array()); - - if (drv->mRootExpand) { - script->mHal.info.root = drv->mRootExpand; - } else { - script->mHal.info.root = drv->mRoot; - } - - if (script->mHal.info.exportedVariableCount) { - drv->mBoundAllocs = new Allocation *[script->mHal.info.exportedVariableCount]; - memset(drv->mBoundAllocs, 0, sizeof(void *) * script->mHal.info.exportedVariableCount); - } - - pthread_mutex_unlock(&rsdgInitMutex); + RsdHal *dc = (RsdHal *)rsc->mHal.drv; + RsdCpuReference::CpuScript * cs = dc->mCpuRef->createScript(script, resName, cacheDir, + bitcode, bitcodeSize, flags); + if (cs == NULL) { + return false; + } + script->mHal.drv = cs; + cs->populateScript(script); return true; - -error: - - pthread_mutex_unlock(&rsdgInitMutex); - if (drv) { - delete drv->mCompilerContext; - delete drv->mCompilerDriver; - delete drv->mExecutable; - delete[] 
drv->mBoundAllocs; - free(drv); - } - script->mHal.drv = NULL; - return false; - } bool rsdInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, Element *e) { - pthread_mutex_lock(&rsdgInitMutex); - - DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript)); - if (drv == NULL) { - goto error; + RsdHal *dc = (RsdHal *)rsc->mHal.drv; + RsdCpuReference::CpuScript * cs = dc->mCpuRef->createIntrinsic(s, iid, e); + if (cs == NULL) { + return false; } - s->mHal.drv = drv; - drv->mIntrinsicID = iid; - drv->mIntrinsicData = rsdIntrinsic_Init(rsc, s, iid, &drv->mIntrinsicFuncs); - s->mHal.info.isThreadable = true; - - pthread_mutex_unlock(&rsdgInitMutex); + s->mHal.drv = cs; + cs->populateScript(s); return true; - -error: - pthread_mutex_unlock(&rsdgInitMutex); - return false; -} - -typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); - -static void wc_xy(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; - RsForEachStubParamStruct p; - memcpy(&p, &mtls->fep, sizeof(p)); - p.lid = idx; - RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; - uint32_t sig = mtls->sig; - -#if defined(ARCH_ARM_RS_USE_CACHED_SCANLINE_WRITE) - unsigned char buf[1024 * 8]; -#endif - - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; - while (1) { - uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); - uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; - uint32_t yEnd = yStart + mtls->mSliceSize; - yEnd = rsMin(yEnd, mtls->yEnd); - if (yEnd <= yStart) { - return; - } - - //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); - //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut); - -#if defined(ARCH_ARM_RS_USE_CACHED_SCANLINE_WRITE) - if (mtls->fep.yStrideOut < sizeof(buf)) { - p.out = buf; - for (p.y = yStart; p.y < yEnd; p.y++) { - p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y); - fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, 
mtls->fep.eStrideOut); - memcpy(mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y), buf, mtls->fep.yStrideOut); - } - } else -#endif - { - for (p.y = yStart; p.y < yEnd; p.y++) { - p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) + - (mtls->fep.eStrideOut * mtls->xStart); - p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) + - (mtls->fep.eStrideIn * mtls->xStart); - fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); - } - } - } -} - -static void wc_x(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; - RsForEachStubParamStruct p; - memcpy(&p, &mtls->fep, sizeof(p)); - p.lid = idx; - RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; - uint32_t sig = mtls->sig; - - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; - while (1) { - uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); - uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; - uint32_t xEnd = xStart + mtls->mSliceSize; - xEnd = rsMin(xEnd, mtls->xEnd); - if (xEnd <= xStart) { - return; - } - - //ALOGE("usr slice %i idx %i, x %i,%i", slice, idx, xStart, xEnd); - //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut); - - p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart); - p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart); - fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); - } -} - -void rsdScriptInvokeForEachMtlsSetup(const Context *rsc, - const Allocation * ain, - Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc, - MTLaunchStruct *mtls) { - - memset(mtls, 0, sizeof(MTLaunchStruct)); - - if (ain) { - mtls->fep.dimX = ain->getType()->getDimX(); - mtls->fep.dimY = ain->getType()->getDimY(); - mtls->fep.dimZ = ain->getType()->getDimZ(); - //mtls->dimArray = ain->getType()->getDimArray(); - } else if (aout) { - mtls->fep.dimX = aout->getType()->getDimX(); - mtls->fep.dimY = aout->getType()->getDimY(); - mtls->fep.dimZ = aout->getType()->getDimZ(); - 
//mtls->dimArray = aout->getType()->getDimArray(); - } else { - rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); - return; - } - - if (!sc || (sc->xEnd == 0)) { - mtls->xEnd = mtls->fep.dimX; - } else { - rsAssert(sc->xStart < mtls->fep.dimX); - rsAssert(sc->xEnd <= mtls->fep.dimX); - rsAssert(sc->xStart < sc->xEnd); - mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart); - mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd); - if (mtls->xStart >= mtls->xEnd) return; - } - - if (!sc || (sc->yEnd == 0)) { - mtls->yEnd = mtls->fep.dimY; - } else { - rsAssert(sc->yStart < mtls->fep.dimY); - rsAssert(sc->yEnd <= mtls->fep.dimY); - rsAssert(sc->yStart < sc->yEnd); - mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart); - mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd); - if (mtls->yStart >= mtls->yEnd) return; - } - - mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd); - mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd); - mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd); - mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd); - - rsAssert(!ain || (ain->getType()->getDimZ() == 0)); - - Context *mrsc = (Context *)rsc; - mtls->rsc = mrsc; - mtls->ain = ain; - mtls->aout = aout; - mtls->fep.usr = usr; - mtls->fep.usrLen = usrLen; - mtls->mSliceSize = 10; - mtls->mSliceNum = 0; - - mtls->fep.ptrIn = NULL; - mtls->fep.eStrideIn = 0; - - if (ain) { - DrvAllocation *aindrv = (DrvAllocation *)ain->mHal.drv; - mtls->fep.ptrIn = (const uint8_t *)aindrv->lod[0].mallocPtr; - mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes(); - mtls->fep.yStrideIn = aindrv->lod[0].stride; - } - - mtls->fep.ptrOut = NULL; - mtls->fep.eStrideOut = 0; - if (aout) { - DrvAllocation *aoutdrv = (DrvAllocation *)aout->mHal.drv; - mtls->fep.ptrOut = (uint8_t *)aoutdrv->lod[0].mallocPtr; - mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes(); - mtls->fep.yStrideOut = aoutdrv->lod[0].stride; - } -} - -void rsdScriptLaunchThreads(const Context *rsc, - bool isThreadable, - const Allocation * ain, - 
Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc, - MTLaunchStruct *mtls) { - - Context *mrsc = (Context *)rsc; - RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv; - - if ((dc->mWorkers.mCount >= 1) && isThreadable && !dc->mInForEach) { - const size_t targetByteChunk = 16 * 1024; - dc->mInForEach = true; - if (mtls->fep.dimY > 1) { - uint32_t s1 = mtls->fep.dimY / ((dc->mWorkers.mCount + 1) * 4); - uint32_t s2 = 0; - - // This chooses our slice size to rate limit atomic ops to - // one per 16k bytes of reads/writes. - if (mtls->fep.yStrideOut) { - s2 = targetByteChunk / mtls->fep.yStrideOut; - } else { - s2 = targetByteChunk / mtls->fep.yStrideIn; - } - mtls->mSliceSize = rsMin(s1, s2); - - if(mtls->mSliceSize < 1) { - mtls->mSliceSize = 1; - } - - rsdLaunchThreads(mrsc, wc_xy, mtls); - } else { - uint32_t s1 = mtls->fep.dimX / ((dc->mWorkers.mCount + 1) * 4); - uint32_t s2 = 0; - - // This chooses our slice size to rate limit atomic ops to - // one per 16k bytes of reads/writes. 
- if (mtls->fep.eStrideOut) { - s2 = targetByteChunk / mtls->fep.eStrideOut; - } else { - s2 = targetByteChunk / mtls->fep.eStrideIn; - } - mtls->mSliceSize = rsMin(s1, s2); - - if(mtls->mSliceSize < 1) { - mtls->mSliceSize = 1; - } - - rsdLaunchThreads(mrsc, wc_x, mtls); - } - dc->mInForEach = false; - - //ALOGE("launch 1"); - } else { - RsForEachStubParamStruct p; - memcpy(&p, &mtls->fep, sizeof(p)); - uint32_t sig = mtls->sig; - - //ALOGE("launch 3"); - outer_foreach_t fn = (outer_foreach_t) mtls->kernel; - for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) { - for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) { - for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) { - uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] + - mtls->fep.dimY * p.z + p.y; - p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) + - (mtls->fep.eStrideOut * mtls->xStart); - p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) + - (mtls->fep.eStrideIn * mtls->xStart); - fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut); - } - } - } - } } void rsdScriptInvokeForEach(const Context *rsc, @@ -417,237 +71,69 @@ void rsdScriptInvokeForEach(const Context *rsc, uint32_t usrLen, const RsScriptCall *sc) { - RsdHal * dc = (RsdHal *)rsc->mHal.drv; - - MTLaunchStruct mtls; - rsdScriptInvokeForEachMtlsSetup(rsc, ain, aout, usr, usrLen, sc, &mtls); - mtls.script = s; - mtls.fep.slot = slot; - - DrvScript *drv = (DrvScript *)s->mHal.drv; - if (drv->mIntrinsicID) { - mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root; - mtls.fep.usr = drv->mIntrinsicData; - } else { - rsAssert(slot < drv->mExecutable->getExportForeachFuncAddrs().size()); - mtls.kernel = reinterpret_cast<ForEachFunc_t>( - drv->mExecutable->getExportForeachFuncAddrs()[slot]); - rsAssert(mtls.kernel != NULL); - mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second; - } - - - Script * oldTLS = setTLS(s); - rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, 
ain, aout, usr, usrLen, sc, &mtls); - setTLS(oldTLS); + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->invokeForEach(slot, ain, aout, usr, usrLen, sc); } -int rsdScriptInvokeRoot(const Context *dc, Script *script) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - - Script * oldTLS = setTLS(script); - int ret = drv->mRoot(); - setTLS(oldTLS); - - return ret; +int rsdScriptInvokeRoot(const Context *dc, Script *s) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + return cs->invokeRoot(); } -void rsdScriptInvokeInit(const Context *dc, Script *script) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - - if (drv->mInit) { - drv->mInit(); - } +void rsdScriptInvokeInit(const Context *dc, Script *s) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->invokeInit(); } -void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - - if (drv->mFreeChildren) { - drv->mFreeChildren(); - } +void rsdScriptInvokeFreeChildren(const Context *dc, Script *s) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->invokeFreeChildren(); } -void rsdScriptInvokeFunction(const Context *dc, Script *script, +void rsdScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot, const void *params, size_t paramLength) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); - - Script * oldTLS = setTLS(script); - reinterpret_cast<void (*)(const void *, uint32_t)>( - drv->mExecutable->getExportFuncAddrs()[slot])(params, paramLength); - setTLS(oldTLS); + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->invokeFunction(slot, params, paramLength); } -void rsdScriptSetGlobalVar(const Context *dc, const Script *script, +void rsdScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot, void 
*data, size_t dataLength) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - //rsAssert(!script->mFieldIsObject[slot]); - //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength); - - if (drv->mIntrinsicID) { - drv->mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); - return; - } - - int32_t *destPtr = reinterpret_cast<int32_t *>( - drv->mExecutable->getExportVarAddrs()[slot]); - if (!destPtr) { - //ALOGV("Calling setVar on slot = %i which is null", slot); - return; - } - - memcpy(destPtr, data, dataLength); + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->setGlobalVar(slot, data, dataLength); } -void rsdScriptSetGlobalVarWithElemDims( - const android::renderscript::Context *dc, - const android::renderscript::Script *script, - uint32_t slot, void *data, size_t dataLength, - const android::renderscript::Element *elem, - const size_t *dims, size_t dimLength) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - - int32_t *destPtr = reinterpret_cast<int32_t *>( - drv->mExecutable->getExportVarAddrs()[slot]); - if (!destPtr) { - //ALOGV("Calling setVar on slot = %i which is null", slot); - return; - } - - // We want to look at dimension in terms of integer components, - // but dimLength is given in terms of bytes. - dimLength /= sizeof(int); - - // Only a single dimension is currently supported. - rsAssert(dimLength == 1); - if (dimLength == 1) { - // First do the increment loop. - size_t stride = elem->getSizeBytes(); - char *cVal = reinterpret_cast<char *>(data); - for (size_t i = 0; i < dims[0]; i++) { - elem->incRefs(cVal); - cVal += stride; - } - - // Decrement loop comes after (to prevent race conditions). 
- char *oldVal = reinterpret_cast<char *>(destPtr); - for (size_t i = 0; i < dims[0]; i++) { - elem->decRefs(oldVal); - oldVal += stride; - } - } - - memcpy(destPtr, data, dataLength); +void rsdScriptSetGlobalVarWithElemDims(const Context *dc, const Script *s, + uint32_t slot, void *data, size_t dataLength, + const android::renderscript::Element *elem, + const size_t *dims, size_t dimLength) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength); } -void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, Allocation *data) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - - //rsAssert(!script->mFieldIsObject[slot]); - //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data); - - rsAssert(!drv->mIntrinsicID); - - int32_t *destPtr = reinterpret_cast<int32_t *>( - drv->mExecutable->getExportVarAddrs()[slot]); - if (!destPtr) { - //ALOGV("Calling setVar on slot = %i which is null", slot); - return; - } - - void *ptr = NULL; - drv->mBoundAllocs[slot] = data; - if(data) { - DrvAllocation *allocDrv = (DrvAllocation *)data->mHal.drv; - ptr = allocDrv->lod[0].mallocPtr; - } - memcpy(destPtr, &ptr, sizeof(void *)); +void rsdScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot, Allocation *data) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->setGlobalBind(slot, data); } -void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - //rsAssert(script->mFieldIsObject[slot]); - //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data); - - if (drv->mIntrinsicID) { - drv->mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, - static_cast<Allocation *>(data)); - return; - } - - int32_t *destPtr = reinterpret_cast<int32_t *>( - drv->mExecutable->getExportVarAddrs()[slot]); - if 
(!destPtr) { - //ALOGV("Calling setVar on slot = %i which is null", slot); - return; - } - - rsrSetObject(dc, script, (ObjectBase **)destPtr, data); +void rsdScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot, ObjectBase *data) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + cs->setGlobalObj(slot, data); } -void rsdScriptDestroy(const Context *dc, Script *script) { - DrvScript *drv = (DrvScript *)script->mHal.drv; - - if (drv == NULL) { - return; - } - - if (drv->mExecutable) { - Vector<void *>::const_iterator var_addr_iter = - drv->mExecutable->getExportVarAddrs().begin(); - Vector<void *>::const_iterator var_addr_end = - drv->mExecutable->getExportVarAddrs().end(); - - bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter = - drv->mExecutable->getInfo().getObjectSlots().begin(); - bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end = - drv->mExecutable->getInfo().getObjectSlots().end(); - - while ((var_addr_iter != var_addr_end) && - (is_object_iter != is_object_end)) { - // The field address can be NULL if the script-side has optimized - // the corresponding global variable away. 
- ObjectBase **obj_addr = - reinterpret_cast<ObjectBase **>(*var_addr_iter); - if (*is_object_iter) { - if (*var_addr_iter != NULL) { - rsrClearObject(dc, script, obj_addr); - } - } - var_addr_iter++; - is_object_iter++; - } - } - - delete drv->mCompilerContext; - delete drv->mCompilerDriver; - delete drv->mExecutable; - delete[] drv->mBoundAllocs; - free(drv); - script->mHal.drv = NULL; +void rsdScriptDestroy(const Context *dc, Script *s) { + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; + delete cs; + s->mHal.drv = NULL; } + Allocation * rsdScriptGetAllocationForPointer(const android::renderscript::Context *dc, const android::renderscript::Script *sc, const void *ptr) { - DrvScript *drv = (DrvScript *)sc->mHal.drv; - if (!ptr) { - return NULL; - } - - for (uint32_t ct=0; ct < sc->mHal.info.exportedVariableCount; ct++) { - Allocation *a = drv->mBoundAllocs[ct]; - if (!a) continue; - DrvAllocation *adrv = (DrvAllocation *)a->mHal.drv; - if (adrv->lod[0].mallocPtr == ptr) { - return a; - } - } - ALOGE("rsGetAllocation, failed to find %p", ptr); - return NULL; + RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv; + return cs->getAllocationForPointer(ptr); } diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h index 4a42eb55..4c65c2af 100644 --- a/driver/rsdBcc.h +++ b/driver/rsdBcc.h @@ -20,12 +20,6 @@ #include <rs_hal.h> #include <rsRuntime.h> -namespace bcc { - class BCCContext; - class RSCompilerDriver; - class RSExecutable; -} - bool rsdScriptInit(const android::renderscript::Context *, android::renderscript::ScriptC *, char const *resName, char const *cacheDir, uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags); @@ -92,92 +86,4 @@ android::renderscript::Allocation * rsdScriptGetAllocationForPointer( const void *); -typedef void (*outer_foreach_t)( - const android::renderscript::RsForEachStubParamStruct *, - uint32_t x1, uint32_t x2, - uint32_t instep, uint32_t outstep); - -typedef struct 
RsdIntriniscFuncs_rec { - - void (*setVarObj)(const android::renderscript::Context *dc, - const android::renderscript::Script *script, - void * intrinsicData, - uint32_t slot, android::renderscript::Allocation *data); - void (*setVar)(const android::renderscript::Context *dc, - const android::renderscript::Script *script, - void * intrinsicData, - uint32_t slot, void *data, size_t dataLength); - void (*root)(const android::renderscript::RsForEachStubParamStruct *, - uint32_t x1, uint32_t x2, uint32_t instep, uint32_t outstep); - - void (*destroy)(const android::renderscript::Context *dc, - const android::renderscript::Script *script, - void * intrinsicData); -} RsdIntriniscFuncs_t; - -struct DrvScript { - RsScriptIntrinsicID mIntrinsicID; - int (*mRoot)(); - int (*mRootExpand)(); - void (*mInit)(); - void (*mFreeChildren)(); - - bcc::BCCContext *mCompilerContext; - bcc::RSCompilerDriver *mCompilerDriver; - bcc::RSExecutable *mExecutable; - - android::renderscript::Allocation **mBoundAllocs; - RsdIntriniscFuncs_t mIntrinsicFuncs; - void * mIntrinsicData; -}; - -typedef struct { - android::renderscript::RsForEachStubParamStruct fep; - uint32_t cpuIdx; - -} MTThreadStuct; - -typedef struct { - android::renderscript::RsForEachStubParamStruct fep; - - android::renderscript::Context *rsc; - android::renderscript::Script *script; - ForEachFunc_t kernel; - uint32_t sig; - const android::renderscript::Allocation * ain; - android::renderscript::Allocation * aout; - - uint32_t mSliceSize; - volatile int mSliceNum; - - uint32_t xStart; - uint32_t xEnd; - uint32_t yStart; - uint32_t yEnd; - uint32_t zStart; - uint32_t zEnd; - uint32_t arrayStart; - uint32_t arrayEnd; -} MTLaunchStruct; - -void rsdScriptLaunchThreads(const android::renderscript::Context *rsc, - bool isThreadable, - const android::renderscript::Allocation * ain, - android::renderscript::Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc, - MTLaunchStruct *mtls); - -void 
rsdScriptInvokeForEachMtlsSetup(const android::renderscript::Context *rsc, - const android::renderscript::Allocation * ain, - android::renderscript::Allocation * aout, - const void * usr, - uint32_t usrLen, - const RsScriptCall *sc, - MTLaunchStruct *mtls); - - - - #endif diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp index caa5aa7e..7f4060a1 100644 --- a/driver/rsdCore.cpp +++ b/driver/rsdCore.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "../cpu_ref/rsd_cpu.h" + #include "rsdCore.h" #include "rsdAllocation.h" #include "rsdBcc.h" @@ -154,71 +156,10 @@ static RsdHalFunctions FunctionTable = { }; -pthread_key_t rsdgThreadTLSKey = 0; -uint32_t rsdgThreadTLSKeyCount = 0; -pthread_mutex_t rsdgInitMutex = PTHREAD_MUTEX_INITIALIZER; - - -static void * HelperThreadProc(void *vrsc) { - Context *rsc = static_cast<Context *>(vrsc); - RsdHal *dc = (RsdHal *)rsc->mHal.drv; - - - uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount); - - //ALOGV("RS helperThread starting %p idx=%i", rsc, idx); - - dc->mWorkers.mLaunchSignals[idx].init(); - dc->mWorkers.mNativeThreadId[idx] = gettid(); - - int status = pthread_setspecific(rsdgThreadTLSKey, &dc->mTlsStruct); - if (status) { - ALOGE("pthread_setspecific %i", status); - } - -#if 0 - typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; - cpu_set_t cpuset; - memset(&cpuset, 0, sizeof(cpuset)); - cpuset.bits[idx / 64] |= 1ULL << (idx % 64); - int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], - sizeof(cpuset), &cpuset); - ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); -#endif - - while (!dc->mExit) { - dc->mWorkers.mLaunchSignals[idx].wait(); - if (dc->mWorkers.mLaunchCallback) { - // idx +1 is used because the calling thread is always worker 0. 
- dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1); - } - android_atomic_dec(&dc->mWorkers.mRunningCount); - dc->mWorkers.mCompleteSignal.set(); - } +extern const RsdCpuReference::CpuSymbol * rsdLookupRuntimeStub(Context * pContext, char const* name); - //ALOGV("RS helperThread exited %p idx=%i", rsc, idx); - return NULL; -} - -void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) { - RsdHal *dc = (RsdHal *)rsc->mHal.drv; - - dc->mWorkers.mLaunchData = data; - dc->mWorkers.mLaunchCallback = cbk; - android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); - for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { - dc->mWorkers.mLaunchSignals[ct].set(); - } - - // We use the calling thread as one of the workers so we can start without - // the delay of the thread wakeup. - if (dc->mWorkers.mLaunchCallback) { - dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, 0); - } - - while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { - dc->mWorkers.mCompleteSignal.wait(); - } +static RsdCpuReference::CpuScript * LookupScript(Context *, const Script *s) { + return (RsdCpuReference::CpuScript *)s->mHal.drv; } extern "C" bool rsdHalInit(RsContext c, uint32_t version_major, @@ -233,76 +174,23 @@ extern "C" bool rsdHalInit(RsContext c, uint32_t version_major, } rsc->mHal.drv = dc; - pthread_mutex_lock(&rsdgInitMutex); - if (!rsdgThreadTLSKeyCount) { - int status = pthread_key_create(&rsdgThreadTLSKey, NULL); - if (status) { - ALOGE("Failed to init thread tls key."); - pthread_mutex_unlock(&rsdgInitMutex); - return false; - } - } - rsdgThreadTLSKeyCount++; - pthread_mutex_unlock(&rsdgInitMutex); - - dc->mTlsStruct.mContext = rsc; - dc->mTlsStruct.mScript = NULL; - int status = pthread_setspecific(rsdgThreadTLSKey, &dc->mTlsStruct); - if (status) { - ALOGE("pthread_setspecific %i", status); - } - - - int cpu = sysconf(_SC_NPROCESSORS_ONLN); - if(rsc->props.mDebugMaxThreads) { - cpu = 
rsc->props.mDebugMaxThreads; - } - if (cpu < 2) { - cpu = 0; - } - ALOGV("%p Launching thread(s), CPUs %i", rsc, cpu); - - // Subtract one from the cpu count because we also use the command thread as a worker. - dc->mWorkers.mCount = (uint32_t)(cpu - 1); - dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t)); - dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t)); - dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount]; - dc->mWorkers.mLaunchCallback = NULL; - - dc->mWorkers.mCompleteSignal.init(); - - android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); - android_atomic_release_store(0, &dc->mWorkers.mLaunchCount); - - pthread_attr_t threadAttr; - status = pthread_attr_init(&threadAttr); - if (status) { - ALOGE("Failed to init thread attribute."); + dc->mCpuRef = RsdCpuReference::create((Context *)c, version_major, version_minor, + &rsdLookupRuntimeStub, &LookupScript); + if (!dc->mCpuRef) { + ALOGE("RsdCpuReference::create for driver hal failed."); + free(dc); return false; } - for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { - status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc); - if (status) { - dc->mWorkers.mCount = ct; - ALOGE("Created fewer than expected number of RS threads."); - break; - } - } - while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { - usleep(100); - } - - pthread_attr_destroy(&threadAttr); return true; } void SetPriority(const Context *rsc, int32_t priority) { RsdHal *dc = (RsdHal *)rsc->mHal.drv; - for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { - setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority); - } + + dc->mCpuRef->setPriority(priority); + if (dc->mHasGraphics) { rsdGLSetPriority(rsc, priority); } @@ -310,27 +198,7 @@ void SetPriority(const Context *rsc, int32_t priority) { void Shutdown(Context *rsc) { RsdHal *dc = (RsdHal *)rsc->mHal.drv; - - dc->mExit 
= true; - dc->mWorkers.mLaunchData = NULL; - dc->mWorkers.mLaunchCallback = NULL; - android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); - for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { - dc->mWorkers.mLaunchSignals[ct].set(); - } - void *res; - for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { - pthread_join(dc->mWorkers.mThreadId[ct], &res); - } - rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0); - - // Global structure cleanup. - pthread_mutex_lock(&rsdgInitMutex); - --rsdgThreadTLSKeyCount; - if (!rsdgThreadTLSKeyCount) { - pthread_key_delete(rsdgThreadTLSKey); - } - pthread_mutex_unlock(&rsdgInitMutex); - + delete dc->mCpuRef; + rsc->mHal.drv = NULL; } diff --git a/driver/rsdCore.h b/driver/rsdCore.h index 92e7c7f9..0a464600 100644 --- a/driver/rsdCore.h +++ b/driver/rsdCore.h @@ -19,6 +19,8 @@ #include <rs_hal.h> +#include "../cpu_ref/rsd_cpu.h" + #include "rsMutex.h" #include "rsSignal.h" @@ -28,12 +30,6 @@ typedef void (* InvokeFunc_t)(void); typedef void (* ForEachFunc_t)(void); typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); -typedef struct RsdSymbolTableRec { - const char * mName; - void * mPtr; - bool threadable; -} RsdSymbolTable; - typedef struct ScriptTLSStructRec { android::renderscript::Context * mContext; android::renderscript::Script * mScript; @@ -43,33 +39,13 @@ typedef struct RsdHalRec { uint32_t version_major; uint32_t version_minor; bool mHasGraphics; - bool mInForEach; - - struct Workers { - volatile int mRunningCount; - volatile int mLaunchCount; - uint32_t mCount; - pthread_t *mThreadId; - pid_t *mNativeThreadId; - android::renderscript::Signal mCompleteSignal; - - android::renderscript::Signal *mLaunchSignals; - WorkerCallback_t mLaunchCallback; - void *mLaunchData; - }; - Workers mWorkers; - bool mExit; ScriptTLSStruct mTlsStruct; + android::renderscript::RsdCpuReference *mCpuRef; RsdGL gl; } RsdHal; -extern pthread_key_t rsdgThreadTLSKey; -extern uint32_t 
rsdgThreadTLSKeyCount; -extern pthread_mutex_t rsdgInitMutex; - - void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data); #endif diff --git a/driver/rsdIntrinsicColorMatrix.cpp b/driver/rsdIntrinsicColorMatrix.cpp deleted file mode 100644 index cfe0333e..00000000 --- a/driver/rsdIntrinsicColorMatrix.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" - -using namespace android; -using namespace android::renderscript; - -struct ConvolveParams { - float fp[16]; - short ip[16]; - bool use3x3; - bool useDot; -}; - -static void ColorMatrix_SetVar(const Context *dc, const Script *script, void * intrinsicData, - uint32_t slot, void *data, size_t dataLength) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; - - rsAssert(slot == 0); - memcpy (cp->fp, data, dataLength); - for(int ct=0; ct < 16; ct++) { - cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f); - } - - if ((cp->ip[3] == 0) && (cp->ip[7] == 0) && (cp->ip[11] == 0) && - (cp->ip[12] == 0) && (cp->ip[13] == 0) && (cp->ip[14] == 0) && - (cp->ip[15] == 255)) { - cp->use3x3 = true; - - if ((cp->ip[0] == cp->ip[1]) && (cp->ip[0] == cp->ip[2]) && - (cp->ip[4] == cp->ip[5]) && (cp->ip[4] == cp->ip[6]) && - (cp->ip[8] == cp->ip[9]) && (cp->ip[8] == cp->ip[10])) { - 
cp->useDot = true; - } - } -} - -extern "C" void rsdIntrinsicColorMatrix4x4_K(void *dst, const void *src, const short *coef, uint32_t count); -extern "C" void rsdIntrinsicColorMatrix3x3_K(void *dst, const void *src, const short *coef, uint32_t count); -extern "C" void rsdIntrinsicColorMatrixDot_K(void *dst, const void *src, const short *coef, uint32_t count); - -static void One(const RsForEachStubParamStruct *p, uchar4 *out, - const uchar4 *py, const float* coeff) { - float4 i = convert_float4(py[0]); - - float4 sum; - sum.x = i.x * coeff[0] + - i.y * coeff[4] + - i.z * coeff[8] + - i.w * coeff[12]; - sum.y = i.x * coeff[1] + - i.y * coeff[5] + - i.z * coeff[9] + - i.w * coeff[13]; - sum.z = i.x * coeff[2] + - i.y * coeff[6] + - i.z * coeff[10] + - i.w * coeff[14]; - sum.w = i.x * coeff[3] + - i.y * coeff[7] + - i.z * coeff[11] + - i.w * coeff[15]; - - sum.x = sum.x < 0 ? 0 : (sum.x > 255 ? 255 : sum.x); - sum.y = sum.y < 0 ? 0 : (sum.y > 255 ? 255 : sum.y); - sum.z = sum.z < 0 ? 0 : (sum.z > 255 ? 255 : sum.z); - sum.w = sum.w < 0 ? 0 : (sum.w > 255 ? 
255 : sum.w); - - *out = convert_uchar4(sum); -} - -static void ColorMatrix_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - ConvolveParams *cp = (ConvolveParams *)p->usr; - uchar4 *out = (uchar4 *)p->out; - uchar4 *in = (uchar4 *)p->in; - uint32_t x1 = xstart; - uint32_t x2 = xend; - - if(x2 > x1) { -#if defined(ARCH_ARM_HAVE_NEON) - int32_t len = (x2 - x1) >> 2; - if(len > 0) { - if (cp->use3x3) { - if (cp->useDot) { - rsdIntrinsicColorMatrixDot_K(out, in, cp->ip, len); - } else { - rsdIntrinsicColorMatrix3x3_K(out, in, cp->ip, len); - } - } else { - rsdIntrinsicColorMatrix4x4_K(out, in, cp->ip, len); - } - x1 += len << 2; - out += len << 2; - in += len << 2; - } -#endif - - while(x1 != x2) { - One(p, out++, in++, cp->fp); - x1++; - } - } -} - -void * rsdIntrinsic_InitColorMatrix(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { - - script->mHal.info.exportedVariableCount = 1; - funcs->setVar = ColorMatrix_SetVar; - funcs->root = ColorMatrix_uchar4; - - ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); - cp->fp[0] = 1.f; - cp->fp[5] = 1.f; - cp->fp[10] = 1.f; - cp->fp[15] = 1.f; - for(int ct=0; ct < 16; ct++) { - cp->ip[ct] = (short)(cp->fp[ct] * 255.f + 0.5f); - } - return cp; -} - - diff --git a/driver/rsdIntrinsicLUT.cpp b/driver/rsdIntrinsicLUT.cpp deleted file mode 100644 index 818a132d..00000000 --- a/driver/rsdIntrinsicLUT.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -#include "rsdIntrinsicInlines.h" - -using namespace android; -using namespace android::renderscript; - -struct ConvolveParams { - ObjectBaseRef<Allocation> lut; -}; - -static void LUT_Bind(const Context *dc, const Script *script, - void * intrinsicData, uint32_t slot, Allocation *data) { - ConvolveParams *cp = (ConvolveParams *)intrinsicData; - rsAssert(slot == 0); - cp->lut.set(data); -} - -static void LUT_uchar4(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - ConvolveParams *cp = (ConvolveParams *)p->usr; - uchar4 *out = (uchar4 *)p->out; - uchar4 *in = (uchar4 *)p->in; - uint32_t x1 = xstart; - uint32_t x2 = xend; - - DrvAllocation *din = (DrvAllocation *)cp->lut->mHal.drv; - const uchar *tr = (const uchar *)din->lod[0].mallocPtr; - const uchar *tg = &tr[256]; - const uchar *tb = &tg[256]; - const uchar *ta = &tb[256]; - - while (x1 < x2) { - uchar4 p = *in; - uchar4 o = {tr[p.x], tg[p.y], tb[p.z], ta[p.w]}; - *out = o; - in++; - out++; - x1++; - } -} - -void * rsdIntrinsic_InitLUT(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsdIntriniscFuncs_t *funcs) { - - script->mHal.info.exportedVariableCount = 1; - funcs->setVarObj = LUT_Bind; - funcs->root = LUT_uchar4; - ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); - return cp; -} - - diff --git a/driver/rsdIntrinsics.cpp b/driver/rsdIntrinsics.cpp deleted file mode 100644 index 
0f747faa..00000000 --- a/driver/rsdIntrinsics.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "rsdCore.h" -#include "rsdIntrinsics.h" -#include "rsdAllocation.h" - -using namespace android; -using namespace android::renderscript; - -void * rsdIntrinsic_InitBlur(const Context *, Script *, RsdIntriniscFuncs_t *); -void * rsdIntrinsic_InitConvolve3x3(const Context *, Script *, RsdIntriniscFuncs_t *); -void * rsdIntrinsic_InitConvolve5x5(const Context *, Script *, RsdIntriniscFuncs_t *); -void * rsdIntrinsic_InitColorMatrix(const Context *, Script *, RsdIntriniscFuncs_t *); -void * rsdIntrinsic_InitLUT(const Context *, Script *, RsdIntriniscFuncs_t *); -void * rsdIntrinsic_InitYuvToRGB(const Context *, Script *, RsdIntriniscFuncs_t *); -void * rsdIntrinsic_InitBlend(const Context *, Script *, RsdIntriniscFuncs_t *); - -static void SetVarObj(const Context *, const Script *, void *, uint32_t, Allocation *) { - rsAssert(!"Intrinsic_SetVarObj unexpectedly called"); -} - -static void SetVar(const Context *, const Script *, void *, uint32_t, void *, size_t) { - rsAssert(!"Intrinsic_Bind unexpectedly called"); -} - -static void Destroy(const Context *dc, const Script *script, void * intrinsicData) { - free(intrinsicData); -} - -void * rsdIntrinsic_Init(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsScriptIntrinsicID 
iid, - RsdIntriniscFuncs_t *funcs) { - - funcs->setVarObj = SetVarObj; - funcs->setVar = SetVar; - funcs->destroy = Destroy; - - switch(iid) { - case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3: - return rsdIntrinsic_InitConvolve3x3(dc, script, funcs); - case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5: - return rsdIntrinsic_InitConvolve5x5(dc, script, funcs); - case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX: - return rsdIntrinsic_InitColorMatrix(dc, script, funcs); - case RS_SCRIPT_INTRINSIC_ID_LUT: - return rsdIntrinsic_InitLUT(dc, script, funcs); - case RS_SCRIPT_INTRINSIC_ID_BLUR: - return rsdIntrinsic_InitBlur(dc, script, funcs); - case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB: - return rsdIntrinsic_InitYuvToRGB(dc, script, funcs); - case RS_SCRIPT_INTRINSIC_ID_BLEND: - return rsdIntrinsic_InitBlend(dc, script, funcs); - - default: - return NULL; - } - return NULL; -} - - - diff --git a/driver/rsdIntrinsics.h b/driver/rsdIntrinsics.h deleted file mode 100644 index 221a81ad..00000000 --- a/driver/rsdIntrinsics.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef RSD_INTRINSICS_H -#define RSD_INTRINSICS_H - -#include <rs_hal.h> -#include "rsdBcc.h" - -void * rsdIntrinsic_Init(const android::renderscript::Context *dc, - android::renderscript::Script *script, - RsScriptIntrinsicID id, RsdIntriniscFuncs_t *funcs); - -#endif // RSD_INTRINSICS_H - diff --git a/driver/rsdMeshObj.cpp b/driver/rsdMeshObj.cpp index 92e02bef..e8df21f3 100644 --- a/driver/rsdMeshObj.cpp +++ b/driver/rsdMeshObj.cpp @@ -151,7 +151,7 @@ void RsdMeshObj::renderPrimitiveRange(const Context *rsc, uint32_t primIndex, mAttribs[ct].ptr = NULL; } else { mAttribs[ct].buffer = 0; - mAttribs[ct].ptr = (const uint8_t*)alloc->mHal.drvState.mallocPtrLOD0; + mAttribs[ct].ptr = (const uint8_t*)alloc->mHal.drvState.lod[0].mallocPtr; } } @@ -172,7 +172,7 @@ void RsdMeshObj::renderPrimitiveRange(const Context *rsc, uint32_t primIndex, } else { RSD_CALL_GL(glBindBuffer, GL_ELEMENT_ARRAY_BUFFER, 0); RSD_CALL_GL(glDrawElements, mGLPrimitives[primIndex], len, GL_UNSIGNED_SHORT, - idxAlloc->mHal.drvState.mallocPtrLOD0); + idxAlloc->mHal.drvState.lod[0].mallocPtr); } } else { RSD_CALL_GL(glDrawArrays, mGLPrimitives[primIndex], start, len); diff --git a/driver/rsdRuntime.h b/driver/rsdRuntime.h deleted file mode 100644 index dc84032f..00000000 --- a/driver/rsdRuntime.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef RSD_RUNTIME_STUBS_H -#define RSD_RUNTIME_STUBS_H - -#include <rs_hal.h> - -#include "rsMutex.h" - -const RsdSymbolTable * rsdLookupSymbolMath(const char *sym); - -void* rsdLookupRuntimeStub(void* pContext, char const* name); - -#endif diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp index 9bd1396b..5141c9fd 100644 --- a/driver/rsdRuntimeStubs.cpp +++ b/driver/rsdRuntimeStubs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 The Android Open Source Project + * Copyright (C) 2011-2012 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,6 @@ #include "rsdCore.h" #include "rsdBcc.h" -#include "rsdRuntime.h" #include "rsdPath.h" #include "rsdAllocation.h" #include "rsdShaderCache.h" @@ -36,11 +35,6 @@ using namespace android; using namespace android::renderscript; -#define GET_TLS() ScriptTLSStruct * tls = \ - (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey); \ - Context * rsc = tls->mContext; \ - ScriptC * sc = (ScriptC *) tls->mScript - typedef float float2 __attribute__((ext_vector_type(2))); typedef float float3 __attribute__((ext_vector_type(3))); typedef float float4 __attribute__((ext_vector_type(4))); @@ -76,13 +70,13 @@ typedef unsigned long long ulong4 __attribute__((ext_vector_type(4))); static void SC_AllocationSyncAll2(Allocation *a, RsAllocationUsageType source) { - GET_TLS(); - rsrAllocationSyncAll(rsc, sc, a, source); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrAllocationSyncAll(rsc, a, source); } static void SC_AllocationSyncAll(Allocation *a) { - GET_TLS(); - rsrAllocationSyncAll(rsc, sc, a, RS_ALLOCATION_USAGE_SCRIPT); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrAllocationSyncAll(rsc, a, RS_ALLOCATION_USAGE_SCRIPT); } static void SC_AllocationCopy1DRange(Allocation *dstAlloc, @@ -91,7 +85,7 @@ static void SC_AllocationCopy1DRange(Allocation *dstAlloc, uint32_t 
count, Allocation *srcAlloc, uint32_t srcOff, uint32_t srcMip) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); rsrAllocationCopy1DRange(rsc, dstAlloc, dstOff, dstMip, count, srcAlloc, srcOff, srcMip); } @@ -103,7 +97,7 @@ static void SC_AllocationCopy2DRange(Allocation *dstAlloc, Allocation *srcAlloc, uint32_t srcXoff, uint32_t srcYoff, uint32_t srcMip, uint32_t srcFace) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); rsrAllocationCopy2DRange(rsc, dstAlloc, dstXoff, dstYoff, dstMip, dstFace, width, height, @@ -112,13 +106,13 @@ static void SC_AllocationCopy2DRange(Allocation *dstAlloc, } static void SC_AllocationIoSend(Allocation *alloc) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); rsdAllocationIoSend(rsc, alloc); } static void SC_AllocationIoReceive(Allocation *alloc) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); rsdAllocationIoReceive(rsc, alloc); } @@ -129,68 +123,68 @@ static void SC_AllocationIoReceive(Allocation *alloc) { ////////////////////////////////////////////////////////////////////////////// static void SC_BindTexture(ProgramFragment *pf, uint32_t slot, Allocation *a) { - GET_TLS(); - rsrBindTexture(rsc, sc, pf, slot, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindTexture(rsc, pf, slot, a); } static void SC_BindVertexConstant(ProgramVertex *pv, uint32_t slot, Allocation *a) { - GET_TLS(); - rsrBindConstant(rsc, sc, pv, slot, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindConstant(rsc, pv, slot, a); } static void SC_BindFragmentConstant(ProgramFragment *pf, uint32_t slot, Allocation *a) { - GET_TLS(); - rsrBindConstant(rsc, sc, pf, slot, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindConstant(rsc, pf, slot, a); } static void SC_BindSampler(ProgramFragment *pf, uint32_t slot, Sampler *s) { - GET_TLS(); - rsrBindSampler(rsc, sc, pf, slot, s); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindSampler(rsc, pf, slot, s); 
} static void SC_BindProgramStore(ProgramStore *ps) { - GET_TLS(); - rsrBindProgramStore(rsc, sc, ps); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindProgramStore(rsc, ps); } static void SC_BindProgramFragment(ProgramFragment *pf) { - GET_TLS(); - rsrBindProgramFragment(rsc, sc, pf); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindProgramFragment(rsc, pf); } static void SC_BindProgramVertex(ProgramVertex *pv) { - GET_TLS(); - rsrBindProgramVertex(rsc, sc, pv); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindProgramVertex(rsc, pv); } static void SC_BindProgramRaster(ProgramRaster *pr) { - GET_TLS(); - rsrBindProgramRaster(rsc, sc, pr); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindProgramRaster(rsc, pr); } static void SC_BindFrameBufferObjectColorTarget(Allocation *a, uint32_t slot) { - GET_TLS(); - rsrBindFrameBufferObjectColorTarget(rsc, sc, a, slot); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindFrameBufferObjectColorTarget(rsc, a, slot); } static void SC_BindFrameBufferObjectDepthTarget(Allocation *a) { - GET_TLS(); - rsrBindFrameBufferObjectDepthTarget(rsc, sc, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindFrameBufferObjectDepthTarget(rsc, a); } static void SC_ClearFrameBufferObjectColorTarget(uint32_t slot) { - GET_TLS(); - rsrClearFrameBufferObjectColorTarget(rsc, sc, slot); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrClearFrameBufferObjectColorTarget(rsc, slot); } static void SC_ClearFrameBufferObjectDepthTarget(Context *, Script *) { - GET_TLS(); - rsrClearFrameBufferObjectDepthTarget(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrClearFrameBufferObjectDepthTarget(rsc); } static void SC_ClearFrameBufferObjectTargets(Context *, Script *) { - GET_TLS(); - rsrClearFrameBufferObjectTargets(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrClearFrameBufferObjectTargets(rsc); } @@ -199,28 +193,28 @@ static void 
SC_ClearFrameBufferObjectTargets(Context *, Script *) { ////////////////////////////////////////////////////////////////////////////// static void SC_VpLoadProjectionMatrix(const rsc_Matrix *m) { - GET_TLS(); - rsrVpLoadProjectionMatrix(rsc, sc, m); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrVpLoadProjectionMatrix(rsc, m); } static void SC_VpLoadModelMatrix(const rsc_Matrix *m) { - GET_TLS(); - rsrVpLoadModelMatrix(rsc, sc, m); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrVpLoadModelMatrix(rsc, m); } static void SC_VpLoadTextureMatrix(const rsc_Matrix *m) { - GET_TLS(); - rsrVpLoadTextureMatrix(rsc, sc, m); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrVpLoadTextureMatrix(rsc, m); } static void SC_PfConstantColor(ProgramFragment *pf, float r, float g, float b, float a) { - GET_TLS(); - rsrPfConstantColor(rsc, sc, pf, r, g, b, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrPfConstantColor(rsc, pf, r, g, b, a); } static void SC_VpGetProjectionMatrix(rsc_Matrix *m) { - GET_TLS(); - rsrVpGetProjectionMatrix(rsc, sc, m); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrVpGetProjectionMatrix(rsc, m); } @@ -232,7 +226,7 @@ static void SC_DrawQuadTexCoords(float x1, float y1, float z1, float u1, float v float x2, float y2, float z2, float u2, float v2, float x3, float y3, float z3, float u3, float v3, float x4, float y4, float z4, float u4, float v4) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); if (!rsc->setupCheck()) { return; @@ -266,7 +260,6 @@ static void SC_DrawQuad(float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3, float x4, float y4, float z4) { - GET_TLS(); SC_DrawQuadTexCoords(x1, y1, z1, 0, 1, x2, y2, z2, 1, 1, x3, y3, z3, 1, 0, @@ -274,7 +267,7 @@ static void SC_DrawQuad(float x1, float y1, float z1, } static void SC_DrawSpriteScreenspace(float x, float y, float z, float w, float h) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); 
ObjectBaseRef<const ProgramVertex> tmp(rsc->getProgramVertex()); rsc->setProgramVertex(rsc->getDefaultProgramVertex()); @@ -292,38 +285,34 @@ static void SC_DrawSpriteScreenspace(float x, float y, float z, float w, float h } static void SC_DrawRect(float x1, float y1, float x2, float y2, float z) { - GET_TLS(); - SC_DrawQuad(x1, y2, z, x2, y2, z, x2, y1, z, x1, y1, z); - } static void SC_DrawPath(Path *p) { - GET_TLS(); - //rsrDrawPath(rsc, sc, p); + Context *rsc = RsdCpuReference::getTlsContext(); rsdPathDraw(rsc, p); } static void SC_DrawMesh(Mesh *m) { - GET_TLS(); - rsrDrawMesh(rsc, sc, m); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrDrawMesh(rsc, m); } static void SC_DrawMeshPrimitive(Mesh *m, uint32_t primIndex) { - GET_TLS(); - rsrDrawMeshPrimitive(rsc, sc, m, primIndex); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrDrawMeshPrimitive(rsc, m, primIndex); } static void SC_DrawMeshPrimitiveRange(Mesh *m, uint32_t primIndex, uint32_t start, uint32_t len) { - GET_TLS(); - rsrDrawMeshPrimitiveRange(rsc, sc, m, primIndex, start, len); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrDrawMeshPrimitiveRange(rsc, m, primIndex, start, len); } static void SC_MeshComputeBoundingBox(Mesh *m, float *minX, float *minY, float *minZ, float *maxX, float *maxY, float *maxZ) { - GET_TLS(); - rsrMeshComputeBoundingBox(rsc, sc, m, minX, minY, minZ, maxX, maxY, maxZ); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrMeshComputeBoundingBox(rsc, m, minX, minY, minZ, maxX, maxY, maxZ); } @@ -334,67 +323,67 @@ static void SC_MeshComputeBoundingBox(Mesh *m, static void SC_Color(float r, float g, float b, float a) { - GET_TLS(); - rsrColor(rsc, sc, r, g, b, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrColor(rsc, r, g, b, a); } static void SC_Finish() { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); rsdGLFinish(rsc); } static void SC_ClearColor(float r, float g, float b, float a) { - GET_TLS(); - rsrPrepareClear(rsc, 
sc); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrPrepareClear(rsc); rsdGLClearColor(rsc, r, g, b, a); } static void SC_ClearDepth(float v) { - GET_TLS(); - rsrPrepareClear(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrPrepareClear(rsc); rsdGLClearDepth(rsc, v); } static uint32_t SC_GetWidth() { - GET_TLS(); - return rsrGetWidth(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrGetWidth(rsc); } static uint32_t SC_GetHeight() { - GET_TLS(); - return rsrGetHeight(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrGetHeight(rsc); } static void SC_DrawTextAlloc(Allocation *a, int x, int y) { - GET_TLS(); - rsrDrawTextAlloc(rsc, sc, a, x, y); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrDrawTextAlloc(rsc, a, x, y); } static void SC_DrawText(const char *text, int x, int y) { - GET_TLS(); - rsrDrawText(rsc, sc, text, x, y); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrDrawText(rsc, text, x, y); } static void SC_MeasureTextAlloc(Allocation *a, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) { - GET_TLS(); - rsrMeasureTextAlloc(rsc, sc, a, left, right, top, bottom); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrMeasureTextAlloc(rsc, a, left, right, top, bottom); } static void SC_MeasureText(const char *text, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) { - GET_TLS(); - rsrMeasureText(rsc, sc, text, left, right, top, bottom); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrMeasureText(rsc, text, left, right, top, bottom); } static void SC_BindFont(Font *f) { - GET_TLS(); - rsrBindFont(rsc, sc, f); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrBindFont(rsc, f); } static void SC_FontColor(float r, float g, float b, float a) { - GET_TLS(); - rsrFontColor(rsc, sc, r, g, b, a); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrFontColor(rsc, r, g, b, a); } @@ -404,41 +393,42 @@ static void SC_FontColor(float r, float g, 
float b, float a) { ////////////////////////////////////////////////////////////////////////////// static void SC_SetObject(ObjectBase **dst, ObjectBase * src) { - GET_TLS(); - rsrSetObject(rsc, sc, dst, src); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrSetObject(rsc, dst, src); } static void SC_ClearObject(ObjectBase **dst) { - GET_TLS(); - rsrClearObject(rsc, sc, dst); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrClearObject(rsc, dst); } static bool SC_IsObject(const ObjectBase *src) { - GET_TLS(); - return rsrIsObject(rsc, sc, src); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrIsObject(rsc, src); } static const Allocation * SC_GetAllocation(const void *ptr) { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); + const Script *sc = RsdCpuReference::getTlsScript(); return rsdScriptGetAllocationForPointer(rsc, sc, ptr); } static void SC_ForEach_SAA(Script *target, Allocation *in, Allocation *out) { - GET_TLS(); - rsrForEach(rsc, sc, target, in, out, NULL, 0, NULL); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrForEach(rsc, target, in, out, NULL, 0, NULL); } static void SC_ForEach_SAAU(Script *target, Allocation *in, Allocation *out, const void *usr) { - GET_TLS(); - rsrForEach(rsc, sc, target, in, out, usr, 0, NULL); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrForEach(rsc, target, in, out, usr, 0, NULL); } static void SC_ForEach_SAAUS(Script *target, @@ -446,8 +436,8 @@ static void SC_ForEach_SAAUS(Script *target, Allocation *out, const void *usr, const RsScriptCall *call) { - GET_TLS(); - rsrForEach(rsc, sc, target, in, out, usr, 0, call); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrForEach(rsc, target, in, out, usr, 0, call); } static void SC_ForEach_SAAUL(Script *target, @@ -455,8 +445,8 @@ static void SC_ForEach_SAAUL(Script *target, Allocation *out, const void *usr, uint32_t usrLen) { - GET_TLS(); - rsrForEach(rsc, sc, target, in, out, usr, usrLen, NULL); + Context *rsc = 
RsdCpuReference::getTlsContext(); + rsrForEach(rsc, target, in, out, usr, usrLen, NULL); } static void SC_ForEach_SAAULS(Script *target, @@ -465,8 +455,8 @@ static void SC_ForEach_SAAULS(Script *target, const void *usr, uint32_t usrLen, const RsScriptCall *call) { - GET_TLS(); - rsrForEach(rsc, sc, target, in, out, usr, usrLen, call); + Context *rsc = RsdCpuReference::getTlsContext(); + rsrForEach(rsc, target, in, out, usr, usrLen, call); } @@ -476,28 +466,29 @@ static void SC_ForEach_SAAULS(Script *target, ////////////////////////////////////////////////////////////////////////////// static float SC_GetDt() { - GET_TLS(); + Context *rsc = RsdCpuReference::getTlsContext(); + const Script *sc = RsdCpuReference::getTlsScript(); return rsrGetDt(rsc, sc); } time_t SC_Time(time_t *timer) { - GET_TLS(); - return rsrTime(rsc, sc, timer); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrTime(rsc, timer); } tm* SC_LocalTime(tm *local, time_t *timer) { - GET_TLS(); - return rsrLocalTime(rsc, sc, local, timer); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrLocalTime(rsc, local, timer); } int64_t SC_UptimeMillis() { - GET_TLS(); - return rsrUptimeMillis(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrUptimeMillis(rsc); } int64_t SC_UptimeNanos() { - GET_TLS(); - return rsrUptimeNanos(rsc, sc); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrUptimeNanos(rsc); } ////////////////////////////////////////////////////////////////////////////// @@ -505,179 +496,25 @@ int64_t SC_UptimeNanos() { ////////////////////////////////////////////////////////////////////////////// static uint32_t SC_ToClient2(int cmdID, void *data, int len) { - GET_TLS(); - return rsrToClient(rsc, sc, cmdID, data, len); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrToClient(rsc, cmdID, data, len); } static uint32_t SC_ToClient(int cmdID) { - GET_TLS(); - return rsrToClient(rsc, sc, cmdID, NULL, 0); + Context *rsc = 
RsdCpuReference::getTlsContext(); + return rsrToClient(rsc, cmdID, NULL, 0); } static uint32_t SC_ToClientBlocking2(int cmdID, void *data, int len) { - GET_TLS(); - return rsrToClientBlocking(rsc, sc, cmdID, data, len); + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrToClientBlocking(rsc, cmdID, data, len); } static uint32_t SC_ToClientBlocking(int cmdID) { - GET_TLS(); - return rsrToClientBlocking(rsc, sc, cmdID, NULL, 0); -} - -int SC_divsi3(int a, int b) { - return a / b; -} - -int SC_modsi3(int a, int b) { - return a % b; + Context *rsc = RsdCpuReference::getTlsContext(); + return rsrToClientBlocking(rsc, cmdID, NULL, 0); } -unsigned int SC_udivsi3(unsigned int a, unsigned int b) { - return a / b; -} - -unsigned int SC_umodsi3(unsigned int a, unsigned int b) { - return a % b; -} - -static void SC_debugF(const char *s, float f) { - ALOGD("%s %f, 0x%08x", s, f, *((int *) (&f))); -} -static void SC_debugFv2(const char *s, float f1, float f2) { - ALOGD("%s {%f, %f}", s, f1, f2); -} -static void SC_debugFv3(const char *s, float f1, float f2, float f3) { - ALOGD("%s {%f, %f, %f}", s, f1, f2, f3); -} -static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) { - ALOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4); -} -static void SC_debugF2(const char *s, float2 f) { - ALOGD("%s {%f, %f}", s, f.x, f.y); -} -static void SC_debugF3(const char *s, float3 f) { - ALOGD("%s {%f, %f, %f}", s, f.x, f.y, f.z); -} -static void SC_debugF4(const char *s, float4 f) { - ALOGD("%s {%f, %f, %f, %f}", s, f.x, f.y, f.z, f.w); -} -static void SC_debugD(const char *s, double d) { - ALOGD("%s %f, 0x%08llx", s, d, *((long long *) (&d))); -} -static void SC_debugFM4v4(const char *s, const float *f) { - ALOGD("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]); - ALOGD("%s %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]); - ALOGD("%s %f, %f, %f, %f", s, f[2], f[6], f[10], f[14]); - ALOGD("%s %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]); -} -static void 
SC_debugFM3v3(const char *s, const float *f) { - ALOGD("%s {%f, %f, %f", s, f[0], f[3], f[6]); - ALOGD("%s %f, %f, %f", s, f[1], f[4], f[7]); - ALOGD("%s %f, %f, %f}",s, f[2], f[5], f[8]); -} -static void SC_debugFM2v2(const char *s, const float *f) { - ALOGD("%s {%f, %f", s, f[0], f[2]); - ALOGD("%s %f, %f}",s, f[1], f[3]); -} -static void SC_debugI8(const char *s, char c) { - ALOGD("%s %hhd 0x%hhx", s, c, (unsigned char)c); -} -static void SC_debugC2(const char *s, char2 c) { - ALOGD("%s {%hhd, %hhd} 0x%hhx 0x%hhx", s, c.x, c.y, (unsigned char)c.x, (unsigned char)c.y); -} -static void SC_debugC3(const char *s, char3 c) { - ALOGD("%s {%hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z); -} -static void SC_debugC4(const char *s, char4 c) { - ALOGD("%s {%hhd, %hhd, %hhd, %hhd} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, (unsigned char)c.x, (unsigned char)c.y, (unsigned char)c.z, (unsigned char)c.w); -} -static void SC_debugU8(const char *s, unsigned char c) { - ALOGD("%s %hhu 0x%hhx", s, c, c); -} -static void SC_debugUC2(const char *s, uchar2 c) { - ALOGD("%s {%hhu, %hhu} 0x%hhx 0x%hhx", s, c.x, c.y, c.x, c.y); -} -static void SC_debugUC3(const char *s, uchar3 c) { - ALOGD("%s {%hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.x, c.y, c.z); -} -static void SC_debugUC4(const char *s, uchar4 c) { - ALOGD("%s {%hhu, %hhu, %hhu, %hhu} 0x%hhx 0x%hhx 0x%hhx 0x%hhx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w); -} -static void SC_debugI16(const char *s, short c) { - ALOGD("%s %hd 0x%hx", s, c, c); -} -static void SC_debugS2(const char *s, short2 c) { - ALOGD("%s {%hd, %hd} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y); -} -static void SC_debugS3(const char *s, short3 c) { - ALOGD("%s {%hd, %hd, %hd} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z); -} -static void SC_debugS4(const char *s, short4 c) { - ALOGD("%s {%hd, %hd, %hd, %hd} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, 
c.z, c.w); -} -static void SC_debugU16(const char *s, unsigned short c) { - ALOGD("%s %hu 0x%hx", s, c, c); -} -static void SC_debugUS2(const char *s, ushort2 c) { - ALOGD("%s {%hu, %hu} 0x%hx 0x%hx", s, c.x, c.y, c.x, c.y); -} -static void SC_debugUS3(const char *s, ushort3 c) { - ALOGD("%s {%hu, %hu, %hu} 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.x, c.y, c.z); -} -static void SC_debugUS4(const char *s, ushort4 c) { - ALOGD("%s {%hu, %hu, %hu, %hu} 0x%hx 0x%hx 0x%hx 0x%hx", s, c.x, c.y, c.z, c.w, c.x, c.y, c.z, c.w); -} -static void SC_debugI32(const char *s, int32_t i) { - ALOGD("%s %d 0x%x", s, i, i); -} -static void SC_debugI2(const char *s, int2 i) { - ALOGD("%s {%d, %d} 0x%x 0x%x", s, i.x, i.y, i.x, i.y); -} -static void SC_debugI3(const char *s, int3 i) { - ALOGD("%s {%d, %d, %d} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z); -} -static void SC_debugI4(const char *s, int4 i) { - ALOGD("%s {%d, %d, %d, %d} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w); -} -static void SC_debugU32(const char *s, uint32_t i) { - ALOGD("%s %u 0x%x", s, i, i); -} -static void SC_debugUI2(const char *s, uint2 i) { - ALOGD("%s {%u, %u} 0x%x 0x%x", s, i.x, i.y, i.x, i.y); -} -static void SC_debugUI3(const char *s, uint3 i) { - ALOGD("%s {%u, %u, %u} 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.x, i.y, i.z); -} -static void SC_debugUI4(const char *s, uint4 i) { - ALOGD("%s {%u, %u, %u, %u} 0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w); -} -static void SC_debugLL64(const char *s, long long ll) { - ALOGD("%s %lld 0x%llx", s, ll, ll); -} -static void SC_debugL2(const char *s, long2 ll) { - ALOGD("%s {%lld, %lld} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y); -} -static void SC_debugL3(const char *s, long3 ll) { - ALOGD("%s {%lld, %lld, %lld} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z); -} -static void SC_debugL4(const char *s, long4 ll) { - ALOGD("%s {%lld, %lld, %lld, %lld} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, 
ll.y, ll.z, ll.w); -} -static void SC_debugULL64(const char *s, unsigned long long ll) { - ALOGD("%s %llu 0x%llx", s, ll, ll); -} -static void SC_debugUL2(const char *s, ulong2 ll) { - ALOGD("%s {%llu, %llu} 0x%llx 0x%llx", s, ll.x, ll.y, ll.x, ll.y); -} -static void SC_debugUL3(const char *s, ulong3 ll) { - ALOGD("%s {%llu, %llu, %llu} 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.x, ll.y, ll.z); -} -static void SC_debugUL4(const char *s, ulong4 ll) { - ALOGD("%s {%llu, %llu, %llu, %llu} 0x%llx 0x%llx 0x%llx 0x%llx", s, ll.x, ll.y, ll.z, ll.w, ll.x, ll.y, ll.z, ll.w); -} -static void SC_debugP(const char *s, const void *p) { - ALOGD("%s %p", s, p); -} ////////////////////////////////////////////////////////////////////////////// @@ -701,10 +538,7 @@ static void SC_debugP(const char *s, const void *p) { // ::= f # float // ::= d # double -static RsdSymbolTable gSyms[] = { - { "memset", (void *)&memset, true }, - { "memcpy", (void *)&memcpy, true }, - +static RsdCpuReference::CpuSymbol gSyms[] = { // Refcounting { "_Z11rsSetObjectP10rs_elementS_", (void *)&SC_SetObject, true }, { "_Z13rsClearObjectP10rs_element", (void *)&SC_ClearObject, true }, @@ -839,86 +673,24 @@ static RsdSymbolTable gSyms[] = { { "_Z5colorffff", (void *)&SC_Color, false }, { "_Z9rsgFinishv", (void *)&SC_Finish, false }, - // Debug - { "_Z7rsDebugPKcf", (void *)&SC_debugF, true }, - { "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true }, - { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true }, - { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true }, - { "_Z7rsDebugPKcDv2_f", (void *)&SC_debugF2, true }, - { "_Z7rsDebugPKcDv3_f", (void *)&SC_debugF3, true }, - { "_Z7rsDebugPKcDv4_f", (void *)&SC_debugF4, true }, - { "_Z7rsDebugPKcd", (void *)&SC_debugD, true }, - { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true }, - { "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3, true }, - { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true }, - { "_Z7rsDebugPKcc", (void 
*)&SC_debugI8, true }, - { "_Z7rsDebugPKcDv2_c", (void *)&SC_debugC2, true }, - { "_Z7rsDebugPKcDv3_c", (void *)&SC_debugC3, true }, - { "_Z7rsDebugPKcDv4_c", (void *)&SC_debugC4, true }, - { "_Z7rsDebugPKch", (void *)&SC_debugU8, true }, - { "_Z7rsDebugPKcDv2_h", (void *)&SC_debugUC2, true }, - { "_Z7rsDebugPKcDv3_h", (void *)&SC_debugUC3, true }, - { "_Z7rsDebugPKcDv4_h", (void *)&SC_debugUC4, true }, - { "_Z7rsDebugPKcs", (void *)&SC_debugI16, true }, - { "_Z7rsDebugPKcDv2_s", (void *)&SC_debugS2, true }, - { "_Z7rsDebugPKcDv3_s", (void *)&SC_debugS3, true }, - { "_Z7rsDebugPKcDv4_s", (void *)&SC_debugS4, true }, - { "_Z7rsDebugPKct", (void *)&SC_debugU16, true }, - { "_Z7rsDebugPKcDv2_t", (void *)&SC_debugUS2, true }, - { "_Z7rsDebugPKcDv3_t", (void *)&SC_debugUS3, true }, - { "_Z7rsDebugPKcDv4_t", (void *)&SC_debugUS4, true }, - { "_Z7rsDebugPKci", (void *)&SC_debugI32, true }, - { "_Z7rsDebugPKcDv2_i", (void *)&SC_debugI2, true }, - { "_Z7rsDebugPKcDv3_i", (void *)&SC_debugI3, true }, - { "_Z7rsDebugPKcDv4_i", (void *)&SC_debugI4, true }, - { "_Z7rsDebugPKcj", (void *)&SC_debugU32, true }, - { "_Z7rsDebugPKcDv2_j", (void *)&SC_debugUI2, true }, - { "_Z7rsDebugPKcDv3_j", (void *)&SC_debugUI3, true }, - { "_Z7rsDebugPKcDv4_j", (void *)&SC_debugUI4, true }, - // Both "long" and "unsigned long" need to be redirected to their - // 64-bit counterparts, since we have hacked Slang to use 64-bit - // for "long" on Arm (to be similar to Java). 
- { "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true }, - { "_Z7rsDebugPKcDv2_l", (void *)&SC_debugL2, true }, - { "_Z7rsDebugPKcDv3_l", (void *)&SC_debugL3, true }, - { "_Z7rsDebugPKcDv4_l", (void *)&SC_debugL4, true }, - { "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true }, - { "_Z7rsDebugPKcDv2_m", (void *)&SC_debugUL2, true }, - { "_Z7rsDebugPKcDv3_m", (void *)&SC_debugUL3, true }, - { "_Z7rsDebugPKcDv4_m", (void *)&SC_debugUL4, true }, - { "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true }, - { "_Z7rsDebugPKcDv2_x", (void *)&SC_debugL2, true }, - { "_Z7rsDebugPKcDv3_x", (void *)&SC_debugL3, true }, - { "_Z7rsDebugPKcDv4_x", (void *)&SC_debugL4, true }, - { "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true }, - { "_Z7rsDebugPKcDv2_y", (void *)&SC_debugUL2, true }, - { "_Z7rsDebugPKcDv3_y", (void *)&SC_debugUL3, true }, - { "_Z7rsDebugPKcDv4_y", (void *)&SC_debugUL4, true }, - { "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true }, - { NULL, NULL, false } }; -void* rsdLookupRuntimeStub(void* pContext, char const* name) { +extern const RsdCpuReference::CpuSymbol * rsdLookupRuntimeStub(Context * pContext, char const* name) { ScriptC *s = (ScriptC *)pContext; - RsdSymbolTable *syms = gSyms; - const RsdSymbolTable *sym = rsdLookupSymbolMath(name); + const RsdCpuReference::CpuSymbol *syms = gSyms; + const RsdCpuReference::CpuSymbol *sym = NULL; if (!sym) { - while (syms->mPtr) { - if (!strcmp(syms->mName, name)) { - sym = syms; + while (syms->fnPtr) { + if (!strcmp(syms->name, name)) { + return syms; } syms++; } } - if (sym) { - s->mHal.info.isThreadable &= sym->threadable; - return sym->mPtr; - } - ALOGE("ScriptC sym lookup failed for %s", name); return NULL; } diff --git a/driver/rsdScriptGroup.cpp b/driver/rsdScriptGroup.cpp index f4f0f1c1..ef802a24 100644 --- a/driver/rsdScriptGroup.cpp +++ b/driver/rsdScriptGroup.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 The Android Open Source Project + * Copyright (C) 2011-2012 The Android Open Source Project * * Licensed under the Apache 
License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,8 @@ */ #include "rsdCore.h" +#include "../cpu_ref/rsd_cpu.h" -#include <bcc/BCCContext.h> -#include <bcc/Renderscript/RSCompilerDriver.h> -#include <bcc/Renderscript/RSExecutable.h> -#include <bcc/Renderscript/RSInfo.h> #include "rsScript.h" #include "rsScriptGroup.h" @@ -31,236 +28,29 @@ using namespace android; using namespace android::renderscript; -bool rsdScriptGroupInit(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg) { - return true; -} +bool rsdScriptGroupInit(const Context *rsc, ScriptGroup *sg) { + RsdHal *dc = (RsdHal *)rsc->mHal.drv; -void rsdScriptGroupSetInput(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg, - const android::renderscript::ScriptKernelID *kid, - android::renderscript::Allocation *) { + sg->mHal.drv = dc->mCpuRef->createScriptGroup(sg); + return sg->mHal.drv != NULL; } -void rsdScriptGroupSetOutput(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg, - const android::renderscript::ScriptKernelID *kid, - android::renderscript::Allocation *) { +void rsdScriptGroupSetInput(const Context *rsc, const ScriptGroup *sg, + const ScriptKernelID *kid, Allocation *) { } -struct ScriptList { - size_t count; - Allocation *const* ins; - bool const* inExts; - Allocation *const* outs; - bool const* outExts; - const void *const* usrPtrs; - size_t const *usrSizes; - uint32_t const *sigs; - const void *const* fnPtrs; - - const ScriptKernelID *const* kernels; -}; - -typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep); - -static void ScriptGroupRoot(const RsForEachStubParamStruct *p, - uint32_t xstart, uint32_t xend, - uint32_t instep, uint32_t outstep) { - - const ScriptList *sl = (const ScriptList *)p->usr; - 
RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p; - const void *oldUsr = p->usr; - - for(size_t ct=0; ct < sl->count; ct++) { - ScriptGroupRootFunc_t func; - func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; - mp->usr = sl->usrPtrs[ct]; - - mp->ptrIn = NULL; - mp->in = NULL; - mp->ptrOut = NULL; - mp->out = NULL; - - if (sl->ins[ct]) { - DrvAllocation *drv = (DrvAllocation *)sl->ins[ct]->mHal.drv; - mp->ptrIn = (const uint8_t *)drv->lod[0].mallocPtr; - mp->in = mp->ptrIn; - if (sl->inExts[ct]) { - mp->in = mp->ptrIn + drv->lod[0].stride * p->y; - } else { - if (drv->lod[0].dimY > p->lid) { - mp->in = mp->ptrIn + drv->lod[0].stride * p->lid; - } - } - } - - if (sl->outs[ct]) { - DrvAllocation *drv = (DrvAllocation *)sl->outs[ct]->mHal.drv; - mp->ptrOut = (uint8_t *)drv->lod[0].mallocPtr; - mp->out = mp->ptrOut; - if (sl->outExts[ct]) { - mp->out = mp->ptrOut + drv->lod[0].stride * p->y; - } else { - if (drv->lod[0].dimY > p->lid) { - mp->out = mp->ptrOut + drv->lod[0].stride * p->lid; - } - } - } - - //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); - func(p, xstart, xend, instep, outstep); - } - //ALOGE("script group root"); - - //ConvolveParams *cp = (ConvolveParams *)p->usr; - - mp->usr = oldUsr; +void rsdScriptGroupSetOutput(const Context *rsc, const ScriptGroup *sg, + const ScriptKernelID *kid, Allocation *) { } - -void rsdScriptGroupExecute(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg) { - - Vector<Allocation *> ins; - Vector<bool> inExts; - Vector<Allocation *> outs; - Vector<bool> outExts; - Vector<const ScriptKernelID *> kernels; - bool fieldDep = false; - - for (size_t ct=0; ct < sg->mNodes.size(); ct++) { - ScriptGroup::Node *n = sg->mNodes[ct]; - Script *s = n->mKernels[0]->mScript; - - //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); - - for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { - if 
(n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { - //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); - s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); - } - } - - for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { - const ScriptKernelID *k = n->mKernels[ct2]; - Allocation *ain = NULL; - Allocation *aout = NULL; - bool inExt = false; - bool outExt = false; - - for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { - if (n->mInputs[ct3]->mDstKernel.get() == k) { - ain = n->mInputs[ct3]->mAlloc.get(); - //ALOGE(" link in %p", ain); - } - } - for (size_t ct3=0; ct3 < sg->mInputs.size(); ct3++) { - if (sg->mInputs[ct3]->mKernel == k) { - ain = sg->mInputs[ct3]->mAlloc.get(); - inExt = true; - //ALOGE(" io in %p", ain); - } - } - - for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { - if (n->mOutputs[ct3]->mSource.get() == k) { - aout = n->mOutputs[ct3]->mAlloc.get(); - if(n->mOutputs[ct3]->mDstField.get() != NULL) { - fieldDep = true; - } - //ALOGE(" link out %p", aout); - } - } - for (size_t ct3=0; ct3 < sg->mOutputs.size(); ct3++) { - if (sg->mOutputs[ct3]->mKernel == k) { - aout = sg->mOutputs[ct3]->mAlloc.get(); - outExt = true; - //ALOGE(" io out %p", aout); - } - } - - if ((k->mHasKernelOutput == (aout != NULL)) && - (k->mHasKernelInput == (ain != NULL))) { - ins.add(ain); - inExts.add(inExt); - outs.add(aout); - outExts.add(outExt); - kernels.add(k); - } - } - - } - - RsdHal * dc = (RsdHal *)rsc->mHal.drv; - MTLaunchStruct mtls; - - if(fieldDep) { - for (size_t ct=0; ct < ins.size(); ct++) { - Script *s = kernels[ct]->mScript; - DrvScript *drv = (DrvScript *)s->mHal.drv; - uint32_t slot = kernels[ct]->mSlot; - - rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls); - mtls.script = s; - - if (drv->mIntrinsicID) { - mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root; - mtls.fep.usr = drv->mIntrinsicData; - } else { - mtls.kernel = 
reinterpret_cast<ForEachFunc_t>( - drv->mExecutable->getExportForeachFuncAddrs()[slot]); - rsAssert(mtls.kernel != NULL); - mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second; - } - - rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ins[ct], outs[ct], - NULL, 0, NULL, &mtls); - } - } else { - ScriptList sl; - sl.ins = ins.array(); - sl.outs = outs.array(); - sl.kernels = kernels.array(); - sl.count = kernels.size(); - - Vector<const void *> usrPtrs; - Vector<const void *> fnPtrs; - Vector<uint32_t> sigs; - for (size_t ct=0; ct < kernels.size(); ct++) { - Script *s = kernels[ct]->mScript; - DrvScript *drv = (DrvScript *)s->mHal.drv; - - if (drv->mIntrinsicID) { - fnPtrs.add((void *)drv->mIntrinsicFuncs.root); - usrPtrs.add(drv->mIntrinsicData); - sigs.add(0); - } else { - int slot = kernels[ct]->mSlot; - fnPtrs.add((void *)drv->mExecutable->getExportForeachFuncAddrs()[slot]); - usrPtrs.add(NULL); - sigs.add(drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second); - } - } - sl.sigs = sigs.array(); - sl.usrPtrs = usrPtrs.array(); - sl.fnPtrs = fnPtrs.array(); - sl.inExts = inExts.array(); - sl.outExts = outExts.array(); - - rsdScriptInvokeForEachMtlsSetup(rsc, ins[0], outs[0], NULL, 0, NULL, &mtls); - mtls.script = NULL; - mtls.kernel = (void (*)())&ScriptGroupRoot; - mtls.fep.usr = &sl; - rsdScriptLaunchThreads(rsc, true, ins[0], outs[0], NULL, 0, NULL, &mtls); - } - +void rsdScriptGroupExecute(const Context *rsc, const ScriptGroup *sg) { + RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv; + sgi->execute(); } -void rsdScriptGroupDestroy(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg) { +void rsdScriptGroupDestroy(const Context *rsc, const ScriptGroup *sg) { + RsdCpuReference::CpuScriptGroup *sgi = (RsdCpuReference::CpuScriptGroup *)sg->mHal.drv; + delete sgi; } diff --git a/driver/rsdScriptGroup.h b/driver/rsdScriptGroup.h index a817aef4..ee8cd69b 
100644 --- a/driver/rsdScriptGroup.h +++ b/driver/rsdScriptGroup.h @@ -20,7 +20,7 @@ #include <rs_hal.h> bool rsdScriptGroupInit(const android::renderscript::Context *rsc, - const android::renderscript::ScriptGroup *sg); + android::renderscript::ScriptGroup *sg); void rsdScriptGroupSetInput(const android::renderscript::Context *rsc, const android::renderscript::ScriptGroup *sg, const android::renderscript::ScriptKernelID *kid, diff --git a/driver/rsdShader.cpp b/driver/rsdShader.cpp index 36540901..03618448 100644 --- a/driver/rsdShader.cpp +++ b/driver/rsdShader.cpp @@ -346,9 +346,9 @@ void RsdShader::logUniform(const Element *field, const float *fd, uint32_t array rsAssert(0); } } - ALOGE("Element size %u data=%p", elementSize, fd); + ALOGV("Element size %u data=%p", elementSize, fd); fd += elementSize; - ALOGE("New data=%p", fd); + ALOGV("New data=%p", fd); } } @@ -524,8 +524,7 @@ void RsdShader::setupUserConstants(const Context *rsc, RsdShaderCache *sc, bool continue; } - DrvAllocation *adrv = (DrvAllocation *)alloc->mHal.drv; - const uint8_t *data = static_cast<const uint8_t *>(adrv->lod[0].mallocPtr); + const uint8_t *data = static_cast<const uint8_t *>(alloc->mHal.drvState.lod[0].mallocPtr); const Element *e = mRSProgram->mHal.state.constantTypes[ct]->getElement(); for (uint32_t field=0; field < e->mHal.state.fieldsCount; field++) { const Element *f = e->mHal.state.fields[field]; diff --git a/rsAllocation.cpp b/rsAllocation.cpp index b1247d7a..79a48089 100644 --- a/rsAllocation.cpp +++ b/rsAllocation.cpp @@ -235,7 +235,7 @@ void Allocation::dumpLOGV(const char *prefix) const { } ALOGV("%s allocation ptr=%p mUsageFlags=0x04%x, mMipmapControl=0x%04x", - prefix, mHal.drvState.mallocPtrLOD0, mHal.state.usageFlags, mHal.state.mipmapControl); + prefix, mHal.drvState.lod[0].mallocPtr, mHal.state.usageFlags, mHal.state.mipmapControl); } uint32_t Allocation::getPackedSize() const { diff --git a/rsAllocation.h b/rsAllocation.h index c6b918f1..de79cbaa 100644 --- 
a/rsAllocation.h +++ b/rsAllocation.h @@ -70,9 +70,18 @@ public: State state; struct DrvState { - mutable void * mallocPtrLOD0; - mutable uint32_t strideLOD0; - } drvState; + struct LodState { + void * mallocPtr; + size_t stride; + uint32_t dimX; + uint32_t dimY; + uint32_t dimZ; + } lod[android::renderscript::Allocation::MAX_LOD]; + size_t faceOffset; + uint32_t lodCount; + uint32_t faceCount; + }; + mutable DrvState drvState; }; Hal mHal; diff --git a/rsRuntime.h b/rsRuntime.h index 7a1d5e27..3a20cb03 100644 --- a/rsRuntime.h +++ b/rsRuntime.h @@ -29,40 +29,40 @@ namespace renderscript { // Context ////////////////////////////////////////////////////////////////////////////// -void rsrBindTexture(Context *, Script *, ProgramFragment *, uint32_t slot, Allocation *); -void rsrBindConstant(Context *, Script *, ProgramFragment *, uint32_t slot, Allocation *); -void rsrBindConstant(Context *, Script *, ProgramVertex*, uint32_t slot, Allocation *); -void rsrBindSampler(Context *, Script *, ProgramFragment *, uint32_t slot, Sampler *); -void rsrBindProgramStore(Context *, Script *, ProgramStore *); -void rsrBindProgramFragment(Context *, Script *, ProgramFragment *); -void rsrBindProgramVertex(Context *, Script *, ProgramVertex *); -void rsrBindProgramRaster(Context *, Script *, ProgramRaster *); -void rsrBindFrameBufferObjectColorTarget(Context *, Script *, Allocation *, uint32_t slot); -void rsrBindFrameBufferObjectDepthTarget(Context *, Script *, Allocation *); -void rsrClearFrameBufferObjectColorTarget(Context *, Script *, uint32_t slot); -void rsrClearFrameBufferObjectDepthTarget(Context *, Script *); -void rsrClearFrameBufferObjectTargets(Context *, Script *); +void rsrBindTexture(Context *, ProgramFragment *, uint32_t slot, Allocation *); +void rsrBindConstant(Context *, ProgramFragment *, uint32_t slot, Allocation *); +void rsrBindConstant(Context *, ProgramVertex*, uint32_t slot, Allocation *); +void rsrBindSampler(Context *, ProgramFragment *, uint32_t slot, 
Sampler *); +void rsrBindProgramStore(Context *, ProgramStore *); +void rsrBindProgramFragment(Context *, ProgramFragment *); +void rsrBindProgramVertex(Context *, ProgramVertex *); +void rsrBindProgramRaster(Context *, ProgramRaster *); +void rsrBindFrameBufferObjectColorTarget(Context *, Allocation *, uint32_t slot); +void rsrBindFrameBufferObjectDepthTarget(Context *, Allocation *); +void rsrClearFrameBufferObjectColorTarget(Context *, uint32_t slot); +void rsrClearFrameBufferObjectDepthTarget(Context *); +void rsrClearFrameBufferObjectTargets(Context *); ////////////////////////////////////////////////////////////////////////////// // VP ////////////////////////////////////////////////////////////////////////////// -void rsrVpLoadProjectionMatrix(Context *, Script *, const rsc_Matrix *m); -void rsrVpLoadModelMatrix(Context *, Script *, const rsc_Matrix *m); -void rsrVpLoadTextureMatrix(Context *, Script *, const rsc_Matrix *m); -void rsrPfConstantColor(Context *, Script *, ProgramFragment *, float r, float g, float b, float a); -void rsrVpGetProjectionMatrix(Context *, Script *, rsc_Matrix *m); +void rsrVpLoadProjectionMatrix(Context *, const rsc_Matrix *m); +void rsrVpLoadModelMatrix(Context *, const rsc_Matrix *m); +void rsrVpLoadTextureMatrix(Context *, const rsc_Matrix *m); +void rsrPfConstantColor(Context *, ProgramFragment *, float r, float g, float b, float a); +void rsrVpGetProjectionMatrix(Context *, rsc_Matrix *m); ////////////////////////////////////////////////////////////////////////////// // Drawing ////////////////////////////////////////////////////////////////////////////// -void rsrDrawPath(Context *, Script *, Path *); -void rsrDrawMesh(Context *, Script *, Mesh *); -void rsrDrawMeshPrimitive(Context *, Script *, Mesh *, uint32_t primIndex); -void rsrDrawMeshPrimitiveRange(Context *, Script *, Mesh *, +void rsrDrawPath(Context *, Path *); +void rsrDrawMesh(Context *, Mesh *); +void rsrDrawMeshPrimitive(Context *, Mesh *, uint32_t primIndex); 
+void rsrDrawMeshPrimitiveRange(Context *, Mesh *, uint32_t primIndex, uint32_t start, uint32_t len); -void rsrMeshComputeBoundingBox(Context *, Script *, Mesh *, +void rsrMeshComputeBoundingBox(Context *, Mesh *, float *minX, float *minY, float *minZ, float *maxX, float *maxY, float *maxZ); @@ -72,8 +72,7 @@ void rsrMeshComputeBoundingBox(Context *, Script *, Mesh *, ////////////////////////////////////////////////////////////////////////////// -void rsrColor(Context *, Script *, float r, float g, float b, float a); -void rsrAllocationSyncAll(Context *, Script *, Allocation *); +void rsrColor(Context *, float r, float g, float b, float a); void rsrAllocationCopy1DRange(Context *, Allocation *dstAlloc, uint32_t dstOff, @@ -89,44 +88,44 @@ void rsrAllocationCopy2DRange(Context *, Allocation *dstAlloc, uint32_t srcXoff, uint32_t srcYoff, uint32_t srcMip, uint32_t srcFace); -void rsrPrepareClear(Context *, Script *); -uint32_t rsrGetWidth(Context *, Script *); -uint32_t rsrGetHeight(Context *, Script *); -void rsrDrawTextAlloc(Context *, Script *, Allocation *, int x, int y); -void rsrDrawText(Context *, Script *, const char *text, int x, int y); -void rsrSetMetrics(Context *, Script *, Font::Rect *metrics, +void rsrPrepareClear(Context *); +uint32_t rsrGetWidth(Context *); +uint32_t rsrGetHeight(Context *); +void rsrDrawTextAlloc(Context *, Allocation *, int x, int y); +void rsrDrawText(Context *, const char *text, int x, int y); +void rsrSetMetrics(Context *, Font::Rect *metrics, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom); -void rsrMeasureTextAlloc(Context *, Script *, Allocation *, +void rsrMeasureTextAlloc(Context *, Allocation *, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom); -void rsrMeasureText(Context *, Script *, const char *text, +void rsrMeasureText(Context *, const char *text, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom); -void rsrBindFont(Context *, Script *, Font *); -void rsrFontColor(Context *, Script 
*, float r, float g, float b, float a); +void rsrBindFont(Context *, Font *); +void rsrFontColor(Context *, float r, float g, float b, float a); ////////////////////////////////////////////////////////////////////////////// // Time routines ////////////////////////////////////////////////////////////////////////////// -float rsrGetDt(Context *, Script *); -time_t rsrTime(Context *, Script *, time_t *timer); -tm* rsrLocalTime(Context *, Script *, tm *local, time_t *timer); -int64_t rsrUptimeMillis(Context *, Script *); -int64_t rsrUptimeNanos(Context *, Script *); +float rsrGetDt(Context *, const Script *sc); +time_t rsrTime(Context *, time_t *timer); +tm* rsrLocalTime(Context *, tm *local, time_t *timer); +int64_t rsrUptimeMillis(Context *); +int64_t rsrUptimeNanos(Context *); ////////////////////////////////////////////////////////////////////////////// // Message routines ////////////////////////////////////////////////////////////////////////////// -uint32_t rsrToClient(Context *, Script *, int cmdID, void *data, int len); -uint32_t rsrToClientBlocking(Context *, Script *, int cmdID, void *data, int len); +uint32_t rsrToClient(Context *, int cmdID, void *data, int len); +uint32_t rsrToClientBlocking(Context *, int cmdID, void *data, int len); ////////////////////////////////////////////////////////////////////////////// // ////////////////////////////////////////////////////////////////////////////// -void rsrSetObject(const Context *, const Script *, ObjectBase **dst, ObjectBase * src); -void rsrClearObject(const Context *, const Script *, ObjectBase **dst); -bool rsrIsObject(const Context *, const Script *, const ObjectBase *src); +void rsrSetObject(const Context *, ObjectBase **dst, ObjectBase * src); +void rsrClearObject(const Context *, ObjectBase **dst); +bool rsrIsObject(const Context *, const ObjectBase *src); void rsrAllocationIncRefs(const Context *, const Allocation *, void *ptr, size_t elementCount, size_t startOffset); @@ -134,14 +133,10 @@ void 
rsrAllocationDecRefs(const Context *, const Allocation *, void *ptr, size_t elementCount, size_t startOffset); -uint32_t rsrToClient(Context *, Script *, int cmdID, void *data, int len); -uint32_t rsrToClientBlocking(Context *, Script *, int cmdID, void *data, int len); +void rsrAllocationSyncAll(Context *, Allocation *a, RsAllocationUsageType source); -void rsrAllocationMarkDirty(Context *, Script *, RsAllocation a); -void rsrAllocationSyncAll(Context *, Script *, Allocation *a, RsAllocationUsageType source); - -void rsrForEach(Context *, Script *, Script *target, +void rsrForEach(Context *, Script *target, Allocation *in, Allocation *out, const void *usr, @@ -72,7 +72,6 @@ public: char const **exportedPragmaValueList; int (* root)(); - bool isThreadable; }; DriverInfo info; }; @@ -85,7 +84,7 @@ public: struct Enviroment_t { int64_t mStartTimeMillis; - int64_t mLastDtTime; + mutable int64_t mLastDtTime; ObjectBaseRef<ProgramVertex> mVertex; ObjectBaseRef<ProgramFragment> mFragment; diff --git a/rsScriptC.h b/rsScriptC.h index 4ef2c4b8..75fb0f49 100644 --- a/rsScriptC.h +++ b/rsScriptC.h @@ -60,8 +60,7 @@ public: //protected: void setupScript(Context *); void setupGLState(Context *); - Script * setTLS(Script *); - private: +private: #ifndef ANDROID_RS_SERIALIZE bcinfo::BitcodeTranslator *BT; #endif diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp index ac3dd125..e8c9d1d4 100644 --- a/rsScriptC_Lib.cpp +++ b/rsScriptC_Lib.cpp @@ -79,11 +79,11 @@ static float SC_cosf_fast(float x) { // Time routines ////////////////////////////////////////////////////////////////////////////// -time_t rsrTime(Context *rsc, Script *sc, time_t *timer) { +time_t rsrTime(Context *rsc, time_t *timer) { return time(timer); } -tm* rsrLocalTime(Context *rsc, Script *sc, tm *local, time_t *timer) { +tm* rsrLocalTime(Context *rsc, tm *local, time_t *timer) { if (!local) { return NULL; } @@ -97,15 +97,15 @@ tm* rsrLocalTime(Context *rsc, Script *sc, tm *local, time_t *timer) { return local; } 
-int64_t rsrUptimeMillis(Context *rsc, Script *sc) { +int64_t rsrUptimeMillis(Context *rsc) { return nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC)); } -int64_t rsrUptimeNanos(Context *rsc, Script *sc) { +int64_t rsrUptimeNanos(Context *rsc) { return systemTime(SYSTEM_TIME_MONOTONIC); } -float rsrGetDt(Context *rsc, Script *sc) { +float rsrGetDt(Context *rsc, const Script *sc) { int64_t l = sc->mEnviroment.mLastDtTime; sc->mEnviroment.mLastDtTime = systemTime(SYSTEM_TIME_MONOTONIC); return ((float)(sc->mEnviroment.mLastDtTime - l)) / 1.0e9; @@ -115,7 +115,7 @@ float rsrGetDt(Context *rsc, Script *sc) { // ////////////////////////////////////////////////////////////////////////////// -void rsrSetObject(const Context *rsc, const Script *sc, ObjectBase **dst, ObjectBase * src) { +void rsrSetObject(const Context *rsc, ObjectBase **dst, ObjectBase * src) { //ALOGE("rsiSetObject %p,%p %p", vdst, *vdst, vsrc); if (src) { CHECK_OBJ(src); @@ -128,7 +128,7 @@ void rsrSetObject(const Context *rsc, const Script *sc, ObjectBase **dst, Object *dst = src; } -void rsrClearObject(const Context *rsc, const Script *sc, ObjectBase **dst) { +void rsrClearObject(const Context *rsc, ObjectBase **dst) { //ALOGE("rsiClearObject %p,%p", vdst, *vdst); if (dst[0]) { CHECK_OBJ(dst[0]); @@ -137,23 +137,23 @@ void rsrClearObject(const Context *rsc, const Script *sc, ObjectBase **dst) { *dst = NULL; } -bool rsrIsObject(const Context *rsc, const Script *sc, const ObjectBase *src) { +bool rsrIsObject(const Context *rsc, const ObjectBase *src) { return src != NULL; } -uint32_t rsrToClient(Context *rsc, Script *sc, int cmdID, void *data, int len) { +uint32_t rsrToClient(Context *rsc, int cmdID, void *data, int len) { //ALOGE("SC_toClient %i %i %i", cmdID, len); return rsc->sendMessageToClient(data, RS_MESSAGE_TO_CLIENT_USER, cmdID, len, false); } -uint32_t rsrToClientBlocking(Context *rsc, Script *sc, int cmdID, void *data, int len) { +uint32_t rsrToClientBlocking(Context *rsc, int 
cmdID, void *data, int len) { //ALOGE("SC_toClientBlocking %i %i", cmdID, len); return rsc->sendMessageToClient(data, RS_MESSAGE_TO_CLIENT_USER, cmdID, len, true); } -void rsrForEach(Context *rsc, Script *sc, +void rsrForEach(Context *rsc, Script *target, Allocation *in, Allocation *out, const void *usr, uint32_t usrBytes, @@ -161,7 +161,7 @@ void rsrForEach(Context *rsc, Script *sc, target->runForEach(rsc, /* root slot */ 0, in, out, usr, usrBytes, call); } -void rsrAllocationSyncAll(Context *rsc, Script *sc, Allocation *a, RsAllocationUsageType usage) { +void rsrAllocationSyncAll(Context *rsc, Allocation *a, RsAllocationUsageType usage) { a->syncAll(rsc, usage); } diff --git a/rsScriptC_LibGL.cpp b/rsScriptC_LibGL.cpp index 63fb53e6..279ddb2c 100644 --- a/rsScriptC_LibGL.cpp +++ b/rsScriptC_LibGL.cpp @@ -46,73 +46,73 @@ namespace renderscript { // Context ////////////////////////////////////////////////////////////////////////////// -void rsrBindTexture(Context *rsc, Script *sc, ProgramFragment *pf, uint32_t slot, Allocation *a) { +void rsrBindTexture(Context *rsc, ProgramFragment *pf, uint32_t slot, Allocation *a) { CHECK_OBJ_OR_NULL(a); CHECK_OBJ(pf); pf->bindTexture(rsc, slot, a); } -void rsrBindConstant(Context *rsc, Script *sc, ProgramFragment *pf, uint32_t slot, Allocation *a) { +void rsrBindConstant(Context *rsc, ProgramFragment *pf, uint32_t slot, Allocation *a) { CHECK_OBJ_OR_NULL(a); CHECK_OBJ(pf); pf->bindAllocation(rsc, a, slot); } -void rsrBindConstant(Context *rsc, Script *sc, ProgramVertex *pv, uint32_t slot, Allocation *a) { +void rsrBindConstant(Context *rsc, ProgramVertex *pv, uint32_t slot, Allocation *a) { CHECK_OBJ_OR_NULL(a); CHECK_OBJ(pv); pv->bindAllocation(rsc, a, slot); } -void rsrBindSampler(Context *rsc, Script *sc, ProgramFragment *pf, uint32_t slot, Sampler *s) { +void rsrBindSampler(Context *rsc, ProgramFragment *pf, uint32_t slot, Sampler *s) { CHECK_OBJ_OR_NULL(vs); CHECK_OBJ(vpf); pf->bindSampler(rsc, slot, s); } -void 
rsrBindProgramStore(Context *rsc, Script *sc, ProgramStore *ps) { +void rsrBindProgramStore(Context *rsc, ProgramStore *ps) { CHECK_OBJ_OR_NULL(ps); rsc->setProgramStore(ps); } -void rsrBindProgramFragment(Context *rsc, Script *sc, ProgramFragment *pf) { +void rsrBindProgramFragment(Context *rsc, ProgramFragment *pf) { CHECK_OBJ_OR_NULL(pf); rsc->setProgramFragment(pf); } -void rsrBindProgramVertex(Context *rsc, Script *sc, ProgramVertex *pv) { +void rsrBindProgramVertex(Context *rsc, ProgramVertex *pv) { CHECK_OBJ_OR_NULL(pv); rsc->setProgramVertex(pv); } -void rsrBindProgramRaster(Context *rsc, Script *sc, ProgramRaster *pr) { +void rsrBindProgramRaster(Context *rsc, ProgramRaster *pr) { CHECK_OBJ_OR_NULL(pr); rsc->setProgramRaster(pr); } -void rsrBindFrameBufferObjectColorTarget(Context *rsc, Script *sc, Allocation *a, uint32_t slot) { +void rsrBindFrameBufferObjectColorTarget(Context *rsc, Allocation *a, uint32_t slot) { CHECK_OBJ(va); rsc->mFBOCache.bindColorTarget(rsc, a, slot); rsc->mStateVertex.updateSize(rsc); } -void rsrBindFrameBufferObjectDepthTarget(Context *rsc, Script *sc, Allocation *a) { +void rsrBindFrameBufferObjectDepthTarget(Context *rsc, Allocation *a) { CHECK_OBJ(va); rsc->mFBOCache.bindDepthTarget(rsc, a); rsc->mStateVertex.updateSize(rsc); } -void rsrClearFrameBufferObjectColorTarget(Context *rsc, Script *sc, uint32_t slot) { +void rsrClearFrameBufferObjectColorTarget(Context *rsc, uint32_t slot) { rsc->mFBOCache.bindColorTarget(rsc, NULL, slot); rsc->mStateVertex.updateSize(rsc); } -void rsrClearFrameBufferObjectDepthTarget(Context *rsc, Script *sc) { +void rsrClearFrameBufferObjectDepthTarget(Context *rsc) { rsc->mFBOCache.bindDepthTarget(rsc, NULL); rsc->mStateVertex.updateSize(rsc); } -void rsrClearFrameBufferObjectTargets(Context *rsc, Script *sc) { +void rsrClearFrameBufferObjectTargets(Context *rsc) { rsc->mFBOCache.resetAll(rsc); rsc->mStateVertex.updateSize(rsc); } @@ -121,25 +121,25 @@ void rsrClearFrameBufferObjectTargets(Context 
*rsc, Script *sc) { // VP ////////////////////////////////////////////////////////////////////////////// -void rsrVpLoadProjectionMatrix(Context *rsc, Script *sc, const rsc_Matrix *m) { +void rsrVpLoadProjectionMatrix(Context *rsc, const rsc_Matrix *m) { rsc->getProgramVertex()->setProjectionMatrix(rsc, m); } -void rsrVpLoadModelMatrix(Context *rsc, Script *sc, const rsc_Matrix *m) { +void rsrVpLoadModelMatrix(Context *rsc, const rsc_Matrix *m) { rsc->getProgramVertex()->setModelviewMatrix(rsc, m); } -void rsrVpLoadTextureMatrix(Context *rsc, Script *sc, const rsc_Matrix *m) { +void rsrVpLoadTextureMatrix(Context *rsc, const rsc_Matrix *m) { rsc->getProgramVertex()->setTextureMatrix(rsc, m); } -void rsrPfConstantColor(Context *rsc, Script *sc, ProgramFragment *pf, +void rsrPfConstantColor(Context *rsc, ProgramFragment *pf, float r, float g, float b, float a) { CHECK_OBJ(pf); pf->setConstantColor(rsc, r, g, b, a); } -void rsrVpGetProjectionMatrix(Context *rsc, Script *sc, rsc_Matrix *m) { +void rsrVpGetProjectionMatrix(Context *rsc, rsc_Matrix *m) { rsc->getProgramVertex()->getProjectionMatrix(rsc, m); } @@ -148,7 +148,7 @@ void rsrVpGetProjectionMatrix(Context *rsc, Script *sc, rsc_Matrix *m) { ////////////////////////////////////////////////////////////////////////////// -void rsrDrawPath(Context *rsc, Script *sc, Path *sm) { +void rsrDrawPath(Context *rsc, Path *sm) { CHECK_OBJ(sm); if (!rsc->setupCheck()) { return; @@ -156,7 +156,7 @@ void rsrDrawPath(Context *rsc, Script *sc, Path *sm) { sm->render(rsc); } -void rsrDrawMesh(Context *rsc, Script *sc, Mesh *sm) { +void rsrDrawMesh(Context *rsc, Mesh *sm) { CHECK_OBJ(sm); if (!rsc->setupCheck()) { return; @@ -164,7 +164,7 @@ void rsrDrawMesh(Context *rsc, Script *sc, Mesh *sm) { sm->render(rsc); } -void rsrDrawMeshPrimitive(Context *rsc, Script *sc, Mesh *sm, uint32_t primIndex) { +void rsrDrawMeshPrimitive(Context *rsc, Mesh *sm, uint32_t primIndex) { CHECK_OBJ(sm); if (!rsc->setupCheck()) { return; @@ -172,7 
+172,7 @@ void rsrDrawMeshPrimitive(Context *rsc, Script *sc, Mesh *sm, uint32_t primIndex sm->renderPrimitive(rsc, primIndex); } -void rsrDrawMeshPrimitiveRange(Context *rsc, Script *sc, Mesh *sm, uint32_t primIndex, +void rsrDrawMeshPrimitiveRange(Context *rsc, Mesh *sm, uint32_t primIndex, uint32_t start, uint32_t len) { CHECK_OBJ(sm); if (!rsc->setupCheck()) { @@ -181,7 +181,7 @@ void rsrDrawMeshPrimitiveRange(Context *rsc, Script *sc, Mesh *sm, uint32_t prim sm->renderPrimitiveRange(rsc, primIndex, start, len); } -void rsrMeshComputeBoundingBox(Context *rsc, Script *sc, Mesh *sm, +void rsrMeshComputeBoundingBox(Context *rsc, Mesh *sm, float *minX, float *minY, float *minZ, float *maxX, float *maxY, float *maxZ) { CHECK_OBJ(sm); @@ -200,32 +200,32 @@ void rsrMeshComputeBoundingBox(Context *rsc, Script *sc, Mesh *sm, ////////////////////////////////////////////////////////////////////////////// -void rsrColor(Context *rsc, Script *sc, float r, float g, float b, float a) { +void rsrColor(Context *rsc, float r, float g, float b, float a) { ProgramFragment *pf = rsc->getProgramFragment(); pf->setConstantColor(rsc, r, g, b, a); } -void rsrPrepareClear(Context *rsc, Script *sc) { +void rsrPrepareClear(Context *rsc) { rsc->mFBOCache.setup(rsc); rsc->setupProgramStore(); } -uint32_t rsrGetWidth(Context *rsc, Script *sc) { +uint32_t rsrGetWidth(Context *rsc) { return rsc->getWidth(); } -uint32_t rsrGetHeight(Context *rsc, Script *sc) { +uint32_t rsrGetHeight(Context *rsc) { return rsc->getHeight(); } -void rsrDrawTextAlloc(Context *rsc, Script *sc, Allocation *a, int x, int y) { +void rsrDrawTextAlloc(Context *rsc, Allocation *a, int x, int y) { const char *text = (const char *)rsc->mHal.funcs.allocation.lock1D(rsc, a); size_t allocSize = a->getType()->getSizeBytes(); rsc->mStateFont.renderText(text, allocSize, x, y); rsc->mHal.funcs.allocation.unlock1D(rsc, a); } -void rsrDrawText(Context *rsc, Script *sc, const char *text, int x, int y) { +void rsrDrawText(Context 
*rsc, const char *text, int x, int y) { size_t textLen = strlen(text); rsc->mStateFont.renderText(text, textLen, x, y); } @@ -246,7 +246,7 @@ static void SetMetrics(Font::Rect *metrics, } } -void rsrMeasureTextAlloc(Context *rsc, Script *sc, Allocation *a, +void rsrMeasureTextAlloc(Context *rsc, Allocation *a, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) { CHECK_OBJ(a); const char *text = (const char *)rsc->mHal.funcs.allocation.lock1D(rsc, a); @@ -257,7 +257,7 @@ void rsrMeasureTextAlloc(Context *rsc, Script *sc, Allocation *a, rsc->mHal.funcs.allocation.unlock1D(rsc, a); } -void rsrMeasureText(Context *rsc, Script *sc, const char *text, +void rsrMeasureText(Context *rsc, const char *text, int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) { size_t textLen = strlen(text); Font::Rect metrics; @@ -265,12 +265,12 @@ void rsrMeasureText(Context *rsc, Script *sc, const char *text, SetMetrics(&metrics, left, right, top, bottom); } -void rsrBindFont(Context *rsc, Script *sc, Font *font) { +void rsrBindFont(Context *rsc, Font *font) { CHECK_OBJ(font); rsi_ContextBindFont(rsc, font); } -void rsrFontColor(Context *rsc, Script *sc, float r, float g, float b, float a) { +void rsrFontColor(Context *rsc, float r, float g, float b, float a) { rsc->mStateFont.setFontColor(r, g, b, a); } @@ -265,7 +265,7 @@ typedef struct { } framebuffer; struct { - bool (*init)(const Context *rsc, const ScriptGroup *sg); + bool (*init)(const Context *rsc, ScriptGroup *sg); void (*setInput)(const Context *rsc, const ScriptGroup *sg, const ScriptKernelID *kid, Allocation *); void (*setOutput)(const Context *rsc, const ScriptGroup *sg, |