From 75f0d3110b04346b901771f96ce15cdbe907278f Mon Sep 17 00:00:00 2001 From: Yang Ni Date: Fri, 11 Nov 2016 12:30:09 -0800 Subject: Initial driver for RSoV (RenderScript over Vulkan/SPIR-V) Bug: 30964317 Supports the following: * Allocations of 32-bit integers and floating point numbers and vectors * Single-input single-output kernels * Co-existence of RSoV scripts and CPU intrinsics Added default .clang-format for driver code using Google C++ code style. The RSoV driver is loaded, if and only if the property debug.rs.rsov is set to non-zero. Test: RSTest and CTS with debug.rs.rsov set to 0; and RSoVTest with debug.rs.rsov set to 1. Change-Id: If63370a502d499e8fc5f4bbd2e90ce84b167c331 --- rsContext.cpp | 8 +- rsContext.h | 6 +- rsDriverLoader.cpp | 15 +- rsov/compiler/Android.mk | 19 +- rsov/compiler/bcc_rsov.sh | 65 ++ rsov/driver/.clang-format | 95 ++ rsov/driver/Android.mk | 56 + rsov/driver/rsovAllocation.cpp | 790 ++++++++++++++ rsov/driver/rsovAllocation.h | 216 ++++ rsov/driver/rsovContext.cpp | 167 +++ rsov/driver/rsovContext.h | 62 ++ rsov/driver/rsovCore.cpp | 322 ++++++ rsov/driver/rsovCore.h | 40 + rsov/driver/rsovElement.cpp | 36 + rsov/driver/rsovElement.h | 33 + rsov/driver/rsovRuntimeStubs.cpp | 1150 ++++++++++++++++++++ rsov/driver/rsovSampler.cpp | 41 + rsov/driver/rsovSampler.h | 33 + rsov/driver/rsovScript.cpp | 675 ++++++++++++ rsov/driver/rsovScript.h | 208 ++++ rsov/driver/rsovScriptGroup.cpp | 57 + rsov/driver/rsovScriptGroup.h | 37 + rsov/driver/rsovType.cpp | 36 + rsov/driver/rsovType.h | 32 + rsov/tests/RSoVTest/Android.mk | 29 + rsov/tests/RSoVTest/AndroidManifest.xml | 20 + .../RSoVTest/res/drawable-nodpi/test_pattern.png | Bin 0 -> 308 bytes .../src/com/android/rs/rsov/test/RSoVTest.java | 49 + .../src/com/android/rs/rsov/test/RSoVTestCore.java | 153 +++ .../src/com/android/rs/rsov/test/UT_invert.java | 101 ++ .../src/com/android/rs/rsov/test/UT_modulo.java | 86 ++ .../src/com/android/rs/rsov/test/UnitTest.java | 148 +++ 
.../src/com/android/rs/rsov/test/invert.rs | 23 + .../src/com/android/rs/rsov/test/modulo.rs | 23 + .../src/com/android/rs/rsov/test/rslist.rs | 25 + .../src/com/android/rs/rsov/test/shared.rsh | 116 ++ 36 files changed, 4964 insertions(+), 8 deletions(-) create mode 100755 rsov/compiler/bcc_rsov.sh create mode 100644 rsov/driver/.clang-format create mode 100644 rsov/driver/Android.mk create mode 100644 rsov/driver/rsovAllocation.cpp create mode 100644 rsov/driver/rsovAllocation.h create mode 100644 rsov/driver/rsovContext.cpp create mode 100644 rsov/driver/rsovContext.h create mode 100644 rsov/driver/rsovCore.cpp create mode 100644 rsov/driver/rsovCore.h create mode 100644 rsov/driver/rsovElement.cpp create mode 100644 rsov/driver/rsovElement.h create mode 100644 rsov/driver/rsovRuntimeStubs.cpp create mode 100644 rsov/driver/rsovSampler.cpp create mode 100644 rsov/driver/rsovSampler.h create mode 100644 rsov/driver/rsovScript.cpp create mode 100644 rsov/driver/rsovScript.h create mode 100644 rsov/driver/rsovScriptGroup.cpp create mode 100644 rsov/driver/rsovScriptGroup.h create mode 100644 rsov/driver/rsovType.cpp create mode 100644 rsov/driver/rsovType.h create mode 100644 rsov/tests/RSoVTest/Android.mk create mode 100644 rsov/tests/RSoVTest/AndroidManifest.xml create mode 100644 rsov/tests/RSoVTest/res/drawable-nodpi/test_pattern.png create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTest.java create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_invert.java create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_modulo.java create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UnitTest.java create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/invert.rs create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/modulo.rs create mode 100644 
rsov/tests/RSoVTest/src/com/android/rs/rsov/test/rslist.rs create mode 100644 rsov/tests/RSoVTest/src/com/android/rs/rsov/test/shared.rsh diff --git a/rsContext.cpp b/rsContext.cpp index a706a8bc..0c25d04d 100644 --- a/rsContext.cpp +++ b/rsContext.cpp @@ -270,6 +270,12 @@ void * Context::threadProc(void *vrsc) { rsc->mForceCpu = true; } + // Assign unconditionally: this patch adds no other initializer for mForceRSoV. + rsc->mForceRSoV = getProp("debug.rs.rsov") != 0; + if (rsc->mForceRSoV) { + ALOGD("Force the use of RSoV driver"); + } + bool forceCpu = getProp("debug.rs.default-CPU-driver") != 0; if (forceCpu) { ALOGD("Skipping hardware driver and loading default CPU driver"); @@ -277,7 +283,7 @@ void * Context::threadProc(void *vrsc) { } rsc->mForceCpu |= rsc->mIsGraphicsContext; - rsc->loadDriver(rsc->mForceCpu); + rsc->loadDriver(rsc->mForceCpu, rsc->mForceRSoV); if (!rsc->isSynchronous()) { // Due to legacy we default to normal_graphics diff --git a/rsContext.h b/rsContext.h index fe771ec5..e36eddb7 100644 --- a/rsContext.h +++ b/rsContext.h @@ -312,6 +312,7 @@ protected: int32_t mThreadPriority; bool mIsGraphicsContext; + bool mForceRSoV; bool mForceCpu; RsContextType mContextType; @@ -349,7 +350,10 @@ private: uint32_t runRootScript(); bool loadRuntime(const char* filename); - bool loadDriver(bool forceDefault); + // Loads the driver. + // forceDefault: If true, loads the default CPU driver. + // forceRSoV: If true, overrides forceDefault and loads the RSoV driver. + bool loadDriver(bool forceDefault, bool forceRSoV); static void * threadProc(void *); static void * helperThreadProc(void *); diff --git a/rsDriverLoader.cpp b/rsDriverLoader.cpp index 16efa0d7..9e3c39da 100644 --- a/rsDriverLoader.cpp +++ b/rsDriverLoader.cpp @@ -232,16 +232,27 @@ error: -bool Context::loadDriver(bool forceDefault) { +bool Context::loadDriver(bool forceDefault, bool forceRSoV) { bool loadDefault = true; // Provide a mechanism for dropping in a different RS driver. 
#ifndef RS_COMPATIBILITY_LIB + + if (forceRSoV) { + // If the debug property is set to use the RSoV driver, load it and fail + // if it does not load. + if (loadRuntime("libRSDriver_RSoV.so")) { + ALOGV("Successfully loaded the RSoV driver!"); + return true; + } + ALOGE("Failed to load the RSoV driver!"); + return false; + } + #ifdef OVERRIDE_RS_DRIVER #define XSTR(S) #S #define STR(S) XSTR(S) #define OVERRIDE_RS_DRIVER_STRING STR(OVERRIDE_RS_DRIVER) - if (!forceDefault) { if (loadRuntime(OVERRIDE_RS_DRIVER_STRING)) { ALOGV("Successfully loaded runtime: %s", OVERRIDE_RS_DRIVER_STRING); diff --git a/rsov/compiler/Android.mk b/rsov/compiler/Android.mk index 7f31d67c..477f1d9d 100644 --- a/rsov/compiler/Android.mk +++ b/rsov/compiler/Android.mk @@ -59,12 +59,9 @@ LOCAL_C_INCLUDES := \ LOCAL_MODULE := rs2spirv LOCAL_MODULE_CLASS := EXECUTABLES -# TODO: handle windows and darwin - -LOCAL_MODULE_HOST_OS := linux LOCAL_IS_HOST_MODULE := true -LOCAL_SHARED_LIBRARIES_linux += libLLVM libbcinfo libSPIRV +LOCAL_SHARED_LIBRARIES += libLLVM libbcinfo libSPIRV # TODO: fix the remaining warnings @@ -131,6 +128,20 @@ include $(BUILD_EXECUTABLE) endif # Don't build in unbundled branches +#===================================================================== +# Device executable bcc_rsov +#===================================================================== + +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +LOCAL_MODULE:= bcc_rsov +LOCAL_MULTILIB := first +LOCAL_MODULE_CLASS := EXECUTABLES +LOCAL_SRC_FILES := bcc_rsov.sh + +include $(BUILD_PREBUILT) + #===================================================================== # Include Subdirectories #===================================================================== diff --git a/rsov/compiler/bcc_rsov.sh b/rsov/compiler/bcc_rsov.sh new file mode 100755 index 00000000..c2cb2196 --- /dev/null +++ b/rsov/compiler/bcc_rsov.sh @@ -0,0 +1,65 @@ +#! 
/system/bin/sh +# +# Copyright 2016, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +function help() { + echo "USAGE: $0 [options] " + echo + echo "OPTIONS:" + echo " -h Show this help message." + echo " -o Write output to file." +} + +OUTPUT_FILE="" + +while getopts "ho:" opt; do + case "$opt" in + h) + help + exit 0 + ;; + o) + OUTPUT_FILE=$OPTARG + ;; + esac +done + +shift $((OPTIND-1)) + +if [[ "$#" -ne 1 ]]; then + help + exit -1 +fi + +INPUT_FILE=$1 + +if [[ -z "$OUTPUT_FILE" ]]; then + OUTPUT_FILE="${INPUT_FILE%.*}.spv" +fi + +KERNEL="${INPUT_FILE%.*}_k.spv" +KERNEL_TXT="${INPUT_FILE%.*}_k.spt" +WRAPPER="${INPUT_FILE%.*}_w.spt" +OUTPUT_TXT="${INPUT_FILE%.*}.spt" + +rs2spirv "$INPUT_FILE" -o "$KERNEL" -wo "$WRAPPER" && +spirv-dis "$KERNEL" --no-color -o "$KERNEL_TXT" && +rs2spirv -o "$OUTPUT_TXT" -lk "$KERNEL_TXT" -lw "$WRAPPER" && +spirv-as "$OUTPUT_TXT" -o "$OUTPUT_FILE" # paths are quoted and run without eval so the shell never re-parses them + +#rm -f $INPUT_FILE $KERNEL $KERNEL_TXT $WRAPPER $OUTPUT_TXT + +exit $? 
diff --git a/rsov/driver/.clang-format b/rsov/driver/.clang-format new file mode 100644 index 00000000..e8b6a2d9 --- /dev/null +++ b/rsov/driver/.clang-format @@ -0,0 +1,95 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: true +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: 
true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +TabWidth: 8 +UseTab: Never +... + diff --git a/rsov/driver/Android.mk b/rsov/driver/Android.mk new file mode 100644 index 00000000..5309ba8e --- /dev/null +++ b/rsov/driver/Android.mk @@ -0,0 +1,56 @@ +# +# Copyright (C) 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +LOCAL_PATH :=$(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := libRSDriver_RSoV + +LOCAL_SRC_FILES := \ + rsovAllocation.cpp \ + rsovContext.cpp \ + rsovCore.cpp \ + rsovElement.cpp \ + rsovRuntimeStubs.cpp \ + rsovSampler.cpp \ + rsovScript.cpp \ + rsovScriptGroup.cpp \ + rsovType.cpp \ + +LOCAL_SHARED_LIBRARIES := \ + libRS_internal \ + libRSCpuRef \ + libc++ \ + libcutils \ + libdl \ + liblog \ + libsync \ + libutils \ + libvulkan + +LOCAL_C_INCLUDES := \ + frameworks/native/vulkan/include \ + frameworks/rs \ + frameworks/rs/cpu_ref \ + +LOCAL_C_INCLUDES += \ + +LOCAL_CFLAGS := -Werror -Wall -Wextra -fno-exceptions +# TODO: remove warnings on unused variables and parameters +LOCAL_CFLAGS += -Wno-unused-variable -Wno-unused-parameter + +include $(BUILD_SHARED_LIBRARY) diff --git a/rsov/driver/rsovAllocation.cpp b/rsov/driver/rsovAllocation.cpp new file mode 100644 index 00000000..fdb477d1 --- /dev/null +++ b/rsov/driver/rsovAllocation.cpp @@ -0,0 +1,790 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "rsovAllocation.h" + +#include + +#include "rsAllocation.h" +#include "rsContext.h" +#include "rsCppUtils.h" +#include "rsElement.h" +#include "rsType.h" +#include "rsovContext.h" +#include "rsovCore.h" + +namespace android { +namespace renderscript { +namespace rsov { + +namespace { + +using std::make_pair; + +// TODO: handle 8-bit, 16-bit, and 64-bit integers and floating point numbers +const std::map, VkFormat> mapElementToFormat{ + make_pair(make_pair(RS_TYPE_FLOAT_32, 1), VK_FORMAT_R32_SFLOAT), + make_pair(make_pair(RS_TYPE_FLOAT_32, 2), VK_FORMAT_R32G32_SFLOAT), + make_pair(make_pair(RS_TYPE_FLOAT_32, 3), VK_FORMAT_R32G32B32_SFLOAT), + make_pair(make_pair(RS_TYPE_FLOAT_32, 4), VK_FORMAT_R32G32B32A32_SFLOAT), + + make_pair(make_pair(RS_TYPE_SIGNED_32, 1), VK_FORMAT_R32_SINT), + make_pair(make_pair(RS_TYPE_SIGNED_32, 2), VK_FORMAT_R32G32_SINT), + make_pair(make_pair(RS_TYPE_SIGNED_32, 3), VK_FORMAT_R32G32B32_SINT), + make_pair(make_pair(RS_TYPE_SIGNED_32, 4), VK_FORMAT_R32G32B32A32_SINT), + + make_pair(make_pair(RS_TYPE_UNSIGNED_32, 1), VK_FORMAT_R32_UINT), + make_pair(make_pair(RS_TYPE_UNSIGNED_32, 2), VK_FORMAT_R32G32_UINT), + make_pair(make_pair(RS_TYPE_UNSIGNED_32, 3), VK_FORMAT_R32G32B32_UINT), + make_pair(make_pair(RS_TYPE_UNSIGNED_32, 4), VK_FORMAT_R32G32B32A32_UINT), +}; + +VkFormat VkFormatFromRSElement(const Element &elem) { + // TODO: reject struct, allocation, and other non-numeric element + rsAssert(!elem.getFieldCount()); + + RsDataType dataType = elem.getType(); + uint32_t vectorWidth = elem.getVectorSize(); + + auto it = mapElementToFormat.find(make_pair(dataType, vectorWidth)); + if (it != mapElementToFormat.end()) { + return it->second; + } + + rsAssert(0 && "Unexpected RS Element to map to VkFormat"); + + return VK_FORMAT_R32G32B32A32_SFLOAT; +} + +size_t DeriveYUVLayout(int yuv, Allocation::Hal::DrvState *state) { + // For the flexible YCbCr format, layout is initialized during call to + // Allocation::ioReceive. 
Return early and avoid clobbering any + // pre-existing layout. + if (yuv == HAL_PIXEL_FORMAT_YCbCr_420_888) { + return 0; + } + + // YUV only supports basic 2d + // so we can stash the plane pointers in the mipmap levels. + size_t uvSize = 0; + state->lod[1].dimX = state->lod[0].dimX / 2; + state->lod[1].dimY = state->lod[0].dimY / 2; + state->lod[2].dimX = state->lod[0].dimX / 2; + state->lod[2].dimY = state->lod[0].dimY / 2; + state->yuv.shift = 1; + state->yuv.step = 1; + state->lodCount = 3; + + switch (yuv) { + case HAL_PIXEL_FORMAT_YV12: + state->lod[2].stride = rsRound(state->lod[0].stride >> 1, 16); + state->lod[2].mallocPtr = ((uint8_t *)state->lod[0].mallocPtr) + + (state->lod[0].stride * state->lod[0].dimY); + uvSize += state->lod[2].stride * state->lod[2].dimY; + + state->lod[1].stride = state->lod[2].stride; + state->lod[1].mallocPtr = ((uint8_t *)state->lod[2].mallocPtr) + + (state->lod[2].stride * state->lod[2].dimY); + uvSize += state->lod[1].stride * state->lod[2].dimY; + break; + case HAL_PIXEL_FORMAT_YCrCb_420_SP: // NV21 + // state->lod[1].dimX = state->lod[0].dimX; + state->lod[1].stride = state->lod[0].stride; + state->lod[2].stride = state->lod[0].stride; + state->lod[2].mallocPtr = ((uint8_t *)state->lod[0].mallocPtr) + + (state->lod[0].stride * state->lod[0].dimY); + state->lod[1].mallocPtr = ((uint8_t *)state->lod[2].mallocPtr) + 1; + uvSize += state->lod[1].stride * state->lod[1].dimY; + state->yuv.step = 2; + break; + default: + rsAssert(0); + } + + return uvSize; +} + +// TODO: Dedup this with the same code under frameworks/rs/driver +size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc, + const Type *type, uint8_t *ptr, + size_t requiredAlignment) { + alloc->mHal.drvState.lod[0].dimX = type->getDimX(); + alloc->mHal.drvState.lod[0].dimY = type->getDimY(); + alloc->mHal.drvState.lod[0].dimZ = type->getDimZ(); + alloc->mHal.drvState.lod[0].mallocPtr = 0; + // Stride needs to be aligned to a boundary defined by 
requiredAlignment! + size_t stride = + alloc->mHal.drvState.lod[0].dimX * type->getElementSizeBytes(); + alloc->mHal.drvState.lod[0].stride = rsRound(stride, requiredAlignment); + alloc->mHal.drvState.lodCount = type->getLODCount(); + alloc->mHal.drvState.faceCount = type->getDimFaces(); + + size_t offsets[Allocation::MAX_LOD]; + memset(offsets, 0, sizeof(offsets)); + + size_t o = alloc->mHal.drvState.lod[0].stride * + rsMax(alloc->mHal.drvState.lod[0].dimY, 1u) * + rsMax(alloc->mHal.drvState.lod[0].dimZ, 1u); + if (alloc->mHal.state.yuv) { + o += DeriveYUVLayout(alloc->mHal.state.yuv, &alloc->mHal.drvState); + + for (uint32_t ct = 1; ct < alloc->mHal.drvState.lodCount; ct++) { + offsets[ct] = (size_t)alloc->mHal.drvState.lod[ct].mallocPtr; + } + } else if (alloc->mHal.drvState.lodCount > 1) { + uint32_t tx = alloc->mHal.drvState.lod[0].dimX; + uint32_t ty = alloc->mHal.drvState.lod[0].dimY; + uint32_t tz = alloc->mHal.drvState.lod[0].dimZ; + for (uint32_t lod = 1; lod < alloc->mHal.drvState.lodCount; lod++) { + alloc->mHal.drvState.lod[lod].dimX = tx; + alloc->mHal.drvState.lod[lod].dimY = ty; + alloc->mHal.drvState.lod[lod].dimZ = tz; + alloc->mHal.drvState.lod[lod].stride = + rsRound(tx * type->getElementSizeBytes(), requiredAlignment); + offsets[lod] = o; + o += alloc->mHal.drvState.lod[lod].stride * rsMax(ty, 1u) * rsMax(tz, 1u); + if (tx > 1) tx >>= 1; + if (ty > 1) ty >>= 1; + if (tz > 1) tz >>= 1; + } + } + + alloc->mHal.drvState.faceOffset = o; + + alloc->mHal.drvState.lod[0].mallocPtr = ptr; + for (uint32_t lod = 1; lod < alloc->mHal.drvState.lodCount; lod++) { + alloc->mHal.drvState.lod[lod].mallocPtr = ptr + offsets[lod]; + } + + size_t allocSize = alloc->mHal.drvState.faceOffset; + if (alloc->mHal.drvState.faceCount) { + allocSize *= 6; + } + + return allocSize; +} + +size_t AllocationBuildPointerTable(const Context *rsc, const Allocation *alloc, + const Type *type, uint8_t *ptr) { + return AllocationBuildPointerTable(rsc, alloc, type, ptr, + 
Allocation::kMinimumRSAlignment); +} + +uint8_t *GetOffsetPtr(const Allocation *alloc, uint32_t xoff, uint32_t yoff, + uint32_t zoff, uint32_t lod, + RsAllocationCubemapFace face) { + uint8_t *ptr = (uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr; + ptr += face * alloc->mHal.drvState.faceOffset; + ptr += zoff * alloc->mHal.drvState.lod[lod].dimY * + alloc->mHal.drvState.lod[lod].stride; + ptr += yoff * alloc->mHal.drvState.lod[lod].stride; + ptr += xoff * alloc->mHal.state.elementSizeBytes; + return ptr; +} + +void mip565(const Allocation *alloc, int lod, RsAllocationCubemapFace face) { + uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX; + uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY; + + for (uint32_t y = 0; y < h; y++) { + uint16_t *oPtr = (uint16_t *)GetOffsetPtr(alloc, 0, y, 0, lod + 1, face); + const uint16_t *i1 = + (uint16_t *)GetOffsetPtr(alloc, 0, y * 2, 0, lod, face); // source row 2y goes in yoff (zoff stays 0), matching mip8888/mip8 + const uint16_t *i2 = + (uint16_t *)GetOffsetPtr(alloc, 0, y * 2 + 1, 0, lod, face); // source row 2y+1, same convention + + for (uint32_t x = 0; x < w; x++) { + *oPtr = rsBoxFilter565(i1[0], i1[1], i2[0], i2[1]); + oPtr++; + i1 += 2; + i2 += 2; + } + } +} + +void mip8888(const Allocation *alloc, int lod, RsAllocationCubemapFace face) { + uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX; + uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY; + + for (uint32_t y = 0; y < h; y++) { + uint32_t *oPtr = (uint32_t *)GetOffsetPtr(alloc, 0, y, 0, lod + 1, face); + const uint32_t *i1 = + (uint32_t *)GetOffsetPtr(alloc, 0, y * 2, 0, lod, face); + const uint32_t *i2 = + (uint32_t *)GetOffsetPtr(alloc, 0, y * 2 + 1, 0, lod, face); + + for (uint32_t x = 0; x < w; x++) { + *oPtr = rsBoxFilter8888(i1[0], i1[1], i2[0], i2[1]); + oPtr++; + i1 += 2; + i2 += 2; + } + } +} + +void mip8(const Allocation *alloc, int lod, RsAllocationCubemapFace face) { + uint32_t w = alloc->mHal.drvState.lod[lod + 1].dimX; + uint32_t h = alloc->mHal.drvState.lod[lod + 1].dimY; + + for (uint32_t y = 0; y < h; y++) { + uint8_t *oPtr = 
GetOffsetPtr(alloc, 0, y, 0, lod + 1, face); + const uint8_t *i1 = GetOffsetPtr(alloc, 0, y * 2, 0, lod, face); + const uint8_t *i2 = GetOffsetPtr(alloc, 0, y * 2 + 1, 0, lod, face); + + for (uint32_t x = 0; x < w; x++) { + *oPtr = (uint8_t)(((uint32_t)i1[0] + i1[1] + i2[0] + i2[1]) * 0.25f); + oPtr++; + i1 += 2; + i2 += 2; + } + } +} + +} // anonymous namespace + +RSoVAllocation::RSoVAllocation(RSoVContext *context, const Type *type) + : mRSoV(context), + mDevice(context->getDevice()), + mType(type), + mWidth(type->getDimX()), + mHeight(type->getDimY()), + mDepth(type->getDimZ()) { + InitImage(); +} + +RSoVAllocation::~RSoVAllocation() { + vkDestroyImageView(mDevice, mImageView, nullptr); + vkDestroyImage(mDevice, mImage, nullptr); + vkFreeMemory(mDevice, mMem, nullptr); +} + +void RSoVAllocation::InitImage() { + VkResult res; + + mFormat = VkFormatFromRSElement(*mType->getElement()); + + const uint32_t width = mWidth; + const uint32_t height = mHeight; + const uint32_t depth = mDepth; + + VkImageType imageType = + depth > 0 ? VK_IMAGE_TYPE_3D + : (height > 0 ? 
VK_IMAGE_TYPE_2D : VK_IMAGE_TYPE_1D); + + VkImageCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = imageType, + .format = mFormat, + .extent = {width, rsMax(height, 1U), rsMax(depth, 1U)}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_LINEAR, + .usage = VK_IMAGE_USAGE_STORAGE_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED, + }; + + res = vkCreateImage(mDevice, &createInfo, nullptr, &mImage); + rsAssert(res == VK_SUCCESS); + + VkMemoryRequirements mem_reqs; + vkGetImageMemoryRequirements(mDevice, mImage, &mem_reqs); + + ALOGI("size of memory needed = %u", (uint)mem_reqs.size); + + VkMemoryAllocateInfo allocateInfo = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = mem_reqs.size, + }; + + /* Use the memory properties to determine the type of memory required */ + bool pass; + pass = mRSoV->MemoryTypeFromProperties( + mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + &allocateInfo.memoryTypeIndex); + ALOGI("TypeBits = 0x%08X", mem_reqs.memoryTypeBits); + rsAssert(pass); + + // TODO: Make this aligned + res = vkAllocateMemory(mDevice, &allocateInfo, nullptr, &mMem); + rsAssert(res == VK_SUCCESS); + + res = vkBindImageMemory(mDevice, mImage, mMem, 0); + rsAssert(res == VK_SUCCESS); + + VkImageViewType viewType = + depth > 0 ? VK_IMAGE_VIEW_TYPE_3D + : (height > 0 ? 
VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_1D); + + VkImageViewCreateInfo view_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .image = mImage, + .viewType = viewType, + .format = mFormat, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + + res = vkCreateImageView(mDevice, &view_info, nullptr, &mImageView); + rsAssert(res == VK_SUCCESS); + + mImageLayout = VK_IMAGE_LAYOUT_GENERAL; + + mImageInfo = { + .imageView = mImageView, .imageLayout = mImageLayout, + }; + + res = vkMapMemory(mDevice, mMem, 0, mem_reqs.size, 0, (void **)&mPtr); + rsAssert(res == VK_SUCCESS); +} + +} // namespace rsov +} // namespace renderscript +} // namespace android + +using android::renderscript::Allocation; +using android::renderscript::Context; +using android::renderscript::Element; +using android::renderscript::Type; +using android::renderscript::rs_allocation; +using android::renderscript::rsMax; +using namespace android::renderscript::rsov; + +bool rsovAllocationInit(const Context *rsc, Allocation *alloc, bool forceZero) { + RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv); // context driver slot holds the RSoVHal + RSoVContext *rsov = hal->mRSoV; + const Type *type = alloc->getType(); + + RSoVAllocation *rsovAlloc = new RSoVAllocation(rsov, type); + alloc->mHal.drv = rsovAlloc; + AllocationBuildPointerTable(rsc, alloc, type, + (uint8_t *)rsovAlloc->getHostPtr()); + return true; +} + +void rsovAllocationDestroy(const Context *rsc, Allocation *alloc) { + RSoVAllocation *rsovAlloc = static_cast<RSoVAllocation *>(alloc->mHal.drv); // object stored by rsovAllocationInit + delete rsovAlloc; + alloc->mHal.drv = nullptr; +} + +void rsovAllocationData1D(const Context *rsc, const Allocation *alloc, + uint32_t xoff, uint32_t lod, size_t count, + const void *data, size_t 
sizeBytes) { + const size_t eSize = alloc->mHal.state.type->getElementSizeBytes(); + uint8_t *ptr = + GetOffsetPtr(alloc, xoff, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + size_t size = count * eSize; + if (ptr != data) { + // Skip the copy if we are the same allocation. This can arise from + // our Bitmap optimization, where we share the same storage. + if (alloc->mHal.state.hasReferences) { + alloc->incRefs(data, count); + alloc->decRefs(ptr, count); + } + memcpy(ptr, data, size); + } +} + +void rsovAllocationData2D(const Context *rsc, const Allocation *alloc, + uint32_t xoff, uint32_t yoff, uint32_t lod, + RsAllocationCubemapFace face, uint32_t w, uint32_t h, + const void *data, size_t sizeBytes, size_t stride) { + size_t eSize = alloc->mHal.state.elementSizeBytes; + size_t lineSize = eSize * w; + if (!stride) { + stride = lineSize; + } + + if (alloc->mHal.drvState.lod[0].mallocPtr) { + const uint8_t *src = static_cast<const uint8_t *>(data); // byte view of the caller's buffer + uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face); + + for (uint32_t line = yoff; line < (yoff + h); line++) { + if (alloc->mHal.state.hasReferences) { + alloc->incRefs(src, w); + alloc->decRefs(dst, w); + } + memcpy(dst, src, lineSize); + src += stride; + dst += alloc->mHal.drvState.lod[lod].stride; + } + // TODO: handle YUV Allocations + if (alloc->mHal.state.yuv) { + size_t clineSize = lineSize; + int lod = 1; + int maxLod = 2; + if (alloc->mHal.state.yuv == HAL_PIXEL_FORMAT_YV12) { + maxLod = 3; + clineSize >>= 1; + } else if (alloc->mHal.state.yuv == HAL_PIXEL_FORMAT_YCrCb_420_SP) { + lod = 2; + maxLod = 3; + } + + while (lod < maxLod) { + uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face); + + for (uint32_t line = (yoff >> 1); line < ((yoff + h) >> 1); line++) { + memcpy(dst, src, clineSize); + // When copying from an array to an Allocation, the src pointer + // to the array should just move by the number of bytes copied. 
+ src += clineSize; + dst += alloc->mHal.drvState.lod[lod].stride; + } + lod++; + } + } + } +} + +void rsovAllocationData3D(const Context *rsc, const Allocation *alloc, + uint32_t xoff, uint32_t yoff, uint32_t zoff, + uint32_t lod, uint32_t w, uint32_t h, uint32_t d, + const void *data, size_t sizeBytes, size_t stride) { + uint32_t eSize = alloc->mHal.state.elementSizeBytes; + uint32_t lineSize = eSize * w; + if (!stride) { + stride = lineSize; + } + + if (alloc->mHal.drvState.lod[0].mallocPtr) { + const uint8_t *src = static_cast<const uint8_t *>(data); // byte view of the caller's buffer + for (uint32_t z = zoff; z < (d + zoff); z++) { + uint8_t *dst = GetOffsetPtr(alloc, xoff, yoff, z, lod, + RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + for (uint32_t line = yoff; line < (yoff + h); line++) { + if (alloc->mHal.state.hasReferences) { + alloc->incRefs(src, w); + alloc->decRefs(dst, w); + } + memcpy(dst, src, lineSize); + src += stride; + dst += alloc->mHal.drvState.lod[lod].stride; + } + } + } +} + +void rsovAllocationRead1D(const Context *rsc, const Allocation *alloc, + uint32_t xoff, uint32_t lod, size_t count, void *data, + size_t sizeBytes) { + const size_t eSize = alloc->mHal.state.type->getElementSizeBytes(); + const uint8_t *ptr = + GetOffsetPtr(alloc, xoff, 0, 0, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + if (data != ptr) { + // Skip the copy if we are the same allocation. This can arise from + // our Bitmap optimization, where we share the same storage. 
+ memcpy(data, ptr, count * eSize); + } +} + +void rsovAllocationRead2D(const Context *rsc, const Allocation *alloc, + uint32_t xoff, uint32_t yoff, uint32_t lod, + RsAllocationCubemapFace face, uint32_t w, uint32_t h, + void *data, size_t sizeBytes, size_t stride) { + size_t eSize = alloc->mHal.state.elementSizeBytes; + size_t lineSize = eSize * w; + if (!stride) { + stride = lineSize; + } + + if (alloc->mHal.drvState.lod[0].mallocPtr) { + uint8_t *dst = static_cast<uint8_t *>(data); // byte view of the caller's output buffer + const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, 0, lod, face); + if (dst == src) { + // Skip the copy if we are the same allocation. This can arise from + // our Bitmap optimization, where we share the same storage. + return; + } + + for (uint32_t line = yoff; line < (yoff + h); line++) { + memcpy(dst, src, lineSize); + dst += stride; + src += alloc->mHal.drvState.lod[lod].stride; + } + } else { + ALOGE("Add code to readback from non-script memory"); + } +} + +void rsovAllocationRead3D(const Context *rsc, const Allocation *alloc, + uint32_t xoff, uint32_t yoff, uint32_t zoff, + uint32_t lod, uint32_t w, uint32_t h, uint32_t d, + void *data, size_t sizeBytes, size_t stride) { + uint32_t eSize = alloc->mHal.state.elementSizeBytes; + uint32_t lineSize = eSize * w; + if (!stride) { + stride = lineSize; + } + + if (alloc->mHal.drvState.lod[0].mallocPtr) { + uint8_t *dst = static_cast<uint8_t *>(data); // byte view of the caller's output buffer + for (uint32_t z = zoff; z < (d + zoff); z++) { + const uint8_t *src = GetOffsetPtr(alloc, xoff, yoff, z, lod, + RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + if (dst == src) { + // Skip the copy if we are the same allocation. This can arise from + // our Bitmap optimization, where we share the same storage. 
+ return; + } + + for (uint32_t line = yoff; line < (yoff + h); line++) { + memcpy(dst, src, lineSize); + dst += stride; + src += alloc->mHal.drvState.lod[lod].stride; + } + } + } +} + +void *rsovAllocationLock1D(const Context *rsc, const Allocation *alloc) { + return alloc->mHal.drvState.lod[0].mallocPtr; +} + +void rsovAllocationUnlock1D(const Context *rsc, const Allocation *alloc) {} + +void rsovAllocationData1D_alloc(const Context *rsc, const Allocation *dstAlloc, + uint32_t dstXoff, uint32_t dstLod, size_t count, + const Allocation *srcAlloc, uint32_t srcXoff, + uint32_t srcLod) {} + +void rsovAllocationData2D_alloc_script( + const Context *rsc, const Allocation *dstAlloc, uint32_t dstXoff, + uint32_t dstYoff, uint32_t dstLod, RsAllocationCubemapFace dstFace, + uint32_t w, uint32_t h, const Allocation *srcAlloc, uint32_t srcXoff, + uint32_t srcYoff, uint32_t srcLod, RsAllocationCubemapFace srcFace) { + size_t elementSize = dstAlloc->getType()->getElementSizeBytes(); + for (uint32_t i = 0; i < h; i++) { + uint8_t *dstPtr = + GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, 0, dstLod, dstFace); + uint8_t *srcPtr = + GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, 0, srcLod, srcFace); + memcpy(dstPtr, srcPtr, w * elementSize); + } +} + +void rsovAllocationData3D_alloc_script( + const Context *rsc, const Allocation *dstAlloc, uint32_t dstXoff, + uint32_t dstYoff, uint32_t dstZoff, uint32_t dstLod, uint32_t w, uint32_t h, + uint32_t d, const Allocation *srcAlloc, uint32_t srcXoff, uint32_t srcYoff, + uint32_t srcZoff, uint32_t srcLod) { + uint32_t elementSize = dstAlloc->getType()->getElementSizeBytes(); + for (uint32_t j = 0; j < d; j++) { + for (uint32_t i = 0; i < h; i++) { + uint8_t *dstPtr = + GetOffsetPtr(dstAlloc, dstXoff, dstYoff + i, dstZoff + j, dstLod, + RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + uint8_t *srcPtr = + GetOffsetPtr(srcAlloc, srcXoff, srcYoff + i, srcZoff + j, srcLod, + RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + memcpy(dstPtr, srcPtr, w * 
elementSize); + } + } +} + +void rsovAllocationData2D_alloc( + const Context *rsc, const Allocation *dstAlloc, uint32_t dstXoff, + uint32_t dstYoff, uint32_t dstLod, RsAllocationCubemapFace dstFace, + uint32_t w, uint32_t h, const Allocation *srcAlloc, uint32_t srcXoff, + uint32_t srcYoff, uint32_t srcLod, RsAllocationCubemapFace srcFace) { + if (!dstAlloc->getIsScript() && !srcAlloc->getIsScript()) { + rsc->setError(RS_ERROR_FATAL_DRIVER, + "Non-script allocation copies not " + "yet implemented."); + return; + } + rsovAllocationData2D_alloc_script(rsc, dstAlloc, dstXoff, dstYoff, dstLod, + dstFace, w, h, srcAlloc, srcXoff, srcYoff, + srcLod, srcFace); +} + +void rsovAllocationData3D_alloc(const Context *rsc, const Allocation *dstAlloc, + uint32_t dstXoff, uint32_t dstYoff, + uint32_t dstZoff, uint32_t dstLod, uint32_t w, + uint32_t h, uint32_t d, + const Allocation *srcAlloc, uint32_t srcXoff, + uint32_t srcYoff, uint32_t srcZoff, + uint32_t srcLod) { + if (!dstAlloc->getIsScript() && !srcAlloc->getIsScript()) { + rsc->setError(RS_ERROR_FATAL_DRIVER, + "Non-script allocation copies not " + "yet implemented."); + return; + } + rsovAllocationData3D_alloc_script(rsc, dstAlloc, dstXoff, dstYoff, dstZoff, + dstLod, w, h, d, srcAlloc, srcXoff, srcYoff, + srcZoff, srcLod); +} + +void rsovAllocationAdapterOffset(const Context *rsc, const Allocation *alloc) { + // Get a base pointer to the new LOD + const Allocation *base = alloc->mHal.state.baseAlloc; + const Type *type = alloc->mHal.state.type; + if (base == nullptr) { + return; + } + + const int lodBias = alloc->mHal.state.originLOD; + uint32_t lodCount = rsMax(alloc->mHal.drvState.lodCount, (uint32_t)1); + for (uint32_t lod = 0; lod < lodCount; lod++) { + alloc->mHal.drvState.lod[lod] = base->mHal.drvState.lod[lod + lodBias]; + alloc->mHal.drvState.lod[lod].mallocPtr = GetOffsetPtr( + alloc, alloc->mHal.state.originX, alloc->mHal.state.originY, + alloc->mHal.state.originZ, lodBias, + 
(RsAllocationCubemapFace)alloc->mHal.state.originFace); + } +} + +bool rsovAllocationAdapterInit(const Context *rsc, Allocation *alloc) { +// TODO: may need a RSoV Allocation here +#if 0 + DrvAllocation *drv = (DrvAllocation *)calloc(1, sizeof(DrvAllocation)); + if (!drv) { + return false; + } + alloc->mHal.drv = drv; +#endif + // We need to build an allocation that looks like a subset of the parent + // allocation + rsovAllocationAdapterOffset(rsc, alloc); + + return true; +} + +void rsovAllocationSyncAll(const Context *rsc, const Allocation *alloc, + RsAllocationUsageType src) { + // TODO: anything to do here? +} + +void rsovAllocationMarkDirty(const Context *rsc, const Allocation *alloc) { + // TODO: anything to do here? +} + +void rsovAllocationResize(const Context *rsc, const Allocation *alloc, + const Type *newType, bool zeroNew) { + // TODO: implement this + // can this be done without copying, if the new size is greater than the + // original? +} + +void rsovAllocationGenerateMipmaps(const Context *rsc, + const Allocation *alloc) { + if (!alloc->mHal.drvState.lod[0].mallocPtr) { + return; + } + uint32_t numFaces = alloc->getType()->getDimFaces() ? 
6 : 1; + for (uint32_t face = 0; face < numFaces; face++) { + for (uint32_t lod = 0; lod < (alloc->getType()->getLODCount() - 1); lod++) { + switch (alloc->getType()->getElement()->getSizeBits()) { + case 32: + mip8888(alloc, lod, (RsAllocationCubemapFace)face); + break; + case 16: + mip565(alloc, lod, (RsAllocationCubemapFace)face); + break; + case 8: + mip8(alloc, lod, (RsAllocationCubemapFace)face); + break; + } + } + } +} + +uint32_t rsovAllocationGrallocBits(const Context *rsc, Allocation *alloc) { + return 0; +} + +void rsovAllocationUpdateCachedObject(const Context *rsc, + const Allocation *alloc, + rs_allocation *obj) { + obj->p = alloc; +#ifdef __LP64__ + if (alloc != nullptr) { + obj->r = alloc->mHal.drvState.lod[0].mallocPtr; + obj->v1 = alloc->mHal.drv; + obj->v2 = (void *)alloc->mHal.drvState.lod[0].stride; + } else { + obj->r = nullptr; + obj->v1 = nullptr; + obj->v2 = nullptr; + } +#endif +} + +void rsovAllocationSetSurface(const Context *rsc, Allocation *alloc, + ANativeWindow *nw) { + // TODO: implement this +} + +void rsovAllocationIoSend(const Context *rsc, Allocation *alloc) { + // TODO: implement this +} + +void rsovAllocationIoReceive(const Context *rsc, Allocation *alloc) { + // TODO: implement this +} + +void rsovAllocationElementData(const Context *rsc, const Allocation *alloc, + uint32_t x, uint32_t y, uint32_t z, + const void *data, uint32_t cIdx, + size_t sizeBytes) { + uint8_t *ptr = + GetOffsetPtr(alloc, x, y, z, 0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + + const Element *e = alloc->mHal.state.type->getElement()->getField(cIdx); + ptr += alloc->mHal.state.type->getElement()->getFieldOffsetBytes(cIdx); + + if (alloc->mHal.state.hasReferences) { + e->incRefs(data); + e->decRefs(ptr); + } + + memcpy(ptr, data, sizeBytes); +} + +void rsovAllocationElementRead(const Context *rsc, const Allocation *alloc, + uint32_t x, uint32_t y, uint32_t z, void *data, + uint32_t cIdx, size_t sizeBytes) { + uint8_t *ptr = + GetOffsetPtr(alloc, x, y, z, 
0, RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X); + + const Element *e = alloc->mHal.state.type->getElement()->getField(cIdx); + ptr += alloc->mHal.state.type->getElement()->getFieldOffsetBytes(cIdx); + + memcpy(data, ptr, sizeBytes); +} diff --git a/rsov/driver/rsovAllocation.h b/rsov/driver/rsovAllocation.h new file mode 100644 index 00000000..24f4cf0a --- /dev/null +++ b/rsov/driver/rsovAllocation.h @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef RSOV_ALLOCATION_H
+#define RSOV_ALLOCATION_H
+
+// NOTE(review): the target of the next #include was lost when this patch was
+// extracted; presumably <vulkan/vulkan.h>, given the Vk* types used below —
+// confirm against the original commit.
+#include
+
+#include "rsDefines.h"
+#include "rs_hal.h"
+#include "system/window.h"
+
+namespace android {
+namespace renderscript {
+
+class Allocation;
+class Context;
+class Type;
+
+namespace rsov {
+
+class RSoVContext;
+
+// Driver-side object describing one allocation: its dimensions, the Vulkan
+// image / image view / memory handles associated with it, and a host pointer
+// to its storage.
+class RSoVAllocation {
+ public:
+  RSoVAllocation(RSoVContext *context, const Type *type);
+  ~RSoVAllocation();
+
+  // Dimension accessors; the values are const members fixed at construction.
+  uint32_t getWidth() const { return mWidth; }
+  uint32_t getHeight() const { return mHeight; }
+  uint32_t getDepth() const { return mDepth; }
+  // Pointer to the descriptor image info held inside this object; it stays
+  // valid only as long as this RSoVAllocation does.
+  const VkDescriptorImageInfo *getImageInfo() const { return &mImageInfo; }
+  char *getHostPtr() const { return mPtr; }
+
+ private:
+  void InitImage();
+
+  char *mPtr;  // Host pointer to mmapped device memory for the Allocation
+  RSoVContext *mRSoV;
+  VkDevice mDevice;
+  const Type *mType;
+  const uint32_t mWidth;
+  const uint32_t mHeight;
+  const uint32_t mDepth;
+
+  VkFormat mFormat;
+  VkDescriptorImageInfo mImageInfo;
+  VkDeviceMemory mMem;
+  VkImage mImage;
+  VkImageView mImageView;
+  VkImageLayout mImageLayout;
+
+  // TODO: add an underneath buffer too
+};
+
+}  // namespace rsov
+}  // namespace renderscript
+}  // namespace android
+
+// The functions below are the allocation entry points that rsdHalQueryHal()
+// in rsovCore.cpp hands out to the RenderScript runtime.
+
+extern bool rsovAllocationInit(const android::renderscript::Context *rsc,
+                               android::renderscript::Allocation *alloc,
+                               bool forceZero);
+
+extern void rsovAllocationDestroy(const android::renderscript::Context *rsc,
+                                  android::renderscript::Allocation *alloc);
+
+// Host -> allocation copies. For the 2D/3D variants `stride` is the distance
+// in bytes between consecutive source rows (the 3D implementation treats 0 as
+// "tightly packed", i.e. w * element size).
+extern void rsovAllocationData1D(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 uint32_t xoff, uint32_t lod, size_t count,
+                                 const void *data, size_t sizeBytes);
+
+extern void rsovAllocationData2D(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 uint32_t xoff, uint32_t yoff, uint32_t lod,
+                                 RsAllocationCubemapFace face, uint32_t w,
+                                 uint32_t h, const void *data, size_t sizeBytes,
+                                 size_t stride);
+
+extern void rsovAllocationData3D(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 uint32_t xoff, uint32_t yoff, uint32_t zoff,
+                                 uint32_t lod, uint32_t w, uint32_t h,
+                                 uint32_t d, const void *data, size_t sizeBytes,
+                                 size_t stride);
+
+// Allocation -> host copies. The implementations skip the copy when `data`
+// already points at the allocation's own storage. For 2D/3D a `stride` of 0
+// means tightly packed destination rows.
+extern void rsovAllocationRead1D(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 uint32_t xoff, uint32_t lod, size_t count,
+                                 void *data, size_t sizeBytes);
+
+extern void rsovAllocationRead2D(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 uint32_t xoff, uint32_t yoff, uint32_t lod,
+                                 RsAllocationCubemapFace face, uint32_t w,
+                                 uint32_t h, void *data, size_t sizeBytes,
+                                 size_t stride);
+
+extern void rsovAllocationRead3D(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 uint32_t xoff, uint32_t yoff, uint32_t zoff,
+                                 uint32_t lod, uint32_t w, uint32_t h,
+                                 uint32_t d, void *data, size_t sizeBytes,
+                                 size_t stride);
+
+// Returns the LOD-0 host pointer of the allocation.
+extern void *rsovAllocationLock1D(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc);
+
+// Currently a no-op.
+extern void rsovAllocationUnlock1D(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc);
+
+// Allocation -> allocation copies.
+// NOTE: the 1D variant currently has an empty body, so it copies nothing.
+extern void rsovAllocationData1D_alloc(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *dstAlloc, uint32_t dstXoff,
+    uint32_t dstLod, size_t count,
+    const android::renderscript::Allocation *srcAlloc, uint32_t srcXoff,
+    uint32_t srcLod);
+
+// Row-by-row memcpy between two allocations' host storage.
+extern void rsovAllocationData2D_alloc_script(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *dstAlloc, uint32_t dstXoff,
+    uint32_t dstYoff, uint32_t dstLod, RsAllocationCubemapFace dstFace,
+    uint32_t w, uint32_t h, const android::renderscript::Allocation *srcAlloc,
+    uint32_t srcXoff, uint32_t srcYoff, uint32_t srcLod,
+    RsAllocationCubemapFace srcFace);
+
+// Reports RS_ERROR_FATAL_DRIVER when neither allocation is a script
+// allocation; otherwise forwards to rsovAllocationData2D_alloc_script().
+extern void rsovAllocationData2D_alloc(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *dstAlloc, uint32_t dstXoff,
+    uint32_t dstYoff, uint32_t dstLod, RsAllocationCubemapFace dstFace,
+    uint32_t w, uint32_t h, const android::renderscript::Allocation *srcAlloc,
+    uint32_t srcXoff, uint32_t srcYoff, uint32_t srcLod,
+    RsAllocationCubemapFace srcFace);
+
+// 3D counterpart of rsovAllocationData2D_alloc_script().
+extern void rsovAllocationData3D_alloc_script(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *dstAlloc, uint32_t dstXoff,
+    uint32_t dstYoff, uint32_t dstZoff, uint32_t dstLod, uint32_t w, uint32_t h,
+    uint32_t d, const android::renderscript::Allocation *srcAlloc,
+    uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff, uint32_t srcLod);
+
+// 3D counterpart of rsovAllocationData2D_alloc(), with the same error
+// behaviour.
+extern void rsovAllocationData3D_alloc(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *dstAlloc, uint32_t dstXoff,
+    uint32_t dstYoff, uint32_t dstZoff, uint32_t dstLod, uint32_t w, uint32_t h,
+    uint32_t d, const android::renderscript::Allocation *srcAlloc,
+    uint32_t srcXoff, uint32_t srcYoff, uint32_t srcZoff, uint32_t srcLod);
+
+// Adapter support: re-derives the adapter's per-LOD state from its base
+// allocation (does nothing when there is no base allocation).
+extern void rsovAllocationAdapterOffset(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc);
+
+// Calls rsovAllocationAdapterOffset() and returns true.
+extern bool rsovAllocationAdapterInit(const android::renderscript::Context *rsc,
+                                      android::renderscript::Allocation *alloc);
+
+// The next three entry points are unimplemented placeholders in this change.
+extern void rsovAllocationSyncAll(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc, RsAllocationUsageType src);
+
+extern void rsovAllocationMarkDirty(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc);
+
+extern void rsovAllocationResize(const android::renderscript::Context *rsc,
+                                 const android::renderscript::Allocation *alloc,
+                                 const android::renderscript::Type *newType,
+                                 bool zeroNew);
+
+// Builds the mip chain on the host for 32-, 16- and 8-bit elements; other
+// element sizes are left untouched.
+extern void rsovAllocationGenerateMipmaps(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc);
+
+// Always returns 0.
+extern uint32_t rsovAllocationGrallocBits(
+    const android::renderscript::Context *rsc,
+    android::renderscript::Allocation *alloc);
+
+// Refreshes the script-visible rs_allocation handle for `alloc`.
+extern void rsovAllocationUpdateCachedObject(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc,
+    android::renderscript::rs_allocation *obj);
+
+// Surface / IO entry points: unimplemented placeholders in this change.
+extern void rsovAllocationSetSurface(const android::renderscript::Context *rsc,
+                                     android::renderscript::Allocation *alloc,
+                                     ANativeWindow *nw);
+
+extern void rsovAllocationIoSend(const android::renderscript::Context *rsc,
+                                 android::renderscript::Allocation *alloc);
+
+extern void rsovAllocationIoReceive(const android::renderscript::Context *rsc,
+                                    android::renderscript::Allocation *alloc);
+
+// Write / read a single sub-field (`cIdx`) of the element at (x, y, z).
+extern void rsovAllocationElementData(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc, uint32_t x, uint32_t y,
+    uint32_t z, const void *data, uint32_t cIdx, size_t sizeBytes);
+
+extern void rsovAllocationElementRead(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Allocation *alloc, uint32_t x, uint32_t y,
+    uint32_t z, void *data, uint32_t cIdx, size_t sizeBytes);
+
+#endif  // RSOV_ALLOCATION_H
diff --git a/rsov/driver/rsovContext.cpp b/rsov/driver/rsovContext.cpp
new file mode 100644
index 00000000..f1bfc66e
--- /dev/null
+++ b/rsov/driver/rsovContext.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsovContext.h"
+
+// The header name on this line was lost when the patch was extracted;
+// std::vector is the only standard container this file uses.
+#include <vector>
+
+#include "rsUtils.h"
+
+namespace android {
+namespace renderscript {
+namespace rsov {
+
+RSoVContext* RSoVContext::mContext = nullptr;
+std::once_flag RSoVContext::mInitFlag;
+
+// Brings up the Vulkan objects owned by this context: instance, first
+// enumerated physical device, logical device with one compute-capable queue,
+// and a resettable command pool for that queue family.
+//
+// `name` is used both as the application name and as the engine name.
+// Failures are reported through rsAssert only.
+void RSoVContext::Initialize(char const* const name) {
+  // Initialize instance
+  VkApplicationInfo appInfo = {
+      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+      .pNext = nullptr,
+      .pApplicationName = name,  // TODO: set to app name
+      .applicationVersion = 1,
+      .pEngineName = name,
+      .engineVersion = 1,
+      .apiVersion = VK_API_VERSION_1_0};
+
+  VkInstanceCreateInfo instInfo = {
+      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+      .pNext = nullptr,
+      .flags = 0,
+      .pApplicationInfo = &appInfo,
+  };
+
+  VkResult res;
+  res = vkCreateInstance(&instInfo, nullptr, &mInstance);
+  rsAssert(res == VK_SUCCESS);
+
+  // Enumerate devices: first query the count, then fetch the handles.
+  uint32_t gpu_count = 0;
+
+  res = vkEnumeratePhysicalDevices(mInstance, &gpu_count, nullptr);
+  rsAssert(res == VK_SUCCESS && gpu_count > 0);
+
+  std::vector<VkPhysicalDevice> GPUs(gpu_count);
+
+  res = vkEnumeratePhysicalDevices(mInstance, &gpu_count, GPUs.data());
+  rsAssert(res == VK_SUCCESS && gpu_count > 0);
+
+  mGPU = GPUs[0];
+
+  // Get device memory properties
+  vkGetPhysicalDeviceMemoryProperties(mGPU, &mMemoryProperties);
+
+  // Pick the queue family *before* creating the logical device: the device
+  // must be created with a queue from the same family that is later passed to
+  // vkGetDeviceQueue() and to the command pool.
+  uint32_t queueCount = 0;
+
+  vkGetPhysicalDeviceQueueFamilyProperties(mGPU, &queueCount, nullptr);
+  rsAssert(queueCount > 0);
+
+  std::vector<VkQueueFamilyProperties> queueProps(queueCount);
+
+  vkGetPhysicalDeviceQueueFamilyProperties(mGPU, &queueCount,
+                                           queueProps.data());
+  rsAssert(queueCount > 0);
+
+  // First family that advertises compute support wins.
+  uint32_t queueFamilyIndex = UINT_MAX;
+  bool found = false;
+  for (unsigned int i = 0; i < queueCount; i++) {
+    if (queueProps[i].queueFlags & VK_QUEUE_COMPUTE_BIT) {
+      queueFamilyIndex = i;
+      found = true;
+      break;
+    }
+  }
+
+  rsAssert(found);
+
+  // Initialize device
+
+  float queuePriorities[] = {0.0};
+
+  VkDeviceQueueCreateInfo queueInfo = {
+      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+      .pNext = nullptr,
+      .flags = 0,
+      .queueFamilyIndex = queueFamilyIndex,
+      .queueCount = 1,
+      .pQueuePriorities = queuePriorities,
+  };
+
+  VkDeviceCreateInfo deviceInfo = {
+      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+      .pNext = nullptr,
+      .queueCreateInfoCount = 1,
+      .pQueueCreateInfos = &queueInfo,
+      .pEnabledFeatures = nullptr,
+  };
+
+  res = vkCreateDevice(mGPU, &deviceInfo, nullptr, &mDevice);
+  rsAssert(res == VK_SUCCESS);
+
+  // Create a device queue
+
+  vkGetDeviceQueue(mDevice, queueFamilyIndex, 0, &mQueue);
+
+  // Create command pool (initializers listed in struct declaration order)
+
+  VkCommandPoolCreateInfo cmd_pool_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+      .pNext = nullptr,
+      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+      .queueFamilyIndex = queueFamilyIndex,
+  };
+
+  res = vkCreateCommandPool(mDevice, &cmd_pool_info, nullptr, &mCmdPool);
+  rsAssert(res == VK_SUCCESS);
+}
+
+// Finds the lowest-numbered memory type that is allowed by `typeBits` (bit i
+// set means memory type i is acceptable) and whose property flags contain
+// every bit of `requirements_mask`.
+//
+// On success stores the index in `*typeIndex` and returns true; otherwise
+// returns false and leaves `*typeIndex` untouched.
+bool RSoVContext::MemoryTypeFromProperties(uint32_t typeBits,
+                                           VkFlags requirements_mask,
+                                           uint32_t* typeIndex) {
+  for (uint32_t i = 0; i < 32; i++) {
+    if ((typeBits & 1) == 1) {
+      const uint32_t prop = mMemoryProperties.memoryTypes[i].propertyFlags;
+      if ((prop & requirements_mask) == requirements_mask) {
+        *typeIndex = i;
+        return true;
+      }
+    }
+    // Advance to the bit for the next memory type.
+    typeBits >>= 1;
+  }
+
+  return false;
+}
+
+RSoVContext::RSoVContext() {
+  char engineName[] = "RSoV";
+
+  Initialize(engineName);
+}
+
+// Destroys the Vulkan objects in reverse order of creation.
+RSoVContext::~RSoVContext() {
+  vkDestroyCommandPool(mDevice, mCmdPool, nullptr);
+  vkDestroyDevice(mDevice, nullptr);
+  vkDestroyInstance(mInstance, nullptr);
+}
+
+// Returns the process-wide context, constructing it on the first call.
+// std::call_once makes the construction safe against concurrent callers.
+RSoVContext* RSoVContext::create() {
+  std::call_once(mInitFlag, []() { mContext = new RSoVContext(); });
+  return mContext;
+}
+
+}  // namespace rsov
+}  // namespace renderscript
+}  // namespace android
diff --git a/rsov/driver/rsovContext.h b/rsov/driver/rsovContext.h
new file mode 100644
index 00000000..cf8e30bd
--- /dev/null
+++ b/rsov/driver/rsovContext.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSOV_CONTEXT_H
+#define RSOV_CONTEXT_H
+
+// NOTE(review): the targets of the next two #include lines were lost when this
+// patch was extracted; presumably <vulkan/vulkan.h> and <mutex> (for the Vk*
+// types and std::once_flag used below) — confirm against the original commit.
+#include
+#include
+
+namespace android {
+namespace renderscript {
+
+class RsdCpuReference;
+
+namespace rsov {
+
+// Holds the Vulkan instance, physical/logical device, queue and command pool
+// used by the RSoV driver. Instances are obtained through create(); the
+// constructor is private.
+class RSoVContext {
+ public:
+  // Returns the shared context, constructing it on first use.
+  static RSoVContext* create();
+  ~RSoVContext();
+
+  VkDevice getDevice() const { return mDevice; }
+  VkQueue getQueue() const { return mQueue; }
+  VkCommandPool getCmdPool() const { return mCmdPool; }
+
+  // Looks up a memory type permitted by `typeBits` whose property flags
+  // include all of `requirements_mask`; writes its index to `*typeIndex` and
+  // returns true, or returns false if none matches.
+  bool MemoryTypeFromProperties(uint32_t typeBits, VkFlags requirements_mask,
+                                uint32_t* typeIndex);
+
+ private:
+  RSoVContext();
+
+  // Creates the Vulkan objects stored in the members below.
+  void Initialize(char const* const name);
+
+  // Singleton storage and the flag guarding its one-time construction.
+  static RSoVContext* mContext;
+  static std::once_flag mInitFlag;
+
+  VkInstance mInstance;
+  VkPhysicalDevice mGPU;
+  VkDevice mDevice;
+  VkPhysicalDeviceMemoryProperties mMemoryProperties;
+  VkQueue mQueue;
+  VkCommandPool mCmdPool;
+};
+
+}  // namespace rsov
+}  // namespace renderscript
+}  // namespace android
+
+#endif  // RSOV_CONTEXT_H
diff --git a/rsov/driver/rsovCore.cpp b/rsov/driver/rsovCore.cpp
new file mode 100644
index 00000000..b51fa391
--- /dev/null
+++ b/rsov/driver/rsovCore.cpp
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsovCore.h"
+
+// NOTE(review): the targets of the following six system #include lines were
+// lost when this patch was extracted; restore them from the original commit.
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "cpu_ref/rsd_cpu.h"
+#include "rsContext.h"
+#include "rsovAllocation.h"
+#include "rsovContext.h"
+#include "rsovElement.h"
+#include "rsovSampler.h"
+#include "rsovScript.h"
+#include "rsovScriptGroup.h"
+#include "rsovType.h"
+
+namespace android {
+namespace renderscript {
+
+namespace {
+// Forwards the requested priority to the CPU reference implementation.
+void SetPriority(const Context *rsc, int32_t priority) {
+  RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
+
+  dc->mCpuRef->setPriority(priority);
+}
+
+// Tears down the per-context driver state created by rsdHalInit().
+void Shutdown(Context *rsc) {
+  RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
+  delete dc->mCpuRef;
+  // The RSoVHal was allocated with `new` in rsdHalInit(), so it has to be
+  // released with `delete`, not free(). dc->mRSoV is the shared context
+  // returned by RSoVContext::create() and is intentionally not destroyed here.
+  delete dc;
+  rsc->mHal.drv = nullptr;
+}
+
+// Returns `size` zero-initialized bytes, or nullptr if calloc fails.
+void *AllocRuntimeMem(size_t size, uint32_t flags) {
+  void* buffer = calloc(size, sizeof(char));
+  return buffer;
+}
+
+// Releases memory obtained from AllocRuntimeMem().
+void FreeRuntimeMem(void* ptr) {
+  free(ptr);
+}
+
+// Symbol lookup callback handed to RsdCpuReference::create(); this driver
+// resolves no extra runtime symbols and always returns nullptr.
+const RsdCpuReference::CpuSymbol *rsdLookupRuntimeStub(
+    Context *pContext, char const *name) {
+  return nullptr;
+}
+
+// Script lookup callback handed to RsdCpuReference::create(): returns the
+// script's driver pointer reinterpreted as a CpuScript.
+RsdCpuReference::CpuScript *LookupScript(Context *, const Script *s) {
+  return (RsdCpuReference::CpuScript *)s->mHal.drv;
+}
+
+}  // anonymous namespace
+
+// Stores in fnPtr[0] the driver function implementing HAL entry `entry`
+// (nullptr for entries this driver deliberately leaves unimplemented).
+// Returns false, after logging, for an entry value that is not handled.
+extern "C" bool rsdHalQueryHal(RsHalInitEnums entry, void **fnPtr) {
+  switch (entry) {
+    case RS_HAL_ALLOCATION_INIT:
+      fnPtr[0] = (void *)rsovAllocationInit;
+      break;
+    case RS_HAL_ALLOCATION_INIT_OEM:
+      fnPtr[0] = (void *)nullptr;
+      break;
+    case RS_HAL_ALLOCATION_INIT_ADAPTER:
+      fnPtr[0] = (void *)rsovAllocationAdapterInit;
+      break;
+    case RS_HAL_ALLOCATION_DESTROY:
+      fnPtr[0] = (void *)rsovAllocationDestroy;
+      break;
+    case RS_HAL_ALLOCATION_GET_GRALLOC_BITS:
+      fnPtr[0] = (void *)rsovAllocationGrallocBits;
+      break;
+    case RS_HAL_ALLOCATION_DATA_1D:
+      fnPtr[0] = (void *)rsovAllocationData1D;
+      break;
+    case RS_HAL_ALLOCATION_DATA_2D:
+      fnPtr[0] = (void *)rsovAllocationData2D;
+      break;
+    case RS_HAL_ALLOCATION_DATA_3D:
+      fnPtr[0] = (void *)rsovAllocationData3D;
+      break;
+    case RS_HAL_ALLOCATION_READ_1D:
+      fnPtr[0] = (void *)rsovAllocationRead1D;
+      break;
+    case RS_HAL_ALLOCATION_READ_2D:
+      fnPtr[0] = (void *)rsovAllocationRead2D;
+      break;
+    case RS_HAL_ALLOCATION_READ_3D:
+      fnPtr[0] = (void *)rsovAllocationRead3D;
+      break;
+    case RS_HAL_ALLOCATION_LOCK_1D:
+      fnPtr[0] = (void *)rsovAllocationLock1D;
+      break;
+    case RS_HAL_ALLOCATION_UNLOCK_1D:
+      fnPtr[0] = (void *)rsovAllocationUnlock1D;
+      break;
+    case RS_HAL_ALLOCATION_COPY_1D:
+      fnPtr[0] = (void *)rsovAllocationData1D_alloc;
+      break;
+    case RS_HAL_ALLOCATION_COPY_2D:
+      fnPtr[0] = (void *)rsovAllocationData2D_alloc;
+      break;
+    case RS_HAL_ALLOCATION_COPY_3D:
+      fnPtr[0] = (void *)rsovAllocationData3D_alloc;
+      break;
+    case RS_HAL_ALLOCATION_ADAPTER_OFFSET:
+      fnPtr[0] = (void *)rsovAllocationAdapterOffset;
+      break;
+    case RS_HAL_ALLOCATION_RESIZE:
+      fnPtr[0] = (void *)rsovAllocationResize;
+      break;
+    case RS_HAL_ALLOCATION_SYNC_ALL:
+      fnPtr[0] = (void *)rsovAllocationSyncAll;
+      break;
+    case RS_HAL_ALLOCATION_MARK_DIRTY:
+      fnPtr[0] = (void *)rsovAllocationMarkDirty;
+      break;
+    case RS_HAL_ALLOCATION_GENERATE_MIPMAPS:
+      fnPtr[0] = (void *)rsovAllocationGenerateMipmaps;
+      break;
+    case RS_HAL_ALLOCATION_UPDATE_CACHED_OBJECT:
+      fnPtr[0] = (void *)rsovAllocationUpdateCachedObject;
+      break;
+    case RS_HAL_ALLOCATION_GET_POINTER:
+      fnPtr[0] = (void *)nullptr;
+      break;
+    case RS_HAL_ALLOCATION_SET_SURFACE:
+      fnPtr[0] = (void *)rsovAllocationSetSurface;
+      break;
+    case RS_HAL_ALLOCATION_IO_SEND:
+      fnPtr[0] = (void *)rsovAllocationIoSend;
+      break;
+    case RS_HAL_ALLOCATION_IO_RECEIVE:
+      fnPtr[0] = (void *)rsovAllocationIoReceive;
+      break;
+    case RS_HAL_ALLOCATION_ELEMENT_DATA:
+      fnPtr[0] = (void *)rsovAllocationElementData;
+      break;
+    case RS_HAL_ALLOCATION_ELEMENT_READ:
+      fnPtr[0] = (void *)rsovAllocationElementRead;
+      break;
+
+    case RS_HAL_CORE_SHUTDOWN:
+      fnPtr[0] = (void *)Shutdown;
+      break;
+    case RS_HAL_CORE_SET_PRIORITY:
+      fnPtr[0] = (void *)SetPriority;
+      break;
+    case RS_HAL_CORE_ALLOC_RUNTIME_MEM:
+      fnPtr[0] = (void *)AllocRuntimeMem;
+      break;
+    case RS_HAL_CORE_FREE_RUNTIME_MEM:
+      fnPtr[0] = (void *)FreeRuntimeMem;
+      break;
+    case RS_HAL_CORE_FINISH:
+      fnPtr[0] = (void *)nullptr;
+      break;
+
+    case RS_HAL_SCRIPT_INIT:
+      fnPtr[0] = (void *)rsovScriptInit;
+      break;
+    case RS_HAL_SCRIPT_INIT_INTRINSIC:
+      fnPtr[0] = (void *)rsovInitIntrinsic;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_FUNCTION:
+      fnPtr[0] = (void *)rsovScriptInvokeFunction;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_ROOT:
+      fnPtr[0] = (void *)rsovScriptInvokeRoot;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_FOR_EACH:
+      fnPtr[0] = (void *)rsovScriptInvokeForEach;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_INIT:
+      fnPtr[0] = (void *)rsovScriptInvokeInit;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_FREE_CHILDREN:
+      fnPtr[0] = (void *)rsovScriptInvokeFreeChildren;
+      break;
+    case RS_HAL_SCRIPT_DESTROY:
+      fnPtr[0] = (void *)rsovScriptDestroy;
+      break;
+    case RS_HAL_SCRIPT_SET_GLOBAL_VAR:
+      fnPtr[0] = (void *)rsovScriptSetGlobalVar;
+      break;
+    case RS_HAL_SCRIPT_GET_GLOBAL_VAR:
+      fnPtr[0] = (void *)rsovScriptGetGlobalVar;
+      break;
+    case RS_HAL_SCRIPT_SET_GLOBAL_VAR_WITH_ELEMENT_DIM:
+      fnPtr[0] = (void *)rsovScriptSetGlobalVarWithElemDims;
+      break;
+    case RS_HAL_SCRIPT_SET_GLOBAL_BIND:
+      fnPtr[0] = (void *)rsovScriptSetGlobalBind;
+      break;
+    case RS_HAL_SCRIPT_SET_GLOBAL_OBJECT:
+      fnPtr[0] = (void *)rsovScriptSetGlobalObj;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_FOR_EACH_MULTI:
+      fnPtr[0] = (void *)rsovScriptInvokeForEachMulti;
+      break;
+    case RS_HAL_SCRIPT_UPDATE_CACHED_OBJECT:
+      fnPtr[0] = (void *)rsovScriptUpdateCachedObject;
+      break;
+    case RS_HAL_SCRIPT_INVOKE_REDUCE:
+      fnPtr[0] = (void *)rsovScriptInvokeReduce;
+      break;
+
+    case RS_HAL_SAMPLER_INIT:
+      fnPtr[0] = (void *)rsovSamplerInit;
+      break;
+    case RS_HAL_SAMPLER_DESTROY:
+      fnPtr[0] = (void *)rsovSamplerDestroy;
+      break;
+    case RS_HAL_SAMPLER_UPDATE_CACHED_OBJECT:
+      fnPtr[0] = (void *)rsovSamplerUpdateCachedObject;
+      break;
+
+    case RS_HAL_TYPE_INIT:
+      fnPtr[0] = (void *)rsovTypeInit;
+      break;
+    case RS_HAL_TYPE_DESTROY:
+      fnPtr[0] = (void *)rsovTypeDestroy;
+      break;
+    case RS_HAL_TYPE_UPDATE_CACHED_OBJECT:
+      fnPtr[0] = (void *)rsovTypeUpdateCachedObject;
+      break;
+
+    case RS_HAL_ELEMENT_INIT:
+      fnPtr[0] = (void *)rsovElementInit;
+      break;
+    case RS_HAL_ELEMENT_DESTROY:
+      fnPtr[0] = (void *)rsovElementDestroy;
+      break;
+    case RS_HAL_ELEMENT_UPDATE_CACHED_OBJECT:
+      fnPtr[0] = (void *)rsovElementUpdateCachedObject;
+      break;
+
+    case RS_HAL_SCRIPT_GROUP_INIT:
+      fnPtr[0] = (void *)rsovScriptGroupInit;
+      break;
+    case RS_HAL_SCRIPT_GROUP_DESTROY:
+      fnPtr[0] = (void *)rsovScriptGroupDestroy;
+      break;
+    case RS_HAL_SCRIPT_GROUP_UPDATE_CACHED_OBJECT:
+      fnPtr[0] = (void *)nullptr;
+      break;
+    case RS_HAL_SCRIPT_GROUP_SET_INPUT:
+      fnPtr[0] = (void *)rsovScriptGroupSetInput;
+      break;
+    case RS_HAL_SCRIPT_GROUP_SET_OUTPUT:
+      fnPtr[0] = (void *)rsovScriptGroupSetOutput;
+      break;
+    case RS_HAL_SCRIPT_GROUP_EXECUTE:
+      fnPtr[0] = (void *)rsovScriptGroupExecute;
+      break;
+
+    // Ignore entries for the legacy graphics api,
+
+    default:
+      ALOGE("ERROR: unknown RenderScript HAL API query, %i", entry);
+      return false;
+  }
+
+  return true;
+}
+
+// Intentionally empty: this driver has nothing to abort.
+extern "C" void rsdHalAbort(RsContext) {}
+
+// Reports the HAL version this driver was built against (minor is always 0).
+extern "C" bool rsdHalQueryVersion(uint32_t *major, uint32_t *minor) {
+  *major = RS_HAL_VERSION;
+  *minor = 0;
+  return true;
+}
+
+// Creates the per-context driver state (RSoVHal), attaches it to the context
+// and initializes both back ends: the CPU reference implementation and the
+// shared Vulkan context.
+//
+// Returns true on success. On failure everything allocated here is released
+// again, rsc->mHal.drv is reset to nullptr and false is returned.
+extern "C" bool rsdHalInit(RsContext c, uint32_t version_major,
+                           uint32_t version_minor) {
+  Context *rsc = (Context *)c;
+
+  RSoVHal *hal = new RSoVHal();
+  if (!hal) {
+    ALOGE("Failed creating RSoV driver hal.");
+    return false;
+  }
+  rsc->mHal.drv = hal;
+
+  hal->mCpuRef = RsdCpuReference::create(rsc, version_major, version_minor,
+                                         &rsdLookupRuntimeStub, &LookupScript);
+  if (!hal->mCpuRef) {
+    ALOGE("RsdCpuReference::create for driver hal failed.");
+    rsc->mHal.drv = nullptr;
+    delete hal;  // do not leak the half-initialized hal
+    return false;
+  }
+
+  hal->mRSoV = android::renderscript::rsov::RSoVContext::create();
+  if (!hal->mRSoV) {
+    ALOGE("RSoVContext::create for driver hal failed.");
+    rsc->mHal.drv = nullptr;
+    // Undo the CPU reference created above before dropping the hal itself.
+    delete hal->mCpuRef;
+    delete hal;
+    return false;
+  }
+
+  return true;
+}
+
+}  // namespace renderscript
+}  // namespace android
diff --git a/rsov/driver/rsovCore.h b/rsov/driver/rsovCore.h
new file mode 100644
index 00000000..d0c160d5
--- /dev/null
+++ b/rsov/driver/rsovCore.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSOV_CORE_H
+#define RSOV_CORE_H
+
+namespace android {
+namespace renderscript {
+
+class RsdCpuReference;
+
+namespace rsov {
+
+class RSoVContext;
+
+}  // namespace rsov
+}  // namespace renderscript
+}  // namespace android
+
+// Per-context driver state; rsdHalInit() allocates one and stores it in the
+// context's mHal.drv slot.
+struct RSoVHal {
+  // Vulkan context, obtained from RSoVContext::create().
+  android::renderscript::rsov::RSoVContext* mRSoV;
+  // CPU reference implementation, obtained from RsdCpuReference::create().
+  android::renderscript::RsdCpuReference *mCpuRef;
+};
+
+// Number of elements in array `x`. Only meaningful for true arrays: applied to
+// a pointer it silently yields sizeof(pointer) / sizeof(element).
+#define NELEM(x) (sizeof(x) / sizeof((x)[0]))
+
+#endif
diff --git a/rsov/driver/rsovElement.cpp b/rsov/driver/rsovElement.cpp
new file mode 100644
index 00000000..d6321b34
--- /dev/null
+++ b/rsov/driver/rsovElement.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsElement.h"
+
+using android::renderscript::Context;
+using android::renderscript::Element;
+using android::renderscript::rs_element;
+
+// Driver hook for Element initialisation. Nothing is set up here; the hook
+// ignores both arguments and always reports success.
+bool rsovElementInit(const Context *rsc, const Element *e) { return true; }
+
+// Driver hook for Element destruction. Intentionally empty, mirroring
+// rsovElementInit, which sets nothing up.
+void rsovElementDestroy(const Context *rsc, const Element *e) {}
+
+// Fills the rs_element handle |obj| so that it refers to |element|.
+// |rsc| is unused.
+void rsovElementUpdateCachedObject(const Context *rsc, const Element *element,
+                                   rs_element *obj) {
+  obj->p = element;
+#ifdef __LP64__
+  // On LP64 builds the handle has three further pointer fields; they are
+  // cleared so the handle holds nothing but the object pointer.
+  obj->r = nullptr;
+  obj->v1 = nullptr;
+  obj->v2 = nullptr;
+#endif
+}
diff --git a/rsov/driver/rsovElement.h b/rsov/driver/rsovElement.h
new file mode 100644
index 00000000..e3d46bcd
--- /dev/null
+++ b/rsov/driver/rsovElement.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSOV_ELEMENT_H
+#define RSOV_ELEMENT_H
+
+#include "rs_hal.h"
+
+// Element initialisation hook; returns true on success.
+extern bool rsovElementInit(const android::renderscript::Context *rsc,
+                            const android::renderscript::Element *);
+
+// Element destruction hook.
+extern void rsovElementDestroy(const android::renderscript::Context *rsc,
+                               const android::renderscript::Element *);
+
+// Updates the rs_element handle |obj| to refer to the given Element.
+extern void rsovElementUpdateCachedObject(
+    const android::renderscript::Context *rsc,
+    const android::renderscript::Element *,
+    android::renderscript::rs_element *obj);
+
+#endif  // RSOV_ELEMENT_H
diff --git a/rsov/driver/rsovRuntimeStubs.cpp b/rsov/driver/rsovRuntimeStubs.cpp
new file mode 100644
index 00000000..308c0a53
--- /dev/null
+++ b/rsov/driver/rsovRuntimeStubs.cpp
@@ -0,0 +1,1150 @@
+/*
+ * Copyright (C) 2011-2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): the header name of the #include directly below is missing
+// (it looks like an angle-bracket system header was stripped from this copy
+// of the patch). Restore it from the original commit before building.
+#include
+
+#include "rsContext.h"
+#include "rsElement.h"
+#include "rsMatrix2x2.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix4x4.h"
+#include "rsRuntime.h"
+#include "rsScriptC.h"
+#include "rsType.h"
+#include "rsovAllocation.h"
+#include "rsovCore.h"
+#include "rsovScript.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+// Scalar and vector element types, declared with clang's ext_vector_type
+// extension. The ELEMENT_AT instantiations later in this file use these
+// names both as C++ types and as function-name suffixes.
+typedef __fp16 half;
+typedef half half2 __attribute__((ext_vector_type(2)));
+typedef half half3 __attribute__((ext_vector_type(3)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef double double3 __attribute__((ext_vector_type(3)));
+typedef double double4 __attribute__((ext_vector_type(4)));
+typedef char char2 __attribute__((ext_vector_type(2)));
+typedef char char3 __attribute__((ext_vector_type(3)));
+typedef char char4 __attribute__((ext_vector_type(4)));
+typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
+typedef unsigned char uchar3 __attribute__((ext_vector_type(3)));
+typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
+typedef int16_t short2 __attribute__((ext_vector_type(2)));
+typedef int16_t short3 __attribute__((ext_vector_type(3)));
+typedef int16_t short4 __attribute__((ext_vector_type(4)));
+typedef uint16_t ushort2 __attribute__((ext_vector_type(2)));
+typedef uint16_t ushort3 __attribute__((ext_vector_type(3)));
+typedef uint16_t ushort4 __attribute__((ext_vector_type(4)));
+typedef int32_t int2 __attribute__((ext_vector_type(2)));
+typedef int32_t int3 __attribute__((ext_vector_type(3)));
+typedef int32_t int4 __attribute__((ext_vector_type(4)));
+typedef uint32_t uint2 __attribute__((ext_vector_type(2)));
+typedef uint32_t uint3 __attribute__((ext_vector_type(3)));
+typedef uint32_t uint4 __attribute__((ext_vector_type(4)));
+typedef int64_t long2 __attribute__((ext_vector_type(2)));
+typedef int64_t long3 __attribute__((ext_vector_type(3)));
+typedef int64_t long4 __attribute__((ext_vector_type(4)));
+typedef uint64_t ulong2 __attribute__((ext_vector_type(2)));
+typedef uint64_t ulong3 __attribute__((ext_vector_type(3)));
+typedef uint64_t ulong4 __attribute__((ext_vector_type(4)));
+
+typedef uint8_t uchar;
+typedef uint16_t ushort;
+typedef uint32_t uint;
+#ifndef RS_SERVER
+typedef uint64_t ulong;
+#endif
+
+// OPAQUETYPE(t) declares the global-namespace handle struct |t| that the
+// entry points below take as ::rs_allocation, ::rs_element, etc. The layout
+// is a single packed pointer on non-LP64 builds and four pointers on LP64.
+// Add NOLINT to suppress wrong warnings from clang-tidy.
+#ifndef __LP64__
+#define OPAQUETYPE(t) \
+  typedef struct { \
+    const int *const p; \
+  } __attribute__((packed, aligned(4))) t; /*NOLINT*/
+#else
+#define OPAQUETYPE(t) \
+  typedef struct { \
+    const void *p; \
+    const void *r; \
+    const void *v1; \
+    const void *v2; \
+  } t; /*NOLINT*/
+#endif
+
+OPAQUETYPE(rs_element)
+OPAQUETYPE(rs_type)
+OPAQUETYPE(rs_allocation)
+OPAQUETYPE(rs_sampler)
+OPAQUETYPE(rs_script)
+OPAQUETYPE(rs_script_call)
+
+OPAQUETYPE(rs_program_fragment);
+OPAQUETYPE(rs_program_store);
+OPAQUETYPE(rs_program_vertex);
+OPAQUETYPE(rs_program_raster);
+OPAQUETYPE(rs_mesh);
+OPAQUETYPE(rs_font);
+
+#undef OPAQUETYPE
+
+// The three enums below are deliberately empty placeholders: they only give
+// the entry-point signatures a distinct parameter type.
+typedef enum {
+  // Empty to avoid conflicting definitions with RsAllocationCubemapFace
+} rs_allocation_cubemap_face;
+
+typedef enum {
+  // Empty to avoid conflicting definitions with RsYuvFormat
+} rs_yuv_format;
+
+typedef enum {
+  // Empty to avoid conflicting definitions with RsAllocationMipmapControl
+} rs_allocation_mipmap_control;
+
+typedef struct { unsigned int val; } rs_allocation_usage_type;
+
+// Broken-down time structure.
+typedef struct {
+  int tm_sec;    ///< seconds
+  int tm_min;    ///< minutes
+  int tm_hour;   ///< hours
+  int tm_mday;   ///< day of the month
+  int tm_mon;    ///< month
+  int tm_year;   ///< year
+  int tm_wday;   ///< day of the week
+  int tm_yday;   ///< day of the year
+  int tm_isdst;  ///< daylight savings time
+} rs_tm;
+
+// Some RS functions are not
threadsafe but can be called from an invoke +// function. Instead of summarily marking scripts that call these functions as +// not-threadable we detect calls to them in the driver and sends a fatal error +// message. +static bool failIfInKernel(Context *rsc, const char *funcName) { + RSoVHal *dc = (RSoVHal *)rsc->mHal.drv; + RsdCpuReference *impl = (RsdCpuReference *)dc->mCpuRef; + + if (impl->getInKernel()) { + char buf[256]; + snprintf(buf, sizeof(buf), + "Error: Call to unsupported function %s " + "in kernel", + funcName); + rsc->setError(RS_ERROR_FATAL_DRIVER, buf); + return true; + } + return false; +} + +////////////////////////////////////////////////////////////////////////////// +// Allocation routines +////////////////////////////////////////////////////////////////////////////// +#if defined(__i386__) || (defined(__mips__) && __mips == 32) +// i386 and MIPS32 have different struct return passing to ARM; emulate with a +// pointer +const Allocation *rsGetAllocation(const void *ptr) { + Context *rsc = RsdCpuReference::getTlsContext(); + const Script *sc = RsdCpuReference::getTlsScript(); + Allocation *alloc = rsovScriptGetAllocationForPointer(rsc, sc, ptr); + android::renderscript::rs_allocation obj = {0}; + alloc->callUpdateCacheObject(rsc, &obj); + return (Allocation *)obj.p; +} +#else +const android::renderscript::rs_allocation rsGetAllocation(const void *ptr) { + Context *rsc = RsdCpuReference::getTlsContext(); + const Script *sc = RsdCpuReference::getTlsScript(); + Allocation *alloc = rsovScriptGetAllocationForPointer(rsc, sc, ptr); + +#ifndef __LP64__ // ARMv7 + android::renderscript::rs_allocation obj = {0}; +#else // AArch64/x86_64/MIPS64 + android::renderscript::rs_allocation obj = {0, 0, 0, 0}; +#endif + alloc->callUpdateCacheObject(rsc, &obj); + return obj; +} +#endif + +void __attribute__((overloadable)) rsAllocationIoSend(::rs_allocation a) { + Context *rsc = RsdCpuReference::getTlsContext(); + if (failIfInKernel(rsc, "rsAllocationIoSend")) 
return;
+  rsrAllocationIoSend(rsc, (Allocation *)a.p);
+}
+
+// Forwards to rsrAllocationIoReceive for the allocation behind |a|, unless
+// failIfInKernel rejects the call.
+void __attribute__((overloadable)) rsAllocationIoReceive(::rs_allocation a) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+  if (failIfInKernel(rsc, "rsAllocationIoReceive")) return;
+  rsrAllocationIoReceive(rsc, (Allocation *)a.p);
+}
+
+// Forwards a 1D range copy between two allocations to
+// rsrAllocationCopy1DRange, unless failIfInKernel rejects the call.
+void __attribute__((overloadable))
+rsAllocationCopy1DRange(::rs_allocation dstAlloc, uint32_t dstOff,
+                        uint32_t dstMip, uint32_t count,
+                        ::rs_allocation srcAlloc, uint32_t srcOff,
+                        uint32_t srcMip) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+  if (failIfInKernel(rsc, "rsAllocationCopy1DRange")) return;
+  rsrAllocationCopy1DRange(rsc, (Allocation *)dstAlloc.p, dstOff, dstMip, count,
+                           (Allocation *)srcAlloc.p, srcOff, srcMip);
+}
+
+// Forwards a 2D range copy between two allocations to
+// rsrAllocationCopy2DRange, unless failIfInKernel rejects the call.
+void __attribute__((overloadable))
+rsAllocationCopy2DRange(::rs_allocation dstAlloc, uint32_t dstXoff,
+                        uint32_t dstYoff, uint32_t dstMip,
+                        rs_allocation_cubemap_face dstFace, uint32_t width,
+                        uint32_t height, ::rs_allocation srcAlloc,
+                        uint32_t srcXoff, uint32_t srcYoff, uint32_t srcMip,
+                        rs_allocation_cubemap_face srcFace) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+  if (failIfInKernel(rsc, "rsAllocationCopy2DRange")) return;
+  rsrAllocationCopy2DRange(
+      rsc, (Allocation *)dstAlloc.p, dstXoff, dstYoff, dstMip, dstFace, width,
+      height, (Allocation *)srcAlloc.p, srcXoff, srcYoff, srcMip, srcFace);
+}
+
+// Creates an Element through rsrElementCreate and returns a handle to it.
+// Returns a value-initialised (empty) handle when creation fails.
+static android::renderscript::rs_element CreateElement(RsDataType dt,
+                                                       RsDataKind dk,
+                                                       bool isNormalized,
+                                                       uint32_t vecSize) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+
+  // No need for validation here. The rsCreateElement overload below is not
+  // exposed to the Script. The Element-creation APIs call this function in a
+  // consistent manner and rsComponent.cpp asserts on any inconsistency.
+  Element *element =
+      (Element *)rsrElementCreate(rsc, dt, dk, isNormalized, vecSize);
+  android::renderscript::rs_element obj = {};
+  if (element == nullptr) return obj;
+  element->callUpdateCacheObject(rsc, &obj);
+
+  // Any new rsObject created from inside a script should have the usrRefCount
+  // initialized to 0 and the sysRefCount initialized to 1.
+  element->incSysRef();
+  element->decUserRef();
+
+  return obj;
+}
+
+// Validates the requested shape and YUV format, then creates a Type through
+// rsrTypeCreate and returns a handle to it. Every rejected combination is
+// logged with ALOGE and yields a value-initialised (empty) handle, as does a
+// failed rsrTypeCreate call.
+static android::renderscript::rs_type CreateType(RsElement element,
+                                                 uint32_t dimX, uint32_t dimY,
+                                                 uint32_t dimZ, bool mipmaps,
+                                                 bool faces,
+                                                 uint32_t yuv_format) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+  android::renderscript::rs_type obj = {};
+
+  if (element == nullptr) {
+    ALOGE("rs_type creation error: Invalid element");
+    return obj;
+  }
+
+  // validate yuv_format
+  RsYuvFormat yuv = (RsYuvFormat)yuv_format;
+  if (yuv != RS_YUV_NONE && yuv != RS_YUV_YV12 && yuv != RS_YUV_NV21 &&
+      yuv != RS_YUV_420_888) {
+    ALOGE("rs_type creation error: Invalid yuv_format %d\n", yuv_format);
+    return obj;
+  }
+
+  // validate consistency of shape parameters
+  if (dimZ > 0) {
+    // A 3D type needs both lower dimensions and excludes mipmaps and faces.
+    if (dimX < 1 || dimY < 1) {
+      ALOGE(
+          "rs_type creation error: Both X and Y dimension required "
+          "when Z is present.");
+      return obj;
+    }
+    if (mipmaps) {
+      ALOGE("rs_type creation error: mipmap control requires 2D types");
+      return obj;
+    }
+    if (faces) {
+      ALOGE("rs_type creation error: Cube maps require 2D types");
+      return obj;
+    }
+  }
+  if (dimY > 0 && dimX < 1) {
+    ALOGE(
+        "rs_type creation error: X dimension required when Y is "
+        "present.");
+    return obj;
+  }
+  if (mipmaps && dimY < 1) {
+    ALOGE("rs_type creation error: mipmap control require 2D Types.");
+    return obj;
+  }
+  if (faces && dimY < 1) {
+    ALOGE("rs_type creation error: Cube maps require 2D Types.");
+    return obj;
+  }
+  if (yuv_format != RS_YUV_NONE) {
+    // YUV is only accepted for plain 2D types: no Z, no faces, no mipmaps.
+    if (dimZ != 0 || dimY == 0 || faces || mipmaps) {
+      ALOGE("rs_type creation error: YUV only supports basic 2D.");
+      return obj;
+    }
+  }
+
+  Type *type = (Type *)rsrTypeCreate(rsc, element, dimX, dimY, dimZ, mipmaps,
+                                     faces, yuv_format);
+  if (type == nullptr) return obj;
+  type->callUpdateCacheObject(rsc, &obj);
+
+  // Any new rsObject created from inside a script should have the usrRefCount
+  // initialized to 0 and the sysRefCount initialized to 1.
+  type->incSysRef();
+  type->decUserRef();
+
+  return obj;
+}
+
+// Creates an Allocation of |type| through rsrAllocationCreateTyped and
+// returns a handle to it. Only RS_ALLOCATION_USAGE_SCRIPT and
+// RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE are accepted in |usages|. A null
+// |type|, any other usage bit, or a failed creation yields a
+// value-initialised (empty) handle.
+static android::renderscript::rs_allocation CreateAllocation(
+    RsType type, RsAllocationMipmapControl mipmaps, uint32_t usages,
+    void *ptr) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+  android::renderscript::rs_allocation obj = {};
+
+  if (type == nullptr) {
+    ALOGE("rs_allocation creation error: Invalid type");
+    return obj;
+  }
+
+  uint32_t validUsages =
+      RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE;
+  if (usages & ~validUsages) {
+    ALOGE("rs_allocation creation error: Invalid usage flag");
+    return obj;
+  }
+
+  Allocation *alloc = (Allocation *)rsrAllocationCreateTyped(
+      rsc, type, mipmaps, usages, (uintptr_t)ptr);
+  if (alloc == nullptr) return obj;
+  alloc->callUpdateCacheObject(rsc, &obj);
+
+  // Any new rsObject created from inside a script should have the usrRefCount
+  // initialized to 0 and the sysRefCount initialized to 1.
+  alloc->incSysRef();
+  alloc->decUserRef();
+
+  return obj;
+}
+
+// Define rsCreateElement, rsCreateType and rsCreateAllocation entry points
+// differently for 32-bit x86 and Mips. The definitions for ARM32 and all
+// 64-bit architectures are further below.
+#if defined(__i386__) || (defined(__mips__) && __mips == 32)
+
+// The calling convention for the driver on 32-bit x86 and Mips returns
+// rs_element etc. as a stack-return parameter. The Script uses ARM32 calling
+// conventions that return the structs in a register. To match this convention,
+// emulate the return value using a pointer.
+// 32-bit x86 / Mips variant: returns the object pointer taken from the
+// handle that CreateElement produced.
+Element *rsCreateElement(int32_t dt, int32_t dk, bool isNormalized,
+                         uint32_t vecSize) {
+  android::renderscript::rs_element obj =
+      CreateElement((RsDataType)dt, (RsDataKind)dk, isNormalized, vecSize);
+  return (Element *)obj.p;
+}
+
+// 32-bit x86 / Mips variant: returns the object pointer taken from the
+// handle that CreateType produced.
+Type *rsCreateType(::rs_element element, uint32_t dimX, uint32_t dimY,
+                   uint32_t dimZ, bool mipmaps, bool faces,
+                   rs_yuv_format yuv_format) {
+  android::renderscript::rs_type obj =
+      CreateType((RsElement)element.p, dimX, dimY, dimZ, mipmaps, faces,
+                 (RsYuvFormat)yuv_format);
+  return (Type *)obj.p;
+}
+
+// 32-bit x86 / Mips variant: returns the object pointer taken from the
+// handle that CreateAllocation produced.
+Allocation *rsCreateAllocation(::rs_type type,
+                               rs_allocation_mipmap_control mipmaps,
+                               uint32_t usages, void *ptr) {
+  android::renderscript::rs_allocation obj;
+  obj = CreateAllocation((RsType)type.p, (RsAllocationMipmapControl)mipmaps,
+                         usages, ptr);
+  return (Allocation *)obj.p;
+}
+
+#else
+// All other architectures: the handle struct is returned by value, so these
+// entry points forward straight to the static helpers.
+android::renderscript::rs_element rsCreateElement(int32_t dt, int32_t dk,
+                                                  bool isNormalized,
+                                                  uint32_t vecSize) {
+  return CreateElement((RsDataType)dt, (RsDataKind)dk, isNormalized, vecSize);
+}
+
+android::renderscript::rs_type rsCreateType(::rs_element element, uint32_t dimX,
+                                            uint32_t dimY, uint32_t dimZ,
+                                            bool mipmaps, bool faces,
+                                            rs_yuv_format yuv_format) {
+  return CreateType((RsElement)element.p, dimX, dimY, dimZ, mipmaps, faces,
+                    yuv_format);
+}
+
+android::renderscript::rs_allocation rsCreateAllocation(
+    ::rs_type type, rs_allocation_mipmap_control mipmaps, uint32_t usages,
+    void *ptr) {
+  return CreateAllocation((RsType)type.p, (RsAllocationMipmapControl)mipmaps,
+                          usages, ptr);
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// Object routines
+//////////////////////////////////////////////////////////////////////////////
+// Add NOLINT to suppress wrong warnings from clang-tidy.
+#define IS_CLEAR_SET_OBJ(t) \ + bool rsIsObject(t src) { return src.p != nullptr; } \ + void __attribute__((overloadable)) rsClearObject(t *dst) { /*NOLINT*/ \ + rsrClearObject(reinterpret_cast(dst)); \ + } \ + void __attribute__((overloadable)) rsSetObject(t *dst, t src) { /*NOLINT*/ \ + Context *rsc = RsdCpuReference::getTlsContext(); \ + rsrSetObject(rsc, reinterpret_cast(dst), \ + (ObjectBase *)src.p); \ + } + +IS_CLEAR_SET_OBJ(::rs_element) +IS_CLEAR_SET_OBJ(::rs_type) +IS_CLEAR_SET_OBJ(::rs_allocation) +IS_CLEAR_SET_OBJ(::rs_sampler) +IS_CLEAR_SET_OBJ(::rs_script) + +IS_CLEAR_SET_OBJ(::rs_mesh) +IS_CLEAR_SET_OBJ(::rs_program_fragment) +IS_CLEAR_SET_OBJ(::rs_program_vertex) +IS_CLEAR_SET_OBJ(::rs_program_raster) +IS_CLEAR_SET_OBJ(::rs_program_store) +IS_CLEAR_SET_OBJ(::rs_font) + +#undef IS_CLEAR_SET_OBJ + +////////////////////////////////////////////////////////////////////////////// +// Element routines +////////////////////////////////////////////////////////////////////////////// +static void *ElementAt(Allocation *a, RsDataType dt, uint32_t vecSize, + uint32_t x, uint32_t y, uint32_t z) { + Context *rsc = RsdCpuReference::getTlsContext(); + const Type *t = a->getType(); + const Element *e = t->getElement(); + + char buf[256]; + if (x && (x >= t->getLODDimX(0))) { + snprintf(buf, sizeof(buf), "Out range ElementAt X %i of %i", x, + t->getLODDimX(0)); + rsc->setError(RS_ERROR_FATAL_DEBUG, buf); + return nullptr; + } + + if (y && (y >= t->getLODDimY(0))) { + snprintf(buf, sizeof(buf), "Out range ElementAt Y %i of %i", y, + t->getLODDimY(0)); + rsc->setError(RS_ERROR_FATAL_DEBUG, buf); + return nullptr; + } + + if (z && (z >= t->getLODDimZ(0))) { + snprintf(buf, sizeof(buf), "Out range ElementAt Z %i of %i", z, + t->getLODDimZ(0)); + rsc->setError(RS_ERROR_FATAL_DEBUG, buf); + return nullptr; + } + + if (vecSize > 0) { + if (vecSize != e->getVectorSize()) { + snprintf(buf, sizeof(buf), "Vector size mismatch for ElementAt %i of %i", + vecSize, 
e->getVectorSize()); + rsc->setError(RS_ERROR_FATAL_DEBUG, buf); + return nullptr; + } + + if (dt != e->getType()) { + snprintf(buf, sizeof(buf), "Data type mismatch for ElementAt %i of %i", + dt, e->getType()); + rsc->setError(RS_ERROR_FATAL_DEBUG, buf); + return nullptr; + } + } + + uint8_t *p = (uint8_t *)a->mHal.drvState.lod[0].mallocPtr; + const uint32_t eSize = e->getSizeBytes(); + const uint32_t stride = a->mHal.drvState.lod[0].stride; + const uint32_t dimY = a->mHal.drvState.lod[0].dimY; + return &p[(x * eSize) + (y * stride) + (z * stride * dimY)]; +} + +void rsSetElementAt(::rs_allocation a, const void *ptr, uint32_t x, uint32_t y, + uint32_t z) { + const Type *t = const_cast((Allocation *)a.p)->getType(); + const Element *e = t->getElement(); + void *tmp = ElementAt((Allocation *)a.p, RS_TYPE_UNSIGNED_8, 0, x, y, z); + if (tmp != nullptr) memcpy(tmp, ptr, e->getSizeBytes()); +} + +void rsSetElementAt(::rs_allocation a, const void *ptr, uint32_t x, + uint32_t y) { + rsSetElementAt(a, ptr, x, y, 0); +} + +void rsSetElementAt(::rs_allocation a, const void *ptr, uint32_t x) { + rsSetElementAt(a, ptr, x, 0, 0); +} + +const void *rsGetElementAt(::rs_allocation a, uint32_t x, uint32_t y, + uint32_t z) { + return ElementAt((Allocation *)a.p, RS_TYPE_UNSIGNED_8, 0, x, y, z); +} + +const void *rsGetElementAt(::rs_allocation a, uint32_t x, uint32_t y) { + return rsGetElementAt(a, x, y, 0); +} + +const void *rsGetElementAt(::rs_allocation a, uint32_t x) { + return rsGetElementAt(a, x, 0, 0); +} + +// Add NOLINT to suppress wrong warnings from clang-tidy. 
+// ELEMENT_AT(T, DT, VS) defines the typed accessors rsSetElementAt_T and
+// rsGetElementAt_T, each in 3D, 2D and 1D form. The 3D forms locate the cell
+// with ElementAt(..., DT, VS, ...) and copy a single T to or from it; when
+// ElementAt returns nullptr they copy nothing and log with ALOGE. The 2D and
+// 1D forms forward to the 3D form with the missing coordinates set to 0.
+#define ELEMENT_AT(T, DT, VS) \
+  void rsSetElementAt_##T(::rs_allocation a, const T *val, uint32_t x, \
+                          uint32_t y, uint32_t z) { \
+    void *r = ElementAt((Allocation *)a.p, DT, VS, x, y, z); \
+    if (r != nullptr) \
+      ((T *)r)[0] = *val; \
+    else \
+      ALOGE("Error from %s", __PRETTY_FUNCTION__); \
+  } \
+  void rsSetElementAt_##T(::rs_allocation a, const T *val, uint32_t x, \
+                          uint32_t y) { \
+    rsSetElementAt_##T(a, val, x, y, 0); \
+  } \
+  void rsSetElementAt_##T(::rs_allocation a, const T *val, uint32_t x) { \
+    rsSetElementAt_##T(a, val, x, 0, 0); \
+  } \
+  void rsGetElementAt_##T(::rs_allocation a, T *val, uint32_t x, uint32_t y, \
+                          uint32_t z) { /*NOLINT*/ \
+    void *r = ElementAt((Allocation *)a.p, DT, VS, x, y, z); \
+    if (r != nullptr) \
+      *val = ((T *)r)[0]; \
+    else \
+      ALOGE("Error from %s", __PRETTY_FUNCTION__); \
+  } \
+  void rsGetElementAt_##T(::rs_allocation a, T *val, uint32_t x, \
+                          uint32_t y) { /*NOLINT*/ \
+    rsGetElementAt_##T(a, val, x, y, 0); \
+  } \
+  void rsGetElementAt_##T(::rs_allocation a, T *val, uint32_t x) { /*NOLINT*/ \
+    rsGetElementAt_##T(a, val, x, 0, 0); \
+  }
+
+ELEMENT_AT(char, RS_TYPE_SIGNED_8, 1)
+ELEMENT_AT(char2, RS_TYPE_SIGNED_8, 2)
+ELEMENT_AT(char3, RS_TYPE_SIGNED_8, 3)
+ELEMENT_AT(char4, RS_TYPE_SIGNED_8, 4)
+ELEMENT_AT(uchar, RS_TYPE_UNSIGNED_8, 1)
+ELEMENT_AT(uchar2, RS_TYPE_UNSIGNED_8, 2)
+ELEMENT_AT(uchar3, RS_TYPE_UNSIGNED_8, 3)
+ELEMENT_AT(uchar4, RS_TYPE_UNSIGNED_8, 4)
+ELEMENT_AT(short, RS_TYPE_SIGNED_16, 1)
+ELEMENT_AT(short2, RS_TYPE_SIGNED_16, 2)
+ELEMENT_AT(short3, RS_TYPE_SIGNED_16, 3)
+ELEMENT_AT(short4, RS_TYPE_SIGNED_16, 4)
+ELEMENT_AT(ushort, RS_TYPE_UNSIGNED_16, 1)
+ELEMENT_AT(ushort2, RS_TYPE_UNSIGNED_16, 2)
+ELEMENT_AT(ushort3, RS_TYPE_UNSIGNED_16, 3)
+ELEMENT_AT(ushort4, RS_TYPE_UNSIGNED_16, 4)
+ELEMENT_AT(int, RS_TYPE_SIGNED_32, 1)
+ELEMENT_AT(int2, RS_TYPE_SIGNED_32, 2)
+ELEMENT_AT(int3, RS_TYPE_SIGNED_32, 3)
+ELEMENT_AT(int4, RS_TYPE_SIGNED_32, 4)
+ELEMENT_AT(uint, RS_TYPE_UNSIGNED_32, 1)
+ELEMENT_AT(uint2, RS_TYPE_UNSIGNED_32, 2)
+ELEMENT_AT(uint3, RS_TYPE_UNSIGNED_32, 3)
+ELEMENT_AT(uint4, RS_TYPE_UNSIGNED_32, 4)
+ELEMENT_AT(long, RS_TYPE_SIGNED_64, 1)
+ELEMENT_AT(long2, RS_TYPE_SIGNED_64, 2)
+ELEMENT_AT(long3, RS_TYPE_SIGNED_64, 3)
+ELEMENT_AT(long4, RS_TYPE_SIGNED_64, 4)
+ELEMENT_AT(ulong, RS_TYPE_UNSIGNED_64, 1)
+ELEMENT_AT(ulong2, RS_TYPE_UNSIGNED_64, 2)
+ELEMENT_AT(ulong3, RS_TYPE_UNSIGNED_64, 3)
+ELEMENT_AT(ulong4, RS_TYPE_UNSIGNED_64, 4)
+ELEMENT_AT(half, RS_TYPE_FLOAT_16, 1)
+ELEMENT_AT(half2, RS_TYPE_FLOAT_16, 2)
+ELEMENT_AT(half3, RS_TYPE_FLOAT_16, 3)
+ELEMENT_AT(half4, RS_TYPE_FLOAT_16, 4)
+ELEMENT_AT(float, RS_TYPE_FLOAT_32, 1)
+ELEMENT_AT(float2, RS_TYPE_FLOAT_32, 2)
+ELEMENT_AT(float3, RS_TYPE_FLOAT_32, 3)
+ELEMENT_AT(float4, RS_TYPE_FLOAT_32, 4)
+ELEMENT_AT(double, RS_TYPE_FLOAT_64, 1)
+ELEMENT_AT(double2, RS_TYPE_FLOAT_64, 2)
+ELEMENT_AT(double3, RS_TYPE_FLOAT_64, 3)
+ELEMENT_AT(double4, RS_TYPE_FLOAT_64, 4)
+
+#undef ELEMENT_AT
+
+#ifndef __LP64__
+/*
+ * We miss some symbols for rs{Get,Set}Element_long,ulong variants because 64
+ * bit integer values are 'long' in RS-land but might be 'long long' in the
+ * driver. Define native_long* and native_ulong* types to be vectors of
+ * 'long' as seen by the driver and define overloaded versions of
+ * rsSetElementAt_* and rsGetElementAt_*. This should get us the correct
+ * mangled names in the driver.
+ */
+
+typedef long native_long2 __attribute__((ext_vector_type(2)));
+typedef long native_long3 __attribute__((ext_vector_type(3)));
+typedef long native_long4 __attribute__((ext_vector_type(4)));
+typedef unsigned long native_ulong2 __attribute__((ext_vector_type(2)));
+typedef unsigned long native_ulong3 __attribute__((ext_vector_type(3)));
+typedef unsigned long native_ulong4 __attribute__((ext_vector_type(4)));
+
+// Add NOLINT to suppress wrong warnings from clang-tidy.
+// ELEMENT_AT_OVERLOADS(T, U) adds overloads of rsSetElementAt_T and
+// rsGetElementAt_T whose value parameter is a pointer to U instead of T.
+// Each overload casts the pointer to T * and calls the 3D accessor taking T,
+// passing 0 for any coordinate its own signature does not have.
+#define ELEMENT_AT_OVERLOADS(T, U) \
+  void rsSetElementAt_##T(::rs_allocation a, const U *val, uint32_t x, \
+                          uint32_t y, uint32_t z) { \
+    rsSetElementAt_##T(a, (T *)val, x, y, z); \
+  } \
+  void rsSetElementAt_##T(::rs_allocation a, const U *val, uint32_t x, \
+                          uint32_t y) { \
+    rsSetElementAt_##T(a, (T *)val, x, y, 0); \
+  } \
+  void rsSetElementAt_##T(::rs_allocation a, const U *val, uint32_t x) { \
+    rsSetElementAt_##T(a, (T *)val, x, 0, 0); \
+  } \
+  void rsGetElementAt_##T(::rs_allocation a, U *val, uint32_t x, uint32_t y, \
+                          uint32_t z) { /*NOLINT*/ \
+    rsGetElementAt_##T(a, (T *)val, x, y, z); \
+  } \
+  void rsGetElementAt_##T(::rs_allocation a, U *val, uint32_t x, \
+                          uint32_t y) { /*NOLINT*/ \
+    rsGetElementAt_##T(a, (T *)val, x, y, 0); \
+  } \
+  void rsGetElementAt_##T(::rs_allocation a, U *val, uint32_t x) { /*NOLINT*/ \
+    rsGetElementAt_##T(a, (T *)val, x, 0, 0); \
+  }
+
+ELEMENT_AT_OVERLOADS(long2, native_long2)
+ELEMENT_AT_OVERLOADS(long3, native_long3)
+ELEMENT_AT_OVERLOADS(long4, native_long4)
+ELEMENT_AT_OVERLOADS(ulong, unsigned long)
+ELEMENT_AT_OVERLOADS(ulong2, native_ulong2)
+ELEMENT_AT_OVERLOADS(ulong3, native_ulong3)
+ELEMENT_AT_OVERLOADS(ulong4, native_ulong4)
+
+// We also need variants of rs{Get,Set}ElementAt_long that take 'long long *' as
+// we might have this overloaded variant in old APKs.
+ELEMENT_AT_OVERLOADS(long, long long)
+
+#undef ELEMENT_AT_OVERLOADS
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+// ForEach routines
+//////////////////////////////////////////////////////////////////////////////
+void rsForEachInternal(int slot, rs_script_call *options, int hasOutput,
+                       int numInputs, ::rs_allocation *allocs) {
+  Context *rsc = RsdCpuReference::getTlsContext();
+  Script *s = const_cast