diff options
Diffstat (limited to 'src/libANGLE')
24 files changed, 1868 insertions, 371 deletions
diff --git a/src/libANGLE/CLPlatform.cpp b/src/libANGLE/CLPlatform.cpp index 9997f51dec..29c5db1ecb 100644 --- a/src/libANGLE/CLPlatform.cpp +++ b/src/libANGLE/CLPlatform.cpp @@ -254,7 +254,8 @@ Platform::~Platform() = default; Platform::Platform(const rx::CLPlatformImpl::CreateFunc &createFunc) : mImpl(createFunc(*this)), mInfo(mImpl->createInfo()), - mDevices(createDevices(mImpl->createDevices())) + mDevices(createDevices(mImpl->createDevices())), + mMultiThreadPool(angle::WorkerThreadPool::Create(0, ANGLEPlatformCurrent())) {} DevicePtrs Platform::createDevices(rx::CLDeviceImpl::CreateDatas &&createDatas) diff --git a/src/libANGLE/CLPlatform.h b/src/libANGLE/CLPlatform.h index e19ed2a072..7540b96e28 100644 --- a/src/libANGLE/CLPlatform.h +++ b/src/libANGLE/CLPlatform.h @@ -10,6 +10,9 @@ #define LIBANGLE_CLPLATFORM_H_ #include "libANGLE/CLObject.h" + +#include "common/WorkerThread.h" + #include "libANGLE/renderer/CLPlatformImpl.h" #include "anglebase/no_destructor.h" @@ -71,6 +74,8 @@ class Platform final : public _cl_platform_id, public Object static constexpr const char *GetVendor(); + const std::shared_ptr<angle::WorkerThreadPool> &getMultiThreadPool() const; + private: explicit Platform(const rx::CLPlatformImpl::CreateFunc &createFunc); @@ -81,6 +86,7 @@ class Platform final : public _cl_platform_id, public Object const rx::CLPlatformImpl::Ptr mImpl; const rx::CLPlatformImpl::Info mInfo; const DevicePtrs mDevices; + std::shared_ptr<angle::WorkerThreadPool> mMultiThreadPool; static constexpr char kVendor[] = "ANGLE"; static constexpr char kIcdSuffix[] = "ANGLE"; @@ -139,6 +145,11 @@ constexpr const char *Platform::GetVendor() return kVendor; } +inline const std::shared_ptr<angle::WorkerThreadPool> &Platform::getMultiThreadPool() const +{ + return mMultiThreadPool; +} + inline PlatformPtrs &Platform::GetPointers() { static angle::base::NoDestructor<PlatformPtrs> sPointers; diff --git a/src/libANGLE/CLProgram.h b/src/libANGLE/CLProgram.h index 
b4ff886672..bcd7de6840 100644 --- a/src/libANGLE/CLProgram.h +++ b/src/libANGLE/CLProgram.h @@ -10,6 +10,7 @@ #include "libANGLE/CLDevice.h" #include "libANGLE/CLKernel.h" +#include "libANGLE/cl_utils.h" #include "libANGLE/renderer/CLProgramImpl.h" #include "common/Spinlock.h" @@ -61,6 +62,7 @@ class Program final : public _cl_program, public Object Context &getContext(); const Context &getContext() const; const DevicePtrs &getDevices() const; + const std::string &getSource() const; bool hasDevice(const _cl_device_id *device) const; bool isBuilding() const; @@ -123,6 +125,11 @@ inline const DevicePtrs &Program::getDevices() const return mDevices; } +inline const std::string &Program::getSource() const +{ + return mSource; +} + inline bool Program::hasDevice(const _cl_device_id *device) const { return std::find(mDevices.cbegin(), mDevices.cend(), device) != mDevices.cend(); @@ -130,7 +137,17 @@ inline bool Program::hasDevice(const _cl_device_id *device) const inline bool Program::isBuilding() const { - return mCallback->first != nullptr; + for (const DevicePtr &device : getDevices()) + { + cl_build_status buildStatus; + ANGLE_CL_IMPL_TRY(getBuildInfo(device->getNative(), ProgramBuildInfo::Status, + sizeof(cl_build_status), &buildStatus, nullptr)); + if ((mCallback->first != nullptr) || (buildStatus == CL_BUILD_IN_PROGRESS)) + { + return true; + } + } + return false; } inline bool Program::hasAttachedKernels() const diff --git a/src/libANGLE/Display.h b/src/libANGLE/Display.h index 536347df54..af7be36587 100644 --- a/src/libANGLE/Display.h +++ b/src/libANGLE/Display.h @@ -309,6 +309,7 @@ class Display final : public LabeledObject, egl::Sync *getSync(egl::SyncID syncID); const SyncMap &getSyncsForCapture() const { return mSyncMap; } + const ImageMap &getImagesForCapture() const { return mImageMap; } // Initialize thread-local variables used by the Display and its backing implementations. 
This // includes: diff --git a/src/libANGLE/capture/FrameCapture.cpp b/src/libANGLE/capture/FrameCapture.cpp index c10574bac6..79ffcedcfd 100644 --- a/src/libANGLE/capture/FrameCapture.cpp +++ b/src/libANGLE/capture/FrameCapture.cpp @@ -1464,6 +1464,43 @@ void MaybeResetResources(gl::ContextID contextID, } break; } + case ResourceIDType::Image: + { + TrackedResource &trackedEGLImages = + resourceTracker->getTrackedResource(contextID, ResourceIDType::Image); + ResourceSet &newEGLImages = trackedEGLImages.getNewResources(); + ResourceSet &eglImagesToDelete = trackedEGLImages.getResourcesToDelete(); + ResourceSet &eglImagesToRegen = trackedEGLImages.getResourcesToRegen(); + ResourceCalls &eglImageRegenCalls = trackedEGLImages.getResourceRegenCalls(); + + if (!newEGLImages.empty() || !eglImagesToDelete.empty()) + { + for (GLuint oldResource : eglImagesToDelete) + { + out << " DestroyEGLImageKHR(gEGLDisplay, gEGLImageMap2[" << oldResource + << "], " << oldResource << ");\n"; + } + + for (GLuint newResource : newEGLImages) + { + out << " DestroyEGLImageKHR(gEGLDisplay, gEGLImageMap2[" << newResource + << "], " << newResource << ");\n"; + } + } + // If any of our starting EGLImages were deleted during the run, recreate them + for (GLuint id : eglImagesToRegen) + { + // Emit their regen calls + for (CallCapture &call : eglImageRegenCalls[id]) + { + out << " "; + WriteCppReplayForCall(call, replayWriter, out, header, binaryData, + maxResourceIDBufferSize); + out << ";\n"; + } + } + break; + } default: // TODO (http://anglebug.com/4599): Reset more resource types break; @@ -3419,18 +3456,6 @@ void CaptureTextureContents(std::vector<CallCapture> *setupCalls, return; } - if (index.getType() == gl::TextureType::External) - { - // The generated glTexImage2D call is for creating the staging texture - Capture(setupCalls, - CaptureTexImage2D(*replayState, true, gl::TextureTarget::_2D, index.getLevelIndex(), - format.internalFormat, desc.size.width, desc.size.height, 0, - 
format.format, format.type, data)); - - // For external textures, we're done - return; - } - bool is3D = (index.getType() == gl::TextureType::_3D || index.getType() == gl::TextureType::_2DArray || index.getType() == gl::TextureType::CubeMapArray); @@ -3564,15 +3589,54 @@ void CaptureCustomFenceSync(CallCapture &call, std::vector<CallCapture> &callsOu callsOut.emplace_back(std::move(call)); } -void CaptureCustomCreateEGLImage(const char *name, +const egl::Image *GetImageFromParam(const gl::Context *context, const ParamCapture ¶m) +{ + const egl::ImageID eglImageID = egl::PackParam<egl::ImageID>(param.value.EGLImageVal); + const egl::Image *eglImage = context->getDisplay()->getImage(eglImageID); + ASSERT(eglImage != nullptr); + return eglImage; +} + +void CaptureCustomCreateEGLImage(const gl::Context *context, + const char *name, + size_t width, + size_t height, CallCapture &call, std::vector<CallCapture> &callsOut) { - ParamBuffer &¶ms = std::move(call.params); - EGLImage returnVal = params.getReturnValue().value.EGLImageVal; - egl::ImageID imageID = egl::PackParam<egl::ImageID>(returnVal); + ParamBuffer &¶ms = std::move(call.params); + EGLImage returnVal = params.getReturnValue().value.EGLImageVal; + egl::ImageID imageID = egl::PackParam<egl::ImageID>(returnVal); + call.customFunctionName = name; + + // Clear client buffer value if it is a pointer to a hardware buffer. 
It is + // not used by replay and will not be portable to 32-bit builds + if (params.getParam("target", ParamType::TEGLenum, 2).value.EGLenumVal == + EGL_NATIVE_BUFFER_ANDROID) + { + params.setValueParamAtIndex("buffer", ParamType::TEGLClientBuffer, + reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(0)), + 3); + } + + // Record image dimensions in case a backing resource needs to be created during replay + params.addValueParam("width", ParamType::TGLsizei, static_cast<GLsizei>(width)); + params.addValueParam("height", ParamType::TGLsizei, static_cast<GLsizei>(height)); + params.addValueParam("image", ParamType::TGLuint, imageID.value); + callsOut.emplace_back(std::move(call)); +} + +void CaptureCustomDestroyEGLImage(const char *name, + CallCapture &call, + std::vector<CallCapture> &callsOut) +{ call.customFunctionName = name; + ParamBuffer &¶ms = std::move(call.params); + + const ParamCapture &imageID = params.getParam("imagePacked", ParamType::TImageID, 1); + params.addValueParam("imageID", ParamType::TGLuint, imageID.value.ImageIDVal.value); + callsOut.emplace_back(std::move(call)); } @@ -4150,6 +4214,36 @@ void CaptureShareGroupMidExecutionSetup( replayState.getMutablePrivateStateForCapture()->setUnpackAlignment(1); } + const egl::ImageMap eglImageMap = context->getDisplay()->getImagesForCapture(); + for (const auto &[eglImageID, eglImage] : eglImageMap) + { + // Track this as a starting resource that may need to be restored. 
+ TrackedResource &trackedImages = + resourceTracker->getTrackedResource(context->id(), ResourceIDType::Image); + trackedImages.getStartingResources().insert(eglImageID); + + ResourceCalls &imageRegenCalls = trackedImages.getResourceRegenCalls(); + CallVector imageGenCalls({setupCalls, &imageRegenCalls[eglImageID]}); + + auto eglImageAttribIter = resourceTracker->getImageToAttribTable().find( + reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID))); + ASSERT(eglImageAttribIter != resourceTracker->getImageToAttribTable().end()); + const egl::AttributeMap &attribs = eglImageAttribIter->second; + + for (std::vector<CallCapture> *calls : imageGenCalls) + { + // Create the image on demand with the same attrib retrieved above + CallCapture eglCreateImageKHRCall = egl::CaptureCreateImageKHR( + nullptr, true, nullptr, context->id(), EGL_GL_TEXTURE_2D, + reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(0)), attribs, + reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID))); + + // Convert the CaptureCreateImageKHR CallCapture to the customized CallCapture + CaptureCustomCreateEGLImage(context, "CreateEGLImageKHR", eglImage->getWidth(), + eglImage->getHeight(), eglCreateImageKHRCall, *calls); + } + } + // Capture Texture setup and data. 
const gl::TextureManager &textures = apiState.getTextureManagerForCapture(); @@ -4358,21 +4452,40 @@ void CaptureShareGroupMidExecutionSetup( continue; } - // create a staging GL_TEXTURE_2D texture to create the eglImage with - gl::TextureID stagingTexId = {maxAccessedResourceIDs[ResourceIDType::Texture] + 1}; if (index.getType() == gl::TextureType::External) { - Capture(setupCalls, CaptureGenTextures(replayState, true, 1, &stagingTexId)); - MaybeCaptureUpdateResourceIDs(context, resourceTracker, setupCalls); - Capture(setupCalls, - CaptureBindTexture(replayState, true, gl::TextureType::_2D, stagingTexId)); - Capture(setupCalls, CaptureTexParameteri(replayState, true, gl::TextureType::_2D, - GL_TEXTURE_MIN_FILTER, GL_NEAREST)); - Capture(setupCalls, CaptureTexParameteri(replayState, true, gl::TextureType::_2D, - GL_TEXTURE_MAG_FILTER, GL_NEAREST)); + // Lookup the eglImage ID associated with this texture when the app issued + // glEGLImageTargetTexture2DOES() + auto eglImageIter = resourceTracker->getTextureIDToImageTable().find(id.value); + egl::ImageID eglImageID; + if (eglImageIter != resourceTracker->getTextureIDToImageTable().end()) + { + eglImageID = eglImageIter->second; + } + else + { + // Original image was deleted and needs to be recreated first + eglImageID = {maxAccessedResourceIDs[ResourceIDType::Image] + 1}; + for (std::vector<CallCapture> *calls : texSetupCalls) + { + egl::AttributeMap attribs = egl::AttributeMap::CreateFromIntArray(nullptr); + CallCapture eglCreateImageKHRCall = egl::CaptureCreateImageKHR( + nullptr, true, nullptr, context->id(), EGL_GL_TEXTURE_2D, + reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(0)), attribs, + reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID.value))); + CaptureCustomCreateEGLImage(context, "CreateEGLImageKHR", desc.size.width, + desc.size.height, eglCreateImageKHRCall, + *calls); + } + } + // Pass the eglImage to the texture that is bound to GL_TEXTURE_EXTERNAL_OES target + for 
(std::vector<CallCapture> *calls : texSetupCalls) + { + Capture(calls, CaptureEGLImageTargetTexture2DOES( + replayState, true, gl::TextureType::External, eglImageID)); + } } - - if (context->getExtensions().getImageANGLE) + else if (context->getExtensions().getImageANGLE) { // Use ANGLE_get_image to read back pixel data. angle::MemoryBuffer data; @@ -4438,54 +4551,6 @@ void CaptureShareGroupMidExecutionSetup( CaptureTextureContents(calls, &replayState, texture, index, desc, static_cast<GLuint>(data.size()), data.data()); } - - if (index.getType() == gl::TextureType::External) - { - // Look up the attribs used when the image was created - // Firstly, lookup the eglImage ID associated with this texture when the app - // issued glEGLImageTargetTexture2DOES() - auto eglImageIter = resourceTracker->getTextureIDToImageTable().find(id.value); - ASSERT(eglImageIter != resourceTracker->getTextureIDToImageTable().end()); - - const egl::ImageID eglImageID = eglImageIter->second; - const EGLImage eglImage = - reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID.value)); - - // Secondly, lookup the attrib we used to create the eglImage - auto eglImageAttribIter = - resourceTracker->getImageToAttribTable().find(eglImage); - ASSERT(eglImageAttribIter != resourceTracker->getImageToAttribTable().end()); - - const egl::AttributeMap &retrievedAttribs = eglImageAttribIter->second; - - // Create the image on demand with the same attrib retrieved above - CallCapture eglCreateImageKHRCall = egl::CaptureCreateImageKHR( - nullptr, true, nullptr, context->id(), EGL_GL_TEXTURE_2D_KHR, - reinterpret_cast<EGLClientBuffer>( - static_cast<GLuint64>(stagingTexId.value)), - retrievedAttribs, eglImage); - - // Convert the CaptureCreateImageKHR CallCapture to the customized CallCapture - std::vector<CallCapture> eglCustomCreateImageKHRCall; - CaptureCustomCreateEGLImage("CreateEGLImageKHR", eglCreateImageKHRCall, - eglCustomCreateImageKHRCall); - ASSERT(eglCustomCreateImageKHRCall.size() > 
0); - - // Append the customized CallCapture to the setupCalls list - Capture(setupCalls, std::move(eglCustomCreateImageKHRCall[0])); - - // Pass the eglImage to the texture that is bound to GL_TEXTURE_EXTERNAL_OES - // target - for (std::vector<CallCapture> *calls : texSetupCalls) - { - Capture(calls, - CaptureEGLImageTargetTexture2DOES( - replayState, true, gl::TextureType::External, eglImageID)); - } - - // Delete the staging texture - Capture(setupCalls, CaptureDeleteTextures(replayState, true, 1, &stagingTexId)); - } } else { @@ -7161,12 +7226,26 @@ void FrameCaptureShared::maybeOverrideEntryPoint(const gl::Context *context, } case EntryPoint::EGLCreateImage: { - CaptureCustomCreateEGLImage("CreateEGLImage", inCall, outCalls); + const egl::Image *eglImage = GetImageFromParam(context, inCall.params.getReturnValue()); + CaptureCustomCreateEGLImage(context, "CreateEGLImage", eglImage->getWidth(), + eglImage->getHeight(), inCall, outCalls); break; } case EntryPoint::EGLCreateImageKHR: { - CaptureCustomCreateEGLImage("CreateEGLImageKHR", inCall, outCalls); + const egl::Image *eglImage = GetImageFromParam(context, inCall.params.getReturnValue()); + CaptureCustomCreateEGLImage(context, "CreateEGLImageKHR", eglImage->getWidth(), + eglImage->getHeight(), inCall, outCalls); + break; + } + case EntryPoint::EGLDestroyImage: + { + CaptureCustomDestroyEGLImage("DestroyEGLImage", inCall, outCalls); + break; + } + case EntryPoint::EGLDestroyImageKHR: + { + CaptureCustomDestroyEGLImage("DestroyEGLImageKHR", inCall, outCalls); break; } case EntryPoint::EGLCreateSync: @@ -7916,12 +7995,52 @@ void FrameCaptureShared::maybeCapturePreCallUpdates( CreateEGLImagePreCallUpdate<EGLAttrib>(call, mResourceTracker, ParamType::TEGLAttribPointer, egl::AttributeMap::CreateFromAttribArray); + if (isCaptureActive()) + { + EGLImage eglImage = call.params.getReturnValue().value.EGLImageVal; + egl::ImageID imageID = egl::PackParam<egl::ImageID>(eglImage); + handleGennedResource(context, imageID); 
+ } break; } case EntryPoint::EGLCreateImageKHR: { CreateEGLImagePreCallUpdate<EGLint>(call, mResourceTracker, ParamType::TEGLintPointer, egl::AttributeMap::CreateFromIntArray); + if (isCaptureActive()) + { + EGLImageKHR eglImage = call.params.getReturnValue().value.EGLImageKHRVal; + egl::ImageID imageID = egl::PackParam<egl::ImageID>(eglImage); + handleGennedResource(context, imageID); + } + break; + } + case EntryPoint::EGLDestroyImage: + case EntryPoint::EGLDestroyImageKHR: + { + egl::ImageID eglImageID = + call.params.getParam("imagePacked", ParamType::TImageID, 1).value.ImageIDVal; + + // Clear any texture->image mappings that involve this image + for (auto texImageIter = mResourceTracker.getTextureIDToImageTable().begin(); + texImageIter != mResourceTracker.getTextureIDToImageTable().end();) + { + if (texImageIter->second == eglImageID) + { + texImageIter = mResourceTracker.getTextureIDToImageTable().erase(texImageIter); + } + else + { + ++texImageIter; + } + } + + FrameCaptureShared *frameCaptureShared = + context->getShareGroup()->getFrameCaptureShared(); + if (frameCaptureShared->isCaptureActive()) + { + handleDeletedResource(context, eglImageID); + } break; } case EntryPoint::EGLCreateSync: diff --git a/src/libANGLE/capture/FrameCapture.h b/src/libANGLE/capture/FrameCapture.h index b6c047a6a0..5986a9ce73 100644 --- a/src/libANGLE/capture/FrameCapture.h +++ b/src/libANGLE/capture/FrameCapture.h @@ -837,6 +837,16 @@ void CaptureGLCallToFrameCapture(CaptureFuncT captureFunc, frameCaptureShared->captureCall(context, std::move(call), isCallValid); } +template <typename FirstT, typename... OthersT> +egl::Display *GetEGLDisplayArg(FirstT display, OthersT... others) +{ + if constexpr (std::is_same<egl::Display *, FirstT>::value) + { + return display; + } + return nullptr; +} + template <typename CaptureFuncT, typename... 
ArgsT> void CaptureEGLCallToFrameCapture(CaptureFuncT captureFunc, bool isCallValid, @@ -846,7 +856,21 @@ void CaptureEGLCallToFrameCapture(CaptureFuncT captureFunc, gl::Context *context = thread->getContext(); if (!context) { - return; + // Get a valid context from the display argument if no context is associated with this + // thread + egl::Display *display = GetEGLDisplayArg(captureParams...); + if (display) + { + for (const auto &contextIter : display->getState().contextMap) + { + context = contextIter.second; + break; + } + } + if (!context) + { + return; + } } std::lock_guard<egl::ContextMutex> lock(context->getContextMutex()); diff --git a/src/libANGLE/cl_types.h b/src/libANGLE/cl_types.h index 7808f43a85..a18f595283 100644 --- a/src/libANGLE/cl_types.h +++ b/src/libANGLE/cl_types.h @@ -60,6 +60,8 @@ using MemoryPtrs = std::vector<MemoryPtr>; using PlatformPtrs = std::vector<PlatformPtr>; using ProgramPtrs = std::vector<ProgramPtr>; +using CompiledWorkgroupSize = std::array<uint32_t, 3>; + struct ImageDescriptor { MemObjectType type; diff --git a/src/libANGLE/renderer/CLKernelImpl.h b/src/libANGLE/renderer/CLKernelImpl.h index f9047c249f..9d57f895d0 100644 --- a/src/libANGLE/renderer/CLKernelImpl.h +++ b/src/libANGLE/renderer/CLKernelImpl.h @@ -44,8 +44,8 @@ class CLKernelImpl : angle::NonCopyable ArgInfo(); ~ArgInfo(); - ArgInfo(const ArgInfo &) = delete; - ArgInfo &operator=(const ArgInfo &) = delete; + ArgInfo(const ArgInfo &) = default; + ArgInfo &operator=(const ArgInfo &) = default; ArgInfo(ArgInfo &&); ArgInfo &operator=(ArgInfo &&); diff --git a/src/libANGLE/renderer/vulkan/BUILD.gn b/src/libANGLE/renderer/vulkan/BUILD.gn index 5a860d0bcd..3f60a4bd6f 100644 --- a/src/libANGLE/renderer/vulkan/BUILD.gn +++ b/src/libANGLE/renderer/vulkan/BUILD.gn @@ -115,6 +115,19 @@ template("angle_vulkan_backend_template") { deps += [ "$angle_root:angle_version_info" ] } + # OpenCL on ANGLE needs both spirv-tools and clspv for compiler + if (angle_enable_cl) { + deps 
+= [ + "$angle_root/third_party/clspv/:clspv_core_shared", + "$angle_root/third_party/vulkan-deps/spirv-tools/src/:spvtools", + "$angle_root/third_party/vulkan-deps/spirv-tools/src/:spvtools_opt", + ] + include_dirs = [ + "$angle_root/third_party/vulkan-deps/spirv-tools/src/include", + "$angle_root/third_party/clspv/src/include", + ] + } + public_deps = [ "$angle_root:libANGLE_headers", "$angle_root/src/common/vulkan", diff --git a/src/libANGLE/renderer/vulkan/CLContextVk.cpp b/src/libANGLE/renderer/vulkan/CLContextVk.cpp index 709bfa55db..d4857b8169 100644 --- a/src/libANGLE/renderer/vulkan/CLContextVk.cpp +++ b/src/libANGLE/renderer/vulkan/CLContextVk.cpp @@ -7,6 +7,7 @@ #include "libANGLE/renderer/vulkan/CLContextVk.h" #include "libANGLE/renderer/vulkan/CLCommandQueueVk.h" +#include "libANGLE/renderer/vulkan/CLProgramVk.h" #include "libANGLE/renderer/vulkan/DisplayVk.h" #include "libANGLE/renderer/vulkan/RendererVk.h" #include "libANGLE/renderer/vulkan/vk_utils.h" @@ -118,8 +119,15 @@ angle::Result CLContextVk::createProgramWithSource(const cl::Program &program, const std::string &source, CLProgramImpl::Ptr *programOut) { - UNIMPLEMENTED(); - ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); + CLProgramVk *programVk = new (std::nothrow) CLProgramVk(program); + if (programVk == nullptr) + { + ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY); + } + ANGLE_TRY(programVk->init()); + *programOut = CLProgramImpl::Ptr(std::move(programVk)); + + return angle::Result::Continue; } angle::Result CLContextVk::createProgramWithIL(const cl::Program &program, @@ -137,8 +145,15 @@ angle::Result CLContextVk::createProgramWithBinary(const cl::Program &program, cl_int *binaryStatus, CLProgramImpl::Ptr *programOut) { - UNIMPLEMENTED(); - ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); + CLProgramVk *programVk = new (std::nothrow) CLProgramVk(program); + if (programVk == nullptr) + { + ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY); + } + ANGLE_TRY(programVk->init(lengths, binaries, binaryStatus)); 
+ *programOut = CLProgramImpl::Ptr(std::move(programVk)); + + return angle::Result::Continue; } angle::Result CLContextVk::createProgramWithBuiltInKernels(const cl::Program &program, diff --git a/src/libANGLE/renderer/vulkan/CLKernelVk.h b/src/libANGLE/renderer/vulkan/CLKernelVk.h index 0d6a26bd3c..1161594235 100644 --- a/src/libANGLE/renderer/vulkan/CLKernelVk.h +++ b/src/libANGLE/renderer/vulkan/CLKernelVk.h @@ -26,6 +26,51 @@ class CLKernelVk : public CLKernelImpl angle::Result createInfo(CLKernelImpl::Info *infoOut) const override; }; +struct CLKernelArgument +{ + CLKernelImpl::ArgInfo info{}; + uint32_t type = 0; + uint32_t ordinal = 0; + size_t handleSize = 0; + void *handle = nullptr; + bool used = false; + + // Shared operand words/regions for "OpExtInst" type spv instructions + // (starts from spv word index/offset 7 and onward) + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpExtInst + // https://github.com/google/clspv/blob/main/docs/OpenCLCOnVulkan.md#kernels + union + { + uint32_t op3; + uint32_t descriptorSet; + uint32_t pushConstOffset; + uint32_t workgroupSpecId; + }; + union + { + uint32_t op4; + uint32_t descriptorBinding; + uint32_t pushConstantSize; + uint32_t workgroupSize; + }; + union + { + uint32_t op5; + uint32_t podStorageBufferOffset; + uint32_t podUniformOffset; + uint32_t pointerUniformOffset; + }; + union + { + uint32_t op6; + uint32_t podStorageBufferSize; + uint32_t podUniformSize; + uint32_t pointerUniformSize; + }; +}; +using CLKernelArguments = std::vector<CLKernelArgument>; +using CLKernelArgsMap = angle::HashMap<std::string, CLKernelArguments>; + } // namespace rx #endif // LIBANGLE_RENDERER_VULKAN_CLKERNELVK_H_ diff --git a/src/libANGLE/renderer/vulkan/CLProgramVk.cpp b/src/libANGLE/renderer/vulkan/CLProgramVk.cpp index 82fc540a17..624348bc35 100644 --- a/src/libANGLE/renderer/vulkan/CLProgramVk.cpp +++ b/src/libANGLE/renderer/vulkan/CLProgramVk.cpp @@ -6,22 +6,428 @@ // CLProgramVk.cpp: Implements the class 
methods for CLProgramVk. #include "libANGLE/renderer/vulkan/CLProgramVk.h" +#include "libANGLE/renderer/vulkan/CLContextVk.h" +#include "libANGLE/CLContext.h" +#include "libANGLE/CLProgram.h" #include "libANGLE/cl_utils.h" +#include "clspv/Compiler.h" + +#include "spirv/unified1/NonSemanticClspvReflection.h" +#include "spirv/unified1/spirv.hpp" + +#include "spirv-tools/libspirv.hpp" +#include "spirv-tools/optimizer.hpp" + +#include "common/string_utils.h" + namespace rx { -CLProgramVk::CLProgramVk(const cl::Program &program) : CLProgramImpl(program) {} +namespace +{ +#if defined(ANGLE_ENABLE_ASSERTS) +constexpr bool kAngleDebug = true; +#else +constexpr bool kAngleDebug = false; +#endif + +// Used by SPIRV-Tools to parse reflection info +spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData, + const spv_parsed_instruction_t &spvInstr) +{ + // Parse spir-v opcodes + switch (spvInstr.opcode) + { + // --- Clspv specific parsing for below cases --- + case spv::OpExtInst: + { + switch (spvInstr.words[4]) + { + case NonSemanticClspvReflectionKernel: + { + // Extract kernel name and args - add to kernel args map + std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]]; + uint32_t numArgs = reflectionData.spvIntLookup[spvInstr.words[7]]; + reflectionData.kernelArgsMap[functionName] = CLKernelArguments(); + reflectionData.kernelArgsMap[functionName].resize(numArgs); + + // Store kernel flags and attributes + reflectionData.kernelFlags[functionName] = + reflectionData.spvIntLookup[spvInstr.words[8]]; + reflectionData.kernelAttributes[functionName] = + reflectionData.spvStrLookup[spvInstr.words[9]]; + + // Save kernel name to reflection table for later use/lookup in parser routine + reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName); + break; + } + case NonSemanticClspvReflectionArgumentInfo: + { + CLKernelVk::ArgInfo kernelArgInfo; + kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]]; + // If 
instruction has more than 5 instruction operands (minus instruction + // name/opcode), that means we have arg qualifiers. ArgumentInfo also counts as + // an operand for OpExtInst. In below example, [ %e %f %g %h ] are the arg + // qualifier operands. + // + // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ] + if (spvInstr.num_operands > 5) + { + kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]]; + kernelArgInfo.addressQualifier = + reflectionData.spvIntLookup[spvInstr.words[7]]; + kernelArgInfo.accessQualifier = + reflectionData.spvIntLookup[spvInstr.words[8]]; + kernelArgInfo.typeQualifier = + reflectionData.spvIntLookup[spvInstr.words[9]]; + } + // Store kern arg for later lookup + reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo); + break; + } + case NonSemanticClspvReflectionArgumentPodUniform: + case NonSemanticClspvReflectionArgumentPointerUniform: + case NonSemanticClspvReflectionArgumentPodStorageBuffer: + { + CLKernelArgument kernelArg; + if (spvInstr.num_operands == 11) + { + const CLKernelVk::ArgInfo &kernelArgInfo = + reflectionData.kernelArgInfos[spvInstr.words[11]]; + kernelArg.info.name = kernelArgInfo.name; + kernelArg.info.typeName = kernelArgInfo.typeName; + kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier; + kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier; + kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier; + } + CLKernelArguments &kernelArgs = + reflectionData + .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]]; + kernelArg.type = spvInstr.words[4]; + kernelArg.used = true; + kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]]; + kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]]; + kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]]; + kernelArg.op5 = reflectionData.spvIntLookup[spvInstr.words[9]]; + kernelArg.op6 = reflectionData.spvIntLookup[spvInstr.words[10]]; + + if (!kernelArgs.empty()) + { + 
kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg); + } + break; + } + case NonSemanticClspvReflectionArgumentUniform: + case NonSemanticClspvReflectionArgumentWorkgroup: + case NonSemanticClspvReflectionArgumentStorageBuffer: + case NonSemanticClspvReflectionArgumentPodPushConstant: + case NonSemanticClspvReflectionArgumentPointerPushConstant: + { + CLKernelArgument kernelArg; + if (spvInstr.num_operands == 9) + { + const CLKernelVk::ArgInfo &kernelArgInfo = + reflectionData.kernelArgInfos[spvInstr.words[9]]; + kernelArg.info.name = kernelArgInfo.name; + kernelArg.info.typeName = kernelArgInfo.typeName; + kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier; + kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier; + kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier; + } + CLKernelArguments &kernelArgs = + reflectionData + .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]]; + kernelArg.type = spvInstr.words[4]; + kernelArg.used = true; + kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]]; + kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]]; + kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]]; + kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg); + break; + } + case NonSemanticClspvReflectionPushConstantGlobalOffset: + case NonSemanticClspvReflectionPushConstantRegionOffset: + { + uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]]; + uint32_t size = reflectionData.spvIntLookup[spvInstr.words[6]]; + reflectionData.pushConstants[spvInstr.words[4]] = { + .stageFlags = 0, .offset = offset, .size = size}; + break; + } + case NonSemanticClspvReflectionSpecConstantWorkgroupSize: + { + reflectionData.specConstantWGS = { + reflectionData.spvIntLookup[spvInstr.words[5]], + reflectionData.spvIntLookup[spvInstr.words[6]], + reflectionData.spvIntLookup[spvInstr.words[7]]}; + break; + } + case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize: + { + reflectionData + 
.kernelCompileWGS[reflectionData.spvStrLookup[spvInstr.words[5]]] = { + reflectionData.spvIntLookup[spvInstr.words[6]], + reflectionData.spvIntLookup[spvInstr.words[7]], + reflectionData.spvIntLookup[spvInstr.words[8]]}; + break; + } + default: + break; + } + break; + } + // --- Regular SPIR-V opcode parsing for below cases --- + case spv::OpString: + { + reflectionData.spvStrLookup[spvInstr.words[1]] = + reinterpret_cast<const char *>(&spvInstr.words[2]); + break; + } + case spv::OpConstant: + { + reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3]; + break; + } + default: + break; + } + return SPV_SUCCESS; +} + +class CLAsyncBuildTask : public angle::Closure +{ + public: + CLAsyncBuildTask(CLProgramVk *programVk, + const cl::DevicePtrs &devices, + std::string options, + std::string internalOptions, + CLProgramVk::BuildType buildType, + const CLProgramVk::DeviceProgramDatas &inputProgramDatas, + cl::Program *notify) + : mProgramVk(programVk), + mDevices(devices), + mOptions(options), + mInternalOptions(internalOptions), + mBuildType(buildType), + mDeviceProgramDatas(inputProgramDatas), + mNotify(notify) + {} + + void operator()() override + { + ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)"); + CLProgramVk::ScopedProgramCallback spc(mNotify); + if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType, + mDeviceProgramDatas)) + { + ERR() << "Async build failed for program (" << mProgramVk + << ")! 
Check the build status or build log for details."; + } + } + + private: + CLProgramVk *mProgramVk; + const cl::DevicePtrs mDevices; + std::string mOptions; + std::string mInternalOptions; + CLProgramVk::BuildType mBuildType; + const CLProgramVk::DeviceProgramDatas mDeviceProgramDatas; + cl::Program *mNotify; +}; + +std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens, + CLProgramVk::BuildType buildType) +{ + std::string processedOptions; + + // Need to remove/replace options that are not 1-1 mapped to clspv + for (const std::string &optionToken : optionTokens) + { + if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK) + { + processedOptions += " --output-format=bc"; + continue; + } + processedOptions += optionToken; + } + + switch (buildType) + { + case CLProgramVk::BuildType::COMPILE: + processedOptions += " --output-format=bc"; + break; + case CLProgramVk::BuildType::LINK: + processedOptions += " -x ir"; + break; + default: + break; + } + + // Other internal Clspv compiler flags that are needed/required + processedOptions += " --long-vector"; -CLProgramVk::~CLProgramVk() = default; + return processedOptions; +} + +} // namespace + +CLProgramVk::CLProgramVk(const cl::Program &program) + : CLProgramImpl(program), mContext(&program.getContext().getImpl<CLContextVk>()) +{} + +angle::Result CLProgramVk::init() +{ + cl::DevicePtrs devices; + ANGLE_TRY(mContext->getDevices(&devices)); + + // The devices associated with the program object are the devices associated with context + for (const cl::RefPointer<cl::Device> &device : devices) + { + mAssociatedDevicePrograms[device->getNative()] = DeviceProgramData{}; + } + + return angle::Result::Continue; +} + +angle::Result CLProgramVk::init(const size_t *lengths, + const unsigned char **binaries, + cl_int *binaryStatus) +{ + // The devices associated with program come from device_list param from + // clCreateProgramWithBinary + for (const cl::DevicePtr &device : 
mProgram.getDevices()) + { + const unsigned char *binaryHandle = *binaries++; + size_t binarySize = *lengths++; + + // Check for header + if (binarySize < sizeof(ProgramBinaryOutputHeader)) + { + if (binaryStatus) + { + *binaryStatus++ = CL_INVALID_BINARY; + } + ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); + } + binarySize -= sizeof(ProgramBinaryOutputHeader); + + // Check for valid binary version from header + const ProgramBinaryOutputHeader *binaryHeader = + reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle); + if (binaryHeader == nullptr) + { + ERR() << "NULL binary header!"; + if (binaryStatus) + { + *binaryStatus++ = CL_INVALID_BINARY; + } + ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); + } + else if (binaryHeader->headerVersion < LatestSupportedBinaryVersion) + { + ERR() << "Binary version not compatible with runtime!"; + if (binaryStatus) + { + *binaryStatus++ = CL_INVALID_BINARY; + } + ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); + } + binaryHandle += sizeof(ProgramBinaryOutputHeader); + + // See what kind of binary we have (i.e. 
SPIR-V or LLVM Bitcode) + // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number + constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342; + constexpr uint32_t SPIRV_MAGIC = 0x07230203; + const uint32_t &firstWord = reinterpret_cast<const uint32_t *>(binaryHandle)[0]; + bool isBC = firstWord == LLVM_BC_MAGIC; + bool isSPV = firstWord == SPIRV_MAGIC; + if (!isBC && !isSPV) + { + ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!"; + if (binaryStatus) + { + *binaryStatus++ = CL_INVALID_BINARY; + } + ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); + } + + // Add device binary to program + DeviceProgramData deviceBinary; + deviceBinary.binaryType = binaryHeader->binaryType; + switch (deviceBinary.binaryType) + { + case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: + deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0); + std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize); + break; + case CL_PROGRAM_BINARY_TYPE_LIBRARY: + case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: + deviceBinary.IR.assign(binarySize, 0); + std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize); + break; + default: + UNREACHABLE(); + ERR() << "Invalid binary type!"; + if (binaryStatus) + { + *binaryStatus++ = CL_INVALID_BINARY; + } + ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY); + } + mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary); + if (binaryStatus) + { + *binaryStatus++ = CL_SUCCESS; + } + } + + return angle::Result::Continue; +} + +CLProgramVk::~CLProgramVk() +{ + for (vk::BindingPointer<vk::DescriptorSetLayout, vk::AtomicRefCounted<vk::DescriptorSetLayout>> + &dsLayouts : mDescriptorSetLayouts) + { + dsLayouts.reset(); + } + for (vk::BindingPointer<rx::vk::DynamicDescriptorPool> &pool : mDescriptorPools) + { + pool.reset(); + } + mMetaDescriptorPool.destroy(mContext->getRenderer()); + mDescSetLayoutCache.destroy(mContext->getRenderer()); + 
mPipelineLayoutCache.destroy(mContext->getRenderer()); +} angle::Result CLProgramVk::build(const cl::DevicePtrs &devices, const char *options, cl::Program *notify) { - UNIMPLEMENTED(); - ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); + BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY; + const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices(); + + if (notify) + { + std::shared_ptr<angle::WaitableEvent> asyncEvent = + mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask( + std::make_shared<CLAsyncBuildTask>(this, devicePtrs, + std::string(options ? options : ""), "", + buildType, DeviceProgramDatas{}, notify)); + ASSERT(asyncEvent != nullptr); + } + else + { + if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType, + DeviceProgramDatas{})) + { + ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE); + } + } + return angle::Result::Continue; } angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices, @@ -39,8 +445,94 @@ angle::Result CLProgramVk::getInfo(cl::ProgramInfo name, void *value, size_t *valueSizeRet) const { - UNIMPLEMENTED(); - ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); + cl_uint valUInt = 0u; + void *valPointer = nullptr; + const void *copyValue = nullptr; + size_t copySize = 0u; + unsigned char **outputBins = reinterpret_cast<unsigned char **>(value); + std::string kernelNamesList; + std::vector<size_t> vBinarySizes; + + switch (name) + { + case cl::ProgramInfo::NumKernels: + for (const auto &deviceProgram : mAssociatedDevicePrograms) + { + valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels()); + } + copyValue = &valUInt; + copySize = sizeof(valUInt); + break; + case cl::ProgramInfo::BinarySizes: + { + for (const auto &deviceProgram : mAssociatedDevicePrograms) + { + vBinarySizes.push_back( + sizeof(ProgramBinaryOutputHeader) + + (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE + ? 
deviceProgram.second.binary.size() * sizeof(uint32_t) + : deviceProgram.second.IR.size())); + } + valPointer = vBinarySizes.data(); + copyValue = valPointer; + copySize = vBinarySizes.size() * sizeof(size_t); + break; + } + case cl::ProgramInfo::Binaries: + for (const auto &deviceProgram : mAssociatedDevicePrograms) + { + const void *bin = + deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE + ? reinterpret_cast<const void *>(deviceProgram.second.binary.data()) + : reinterpret_cast<const void *>(deviceProgram.second.IR.data()); + size_t binSize = + deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE + ? deviceProgram.second.binary.size() * sizeof(uint32_t) + : deviceProgram.second.IR.size(); + ProgramBinaryOutputHeader header{.headerVersion = LatestSupportedBinaryVersion, + .binaryType = deviceProgram.second.binaryType}; + + if (outputBins != nullptr) + { + if (*outputBins != nullptr) + { + std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader)); + std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin, + binSize); + } + outputBins++; + } + + // Spec just wants pointer size here + copySize += sizeof(unsigned char *); + } + // We already copied the (headers + binaries) over - nothing else left to copy + copyValue = nullptr; + break; + case cl::ProgramInfo::KernelNames: + for (const auto &deviceProgram : mAssociatedDevicePrograms) + { + kernelNamesList = deviceProgram.second.getKernelNames(); + } + valPointer = kernelNamesList.data(); + copyValue = valPointer; + copySize = kernelNamesList.size() + 1; + break; + default: + UNREACHABLE(); + } + + if ((value != nullptr) && (copyValue != nullptr)) + { + std::memcpy(value, copyValue, copySize); + } + + if (valueSizeRet != nullptr) + { + *valueSizeRet = copySize; + } + + return angle::Result::Continue; } angle::Result CLProgramVk::getBuildInfo(const cl::Device &device, @@ -49,8 +541,53 @@ angle::Result CLProgramVk::getBuildInfo(const cl::Device &device, void 
*value, size_t *valueSizeRet) const { - UNIMPLEMENTED(); - ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); + cl_uint valUInt = 0; + cl_build_status valStatus = 0; + const void *copyValue = nullptr; + size_t copySize = 0; + const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative()); + + switch (name) + { + case cl::ProgramBuildInfo::Status: + valStatus = deviceProgramData->buildStatus; + copyValue = &valStatus; + copySize = sizeof(valStatus); + break; + case cl::ProgramBuildInfo::Log: + copyValue = deviceProgramData->buildLog.c_str(); + copySize = deviceProgramData->buildLog.size() + 1; + break; + case cl::ProgramBuildInfo::Options: + copyValue = mProgramOpts.c_str(); + copySize = mProgramOpts.size() + 1; + break; + case cl::ProgramBuildInfo::BinaryType: + valUInt = deviceProgramData->binaryType; + copyValue = &valUInt; + copySize = sizeof(valUInt); + break; + case cl::ProgramBuildInfo::GlobalVariableTotalSize: + // Returns 0 if device does not support program scope global variables. 
+ valUInt = 0; + copyValue = &valUInt; + copySize = sizeof(valUInt); + break; + default: + UNREACHABLE(); + } + + if ((value != nullptr) && (copyValue != nullptr)) + { + memcpy(value, copyValue, std::min(valueSize, copySize)); + } + + if (valueSizeRet != nullptr) + { + *valueSizeRet = copySize; + } + + return angle::Result::Continue; } angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel, @@ -69,4 +606,201 @@ angle::Result CLProgramVk::createKernels(cl_uint numKernels, ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES); } +const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData( + const _cl_device_id *device) const +{ + if (!mAssociatedDevicePrograms.contains(device)) + { + WARN() << "Device (" << device << ") is not associated with program (" << this << ") !"; + return nullptr; + } + return &mAssociatedDevicePrograms.at(device); +} + +const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData( + const char *kernelName) const +{ + for (const auto &deviceProgram : mAssociatedDevicePrograms) + { + if (deviceProgram.second.containsKernel(kernelName)) + { + return &deviceProgram.second; + } + } + WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this + << ") !"; + return nullptr; +} + +bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices, + std::string options, + std::string internalOptions, + BuildType buildType, + const DeviceProgramDatas &inputProgramDatas) +{ + std::scoped_lock<std::mutex> sl(mProgramMutex); + + // Cache original options string + mProgramOpts = options; + + // Process options and append any other internal (required) options for clspv + std::vector<std::string> optionTokens; + angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens); + const bool createLibrary = std::find(optionTokens.begin(), optionTokens.end(), + "-create-library") != optionTokens.end(); + std::string processedOptions = ProcessBuildOptions(optionTokens, buildType); + + // Build for 
each associated device + for (const cl::RefPointer<cl::Device> &device : devices) + { + DeviceProgramData &deviceProgramData = mAssociatedDevicePrograms[device->getNative()]; + deviceProgramData.buildStatus = CL_BUILD_IN_PROGRESS; + + if (buildType != BuildType::BINARY) + { + // Invoke clspv + switch (buildType) + { + case BuildType::BUILD: + case BuildType::COMPILE: + { + ScopedClspvContext clspvCtx; + const char *clSrc = mProgram.getSource().c_str(); + ClspvError clspvRet = clspvCompileFromSourcesString( + 1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(), + &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog); + deviceProgramData.buildLog = + clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : ""; + if (clspvRet != CLSPV_SUCCESS) + { + ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!"; + deviceProgramData.buildStatus = CL_BUILD_ERROR; + return false; + } + + if (buildType == BuildType::COMPILE) + { + deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0); + std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin, + clspvCtx.mOutputBinSize); + deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; + } + else + { + deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t), + 0); + std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin, + clspvCtx.mOutputBinSize); + deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; + } + break; + } + case BuildType::LINK: + { + ScopedClspvContext clspvCtx; + std::vector<size_t> vSizes; + std::vector<const char *> vBins; + for (const CLProgramVk::DeviceProgramData *inputProgramData : inputProgramDatas) + { + vSizes.push_back(inputProgramData->IR.size()); + vBins.push_back(inputProgramData->IR.data()); + } + ClspvError clspvRet = clspvCompileFromSourcesString( + inputProgramDatas.size(), vSizes.data(), vBins.data(), + processedOptions.c_str(), &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, + 
&clspvCtx.mOutputBuildLog); + deviceProgramData.buildLog = + clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : ""; + if (clspvRet != CLSPV_SUCCESS) + { + ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!"; + deviceProgramData.buildStatus = CL_BUILD_ERROR; + return false; + } + + deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0); + std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin, + clspvCtx.mOutputBinSize); + + if (createLibrary) + { + deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY; + } + else + { + deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; + } + break; + } + default: + UNREACHABLE(); + return false; + } + } + + // Extract reflection info from spv binary and populate reflection data + if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE) + { + spvtools::SpirvTools spvTool(SPV_ENV_UNIVERSAL_1_5); + bool parseRet = spvTool.Parse( + deviceProgramData.binary, + [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) { + return SPV_SUCCESS; + }, + [&deviceProgramData](const spv_parsed_instruction_t &instruction) { + return ParseReflection(deviceProgramData.reflectionData, instruction); + }); + if (!parseRet) + { + ERR() << "Failed to parse reflection info from SPIR-V!"; + return false; + } + + // Setup inital push constant range + uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0, + pushConstantMaxSize = 0; + for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants) + { + pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet + ? 
pushConstant.second.offset + : pushConstantMinOffet; + if (pushConstant.second.offset >= pushConstantMaxOffset) + { + pushConstantMaxOffset = pushConstant.second.offset; + pushConstantMaxSize = pushConstant.second.size; + } + } + deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + deviceProgramData.pushConstRange.offset = + pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet; + deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize; + + if (kAngleDebug) + { + if (mContext->getFeatures().clDumpVkSpirv.enabled) + { + angle::spirv::Print(deviceProgramData.binary); + } + } + } + deviceProgramData.buildStatus = CL_BUILD_SUCCESS; + } + return true; +} + +angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData) +{ + angle::spirv::Blob binaryStripped; + spvtools::Optimizer opt(SPV_ENV_UNIVERSAL_1_5); + opt.RegisterPass(spvtools::CreateStripReflectInfoPass()); + spvtools::OptimizerOptions optOptions; + optOptions.set_run_validator(false); + if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(), + &binaryStripped, optOptions)) + { + ERR() << "Could not strip reflection data from binary!"; + } + return binaryStripped; +} + } // namespace rx diff --git a/src/libANGLE/renderer/vulkan/CLProgramVk.h b/src/libANGLE/renderer/vulkan/CLProgramVk.h index 177bdbf513..4c8033097d 100644 --- a/src/libANGLE/renderer/vulkan/CLProgramVk.h +++ b/src/libANGLE/renderer/vulkan/CLProgramVk.h @@ -8,19 +8,158 @@ #ifndef LIBANGLE_RENDERER_VULKAN_CLPROGRAMVK_H_ #define LIBANGLE_RENDERER_VULKAN_CLPROGRAMVK_H_ +#include "libANGLE/renderer/vulkan/CLKernelVk.h" #include "libANGLE/renderer/vulkan/cl_types.h" +#include "libANGLE/renderer/vulkan/vk_cache_utils.h" +#include "libANGLE/renderer/vulkan/vk_helpers.h" #include "libANGLE/renderer/CLProgramImpl.h" +#include "libANGLE/CLProgram.h" + +#include "clspv/Compiler.h" + +#include "vulkan/vulkan_core.h" + +#include 
"spirv-tools/libspirv.h" + namespace rx { class CLProgramVk : public CLProgramImpl { public: + struct SpvReflectionData + { + angle::HashMap<uint32_t, uint32_t> spvIntLookup; + angle::HashMap<uint32_t, std::string> spvStrLookup; + angle::HashMap<uint32_t, CLKernelVk::ArgInfo> kernelArgInfos; + angle::HashMap<std::string, uint32_t> kernelFlags; + angle::HashMap<std::string, std::string> kernelAttributes; + angle::HashMap<std::string, std::array<uint32_t, 3>> kernelCompileWGS; + angle::HashMap<uint32_t, VkPushConstantRange> pushConstants; + std::array<uint32_t, 3> specConstantWGS{0, 0, 0}; + CLKernelArgsMap kernelArgsMap; + }; + + // Output binary structure (for CL_PROGRAM_BINARIES query) + struct ProgramBinaryOutputHeader + { + uint32_t headerVersion{1}; + cl_program_binary_type binaryType{CL_PROGRAM_BINARY_TYPE_NONE}; + }; + static constexpr uint32_t LatestSupportedBinaryVersion = 1; + + struct ScopedClspvContext : angle::NonCopyable + { + ScopedClspvContext() = default; + ~ScopedClspvContext() { clspvFreeOutputBuildObjs(mOutputBin, mOutputBuildLog); } + + size_t mOutputBinSize{0}; + char *mOutputBin{nullptr}; + char *mOutputBuildLog{nullptr}; + }; + + struct ScopedProgramCallback : angle::NonCopyable + { + ScopedProgramCallback() = delete; + ScopedProgramCallback(cl::Program *notify) : mNotify(notify) {} + ~ScopedProgramCallback() + { + if (mNotify) + { + mNotify->callback(); + } + } + + cl::Program *mNotify{nullptr}; + }; + + enum class BuildType + { + BUILD = 0, + COMPILE, + LINK, + BINARY + }; + + struct DeviceProgramData + { + std::vector<char> IR; + std::string buildLog; + angle::spirv::Blob binary; + SpvReflectionData reflectionData; + VkPushConstantRange pushConstRange{}; + cl_build_status buildStatus{CL_BUILD_NONE}; + cl_program_binary_type binaryType{CL_PROGRAM_BINARY_TYPE_NONE}; + + size_t numKernels() const { return reflectionData.kernelArgsMap.size(); } + + size_t numKernelArgs(const std::string &kernelName) const + { + return 
containsKernel(kernelName) ? getKernelArgsMap().at(kernelName).size() : 0; + } + + const CLKernelArgsMap &getKernelArgsMap() const { return reflectionData.kernelArgsMap; } + + bool containsKernel(const std::string &name) const + { + return reflectionData.kernelArgsMap.contains(name); + } + + std::string getKernelNames() const + { + std::string names; + for (auto name = getKernelArgsMap().begin(); name != getKernelArgsMap().end(); ++name) + { + names += name->first + (std::next(name) != getKernelArgsMap().end() ? ";" : "\0"); + } + return names; + } + + CLKernelArguments getKernelArguments(const std::string &kernelName) const + { + CLKernelArguments kargsCopy; + if (containsKernel(kernelName)) + { + const CLKernelArguments &kargs = getKernelArgsMap().at(kernelName); + for (const CLKernelArgument &karg : kargs) + { + kargsCopy.push_back(karg); + } + } + return kargsCopy; + } + + cl::CompiledWorkgroupSize getCompiledWGS(const std::string &kernelName) const + { + cl::CompiledWorkgroupSize compiledWGS{0, 0, 0}; + if (reflectionData.kernelCompileWGS.contains(kernelName)) + { + compiledWGS = reflectionData.kernelCompileWGS.at(kernelName); + } + return compiledWGS; + } + + std::string getKernelAttributes(const std::string &kernelName) const + { + if (containsKernel(kernelName)) + { + return reflectionData.kernelAttributes.at(kernelName.c_str()); + } + return std::string{}; + } + }; + using DevicePrograms = angle::HashMap<const _cl_device_id *, DeviceProgramData>; + using DeviceProgramDatas = std::vector<const DeviceProgramData *>; + CLProgramVk(const cl::Program &program); + ~CLProgramVk() override; + angle::Result init(); + angle::Result init(const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus); + angle::Result build(const cl::DevicePtrs &devices, const char *options, cl::Program *notify) override; @@ -49,6 +188,27 @@ class CLProgramVk : public CLProgramImpl angle::Result createKernels(cl_uint numKernels, CLKernelImpl::CreateFuncs &createFuncs, 
cl_uint *numKernelsRet) override; + + const DeviceProgramData *getDeviceProgramData(const char *kernelName) const; + const DeviceProgramData *getDeviceProgramData(const _cl_device_id *device) const; + + bool buildInternal(const cl::DevicePtrs &devices, + std::string options, + std::string internalOptions, + BuildType buildType, + const DeviceProgramDatas &inputProgramDatas); + angle::spirv::Blob stripReflection(const DeviceProgramData *deviceProgramData); + + private: + CLContextVk *mContext; + std::string mProgramOpts; + DevicePrograms mAssociatedDevicePrograms; + PipelineLayoutCache mPipelineLayoutCache; + vk::MetaDescriptorPool mMetaDescriptorPool; + DescriptorSetLayoutCache mDescSetLayoutCache; + vk::DescriptorSetLayoutPointerArray mDescriptorSetLayouts; + vk::DescriptorSetArray<vk::DescriptorPoolPointer> mDescriptorPools; + std::mutex mProgramMutex; }; } // namespace rx diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp index d063eb7760..0c7a7f0ef3 100644 --- a/src/libANGLE/renderer/vulkan/ContextVk.cpp +++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp @@ -8596,17 +8596,31 @@ angle::Result ContextVk::onResourceAccess(const vk::CommandBufferAccess &access) mOutsideRenderPassCommands->retainResource(imageAccess.image); } - for (const vk::CommandBufferImageWrite &imageWrite : access.getWriteImages()) + for (const vk::CommandBufferImageSubresourceAccess &imageReadAccess : + access.getReadImageSubresources()) { - ASSERT(!isRenderPassStartedAndUsesImage(*imageWrite.access.image)); + vk::ImageHelper *image = imageReadAccess.access.image; + ASSERT(!isRenderPassStartedAndUsesImage(*image)); - imageWrite.access.image->recordWriteBarrier(this, imageWrite.access.aspectFlags, - imageWrite.access.imageLayout, - mOutsideRenderPassCommands); - mOutsideRenderPassCommands->retainResource(imageWrite.access.image); - imageWrite.access.image->onWrite(imageWrite.levelStart, imageWrite.levelCount, - imageWrite.layerStart, 
imageWrite.layerCount, - imageWrite.access.aspectFlags); + image->recordReadSubresourceBarrier( + this, imageReadAccess.access.aspectFlags, imageReadAccess.access.imageLayout, + imageReadAccess.levelStart, imageReadAccess.levelCount, imageReadAccess.layerStart, + imageReadAccess.layerCount, mOutsideRenderPassCommands); + mOutsideRenderPassCommands->retainResource(image); + } + + for (const vk::CommandBufferImageSubresourceAccess &imageWrite : access.getWriteImages()) + { + vk::ImageHelper *image = imageWrite.access.image; + ASSERT(!isRenderPassStartedAndUsesImage(*image)); + + image->recordWriteBarrier(this, imageWrite.access.aspectFlags, + imageWrite.access.imageLayout, imageWrite.levelStart, + imageWrite.levelCount, imageWrite.layerStart, + imageWrite.layerCount, mOutsideRenderPassCommands); + mOutsideRenderPassCommands->retainResource(image); + image->onWrite(imageWrite.levelStart, imageWrite.levelCount, imageWrite.layerStart, + imageWrite.layerCount, imageWrite.access.aspectFlags); } for (const vk::CommandBufferBufferAccess &bufferAccess : access.getReadBuffers()) @@ -8663,8 +8677,20 @@ angle::Result ContextVk::flushCommandBuffersIfNecessary(const vk::CommandBufferA } } + // In cases where the image has both read and write permissions, the render pass should be + // closed if there is a read from a previously written subresource (in a specific level/layer), + // or a write to a previously read one. + for (const vk::CommandBufferImageSubresourceAccess &imageSubresourceAccess : + access.getReadImageSubresources()) + { + if (isRenderPassStartedAndUsesImage(*imageSubresourceAccess.access.image)) + { + return flushCommandsAndEndRenderPass(RenderPassClosureReason::ImageUseThenOutOfRPRead); + } + } + // Write images only need to close the render pass if they need a layout transition. 
- for (const vk::CommandBufferImageWrite &imageWrite : access.getWriteImages()) + for (const vk::CommandBufferImageSubresourceAccess &imageWrite : access.getWriteImages()) { if (isRenderPassStartedAndUsesImage(*imageWrite.access.image)) { diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp index 4da4dec02b..eb2b63934d 100644 --- a/src/libANGLE/renderer/vulkan/RendererVk.cpp +++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp @@ -137,22 +137,6 @@ bool IsQualcommOpenSource(uint32_t vendorId, uint32_t driverId, const char *devi return strstr(deviceName, "Venus") != nullptr || strstr(deviceName, "Turnip") != nullptr; } -bool IsPixel() -{ - if (!IsAndroid()) - { - return false; - } - - angle::SystemInfo info; - if (!angle::GetSystemInfo(&info)) - { - return false; - } - - return strstr(info.machineModelName.c_str(), "Pixel") != nullptr; -} - angle::vk::ICD ChooseICDFromAttribs(const egl::AttributeMap &attribs) { #if !defined(ANGLE_PLATFORM_ANDROID) @@ -296,6 +280,7 @@ constexpr const char *kSkippedMessages[] = { // https://issuetracker.google.com/319228278 "VUID-vkCmdDrawIndexed-format-07753", "VUID-vkCmdDraw-format-07753", + "Undefined-Value-ShaderFragmentOutputMismatch", }; // Validation messages that should be ignored only when VK_EXT_primitive_topology_list_restart is @@ -1333,28 +1318,41 @@ constexpr char kEnableDebugMarkersPropertyName[] = "debug.angle.markers"; ANGLE_INLINE gl::ShadingRate GetShadingRateFromVkExtent(const VkExtent2D &extent) { - if (extent.width == 1 && extent.height == 2) - { - return gl::ShadingRate::_1x2; - } - else if (extent.width == 2 && extent.height == 1) - { - return gl::ShadingRate::_2x1; - } - else if (extent.width == 2 && extent.height == 2) + if (extent.width == 1) { - return gl::ShadingRate::_2x2; + if (extent.height == 1) + { + return gl::ShadingRate::_1x1; + } + else if (extent.height == 2) + { + return gl::ShadingRate::_1x2; + } } - else if (extent.width == 4 && extent.height == 2) 
+ else if (extent.width == 2) { - return gl::ShadingRate::_4x2; + if (extent.height == 1) + { + return gl::ShadingRate::_2x1; + } + else if (extent.height == 2) + { + return gl::ShadingRate::_2x2; + } } - else if (extent.width == 4 && extent.height == 4) + else if (extent.width == 4) { - return gl::ShadingRate::_4x4; + if (extent.height == 2) + { + return gl::ShadingRate::_4x2; + } + else if (extent.height == 4) + { + return gl::ShadingRate::_4x4; + } } - return gl::ShadingRate::_1x1; + return gl::ShadingRate::Undefined; } } // namespace @@ -2258,6 +2256,7 @@ void RendererVk::appendDeviceExtensionFeaturesNotPromoted( if (ExtensionFound(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, deviceExtensionNames)) { vk::AddToPNextChain(deviceFeatures, &mFragmentShadingRateFeatures); + vk::AddToPNextChain(deviceProperties, &mFragmentShadingRateProperties); } if (ExtensionFound(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, deviceExtensionNames)) @@ -2591,6 +2590,10 @@ void RendererVk::queryDeviceExtensionFeatures(const vk::ExtensionNameList &devic mFragmentShadingRateFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR; + mFragmentShadingRateProperties = {}; + mFragmentShadingRateProperties.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR; + mFragmentShaderInterlockFeatures = {}; mFragmentShaderInterlockFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; @@ -3835,21 +3838,8 @@ uint32_t RendererVk::getDeviceVersion() return mDeviceVersion == 0 ? 
mInstanceVersion : mDeviceVersion; } -bool RendererVk::canSupportFragmentShadingRate(const vk::ExtensionNameList &deviceExtensionNames) +void RendererVk::queryAndCacheFragmentShadingRates() { - // VK_KHR_create_renderpass2 is required for VK_KHR_fragment_shading_rate - if (!mFeatures.supportsRenderpass2.enabled) - { - return false; - } - - // Device needs to support VK_KHR_fragment_shading_rate and specifically - // pipeline fragment shading rate. - if (mFragmentShadingRateFeatures.pipelineFragmentShadingRate != VK_TRUE) - { - return false; - } - // Init required functions #if !defined(ANGLE_SHARED_LIBVULKAN) InitFragmentShadingRateKHRInstanceFunction(mInstance); @@ -3874,15 +3864,36 @@ bool RendererVk::canSupportFragmentShadingRate(const vk::ExtensionNameList &devi // Cache supported fragment shading rates mSupportedFragmentShadingRates.reset(); + mSupportedFragmentShadingRateSampleCounts.fill(0u); for (const VkPhysicalDeviceFragmentShadingRateKHR &shadingRate : shadingRates) { if (shadingRate.sampleCounts == 0) { continue; } - mSupportedFragmentShadingRates.set(GetShadingRateFromVkExtent(shadingRate.fragmentSize)); + const gl::ShadingRate rate = GetShadingRateFromVkExtent(shadingRate.fragmentSize); + mSupportedFragmentShadingRates.set(rate); + mSupportedFragmentShadingRateSampleCounts[rate] = shadingRate.sampleCounts; + } +} + +bool RendererVk::canSupportFragmentShadingRate() const +{ + // VK_KHR_create_renderpass2 is required for VK_KHR_fragment_shading_rate + if (!mFeatures.supportsRenderpass2.enabled) + { + return false; + } + + // Device needs to support VK_KHR_fragment_shading_rate and specifically + // pipeline fragment shading rate. 
+ if (mFragmentShadingRateFeatures.pipelineFragmentShadingRate != VK_TRUE) + { + return false; } + ASSERT(mSupportedFragmentShadingRates.any()); + // To implement GL_QCOM_shading_rate extension the Vulkan ICD needs to support at least the // following shading rates - // {1, 1} @@ -3895,6 +3906,38 @@ bool RendererVk::canSupportFragmentShadingRate(const vk::ExtensionNameList &devi mSupportedFragmentShadingRates.test(gl::ShadingRate::_2x2); } +bool RendererVk::canSupportFoveatedRendering() const +{ + // Device needs to support attachment fragment shading rate. + if (mFragmentShadingRateFeatures.attachmentFragmentShadingRate != VK_TRUE) + { + return false; + } + + ASSERT(mSupportedFragmentShadingRates.any()); + ASSERT(!mSupportedFragmentShadingRateSampleCounts.empty()); + + // To implement QCOM foveated rendering extensions the Vulkan ICD needs to support all sample + // count bits listed in VkPhysicalDeviceLimits::framebufferColorSampleCounts for these shading + // rates - + // {1, 1} + // {1, 2} + // {2, 1} + // {2, 2} + VkSampleCountFlags framebufferSampleCounts = + getPhysicalDeviceProperties().limits.framebufferColorSampleCounts & + vk_gl::kSupportedSampleCounts; + + return (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_1x1] & + framebufferSampleCounts) == framebufferSampleCounts && + (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_1x2] & + framebufferSampleCounts) == framebufferSampleCounts && + (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_2x1] & + framebufferSampleCounts) == framebufferSampleCounts && + (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_2x2] & + framebufferSampleCounts) == framebufferSampleCounts; +} + bool RendererVk::canPreferDeviceLocalMemoryHostVisible(VkPhysicalDeviceType deviceType) { if (deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU) @@ -3977,9 +4020,6 @@ void RendererVk::initFeatures(DisplayVk *displayVk, const bool isRADV = IsRADV(mPhysicalDeviceProperties.vendorID, 
mDriverProperties.driverID, mPhysicalDeviceProperties.deviceName); - // Identify Google Pixel brand Android devices - const bool isPixel = IsPixel(); - angle::VersionInfo nvidiaVersion; if (isNvidia) { @@ -4591,27 +4631,14 @@ void RendererVk::initFeatures(DisplayVk *displayVk, // can root cause it. ANGLE_FEATURE_CONDITION(&mFeatures, requireCachedBitForStagingBuffer, !isARM); - bool dynamicStateWorks = true; - if (isARM) - { - // Multiple dynamic state issues on ARM have been fixed. - // http://issuetracker.google.com/285124778 - // http://issuetracker.google.com/285196249 - // http://issuetracker.google.com/286224923 - // http://issuetracker.google.com/287318431 - - // Use it on drivers/devices known to work. - if (isPixel) - { - // Pixel devices are working after r44 - dynamicStateWorks = armDriverVersion >= ARMDriverVersion(44, 0, 0); - } - else - { - // Others should work after r44p1 - dynamicStateWorks = armDriverVersion >= ARMDriverVersion(44, 1, 0); - } - } + // Multiple dynamic state issues on ARM have been fixed. + // http://issuetracker.google.com/285124778 + // http://issuetracker.google.com/285196249 + // http://issuetracker.google.com/286224923 + // http://issuetracker.google.com/287318431 + // + // On Pixel devices, the issues have been fixed since r44, but on others since r44p1. + const bool isArm44OrLess = isARM && armDriverVersion < ARMDriverVersion(44, 1, 0); // Intel driver has issues with VK_EXT_vertex_input_dynamic_state // http://anglebug.com/7162#c8 @@ -4621,7 +4648,7 @@ void RendererVk::initFeatures(DisplayVk *displayVk, ANGLE_FEATURE_CONDITION( &mFeatures, supportsExtendedDynamicState, - mExtendedDynamicStateFeatures.extendedDynamicState == VK_TRUE && dynamicStateWorks); + mExtendedDynamicStateFeatures.extendedDynamicState == VK_TRUE && !isArm44OrLess); // VK_EXT_vertex_input_dynamic_state enables dynamic state for the full vertex input state. 
As // such, when available use supportsVertexInputDynamicState instead of @@ -4629,15 +4656,15 @@ void RendererVk::initFeatures(DisplayVk *displayVk, ANGLE_FEATURE_CONDITION(&mFeatures, useVertexInputBindingStrideDynamicState, mFeatures.supportsExtendedDynamicState.enabled && !mFeatures.supportsVertexInputDynamicState.enabled && - dynamicStateWorks); + !isArm44OrLess); ANGLE_FEATURE_CONDITION(&mFeatures, useCullModeDynamicState, - mFeatures.supportsExtendedDynamicState.enabled && dynamicStateWorks); + mFeatures.supportsExtendedDynamicState.enabled && !isArm44OrLess); ANGLE_FEATURE_CONDITION(&mFeatures, useDepthCompareOpDynamicState, mFeatures.supportsExtendedDynamicState.enabled); ANGLE_FEATURE_CONDITION(&mFeatures, useDepthTestEnableDynamicState, mFeatures.supportsExtendedDynamicState.enabled); ANGLE_FEATURE_CONDITION(&mFeatures, useDepthWriteEnableDynamicState, - mFeatures.supportsExtendedDynamicState.enabled && dynamicStateWorks); + mFeatures.supportsExtendedDynamicState.enabled && !isArm44OrLess); ANGLE_FEATURE_CONDITION(&mFeatures, useFrontFaceDynamicState, mFeatures.supportsExtendedDynamicState.enabled); ANGLE_FEATURE_CONDITION(&mFeatures, useStencilOpDynamicState, @@ -4647,10 +4674,10 @@ void RendererVk::initFeatures(DisplayVk *displayVk, ANGLE_FEATURE_CONDITION( &mFeatures, supportsExtendedDynamicState2, - mExtendedDynamicState2Features.extendedDynamicState2 == VK_TRUE && dynamicStateWorks); + mExtendedDynamicState2Features.extendedDynamicState2 == VK_TRUE && !isArm44OrLess); ANGLE_FEATURE_CONDITION(&mFeatures, usePrimitiveRestartEnableDynamicState, - mFeatures.supportsExtendedDynamicState2.enabled && dynamicStateWorks); + mFeatures.supportsExtendedDynamicState2.enabled && !isArm44OrLess); ANGLE_FEATURE_CONDITION(&mFeatures, useRasterizerDiscardEnableDynamicState, mFeatures.supportsExtendedDynamicState2.enabled); ANGLE_FEATURE_CONDITION(&mFeatures, useDepthBiasEnableDynamicState, @@ -4667,21 +4694,33 @@ void RendererVk::initFeatures(DisplayVk *displayVk, 
mExtendedDynamicState2Features.extendedDynamicState2LogicOp == VK_TRUE && !(IsLinux() && isIntel && isMesaLessThan22_2) && !(IsAndroid() && isGalaxyS23)); + // Samsung Vulkan driver crashes in vkCmdClearAttachments() when imageless Framebuffer + // is used to begin Secondary Command Buffer before the corresponding vkCmdBeginRenderPass(). + ANGLE_FEATURE_CONDITION(&mFeatures, supportsImagelessFramebuffer, + mImagelessFramebufferFeatures.imagelessFramebuffer == VK_TRUE && + (vk::RenderPassCommandBuffer::ExecutesInline() || !isSamsung)); + + if (ExtensionFound(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, deviceExtensionNames)) + { + queryAndCacheFragmentShadingRates(); + } + // Support GL_QCOM_shading_rate extension ANGLE_FEATURE_CONDITION(&mFeatures, supportsFragmentShadingRate, - canSupportFragmentShadingRate(deviceExtensionNames)); + canSupportFragmentShadingRate()); + + // Support QCOM foveated rendering extensions. + // Gated on imageless framebuffer to reduce code complexity + ANGLE_FEATURE_CONDITION(&mFeatures, supportsFoveatedRendering, + mFeatures.supportsImagelessFramebuffer.enabled && + mFeatures.supportsFragmentShadingRate.enabled && + canSupportFoveatedRendering()); // We can use the interlock to support GL_ANGLE_shader_pixel_local_storage_coherent. ANGLE_FEATURE_CONDITION( &mFeatures, supportsFragmentShaderPixelInterlock, mFragmentShaderInterlockFeatures.fragmentShaderPixelInterlock == VK_TRUE); - // Samsung Vulkan driver crashes in vkCmdClearAttachments() when imageless Framebuffer - // is used to begin Secondary Command Buffer before the corresponding vkCmdBeginRenderPass(). - ANGLE_FEATURE_CONDITION(&mFeatures, supportsImagelessFramebuffer, - mImagelessFramebufferFeatures.imagelessFramebuffer == VK_TRUE && - (vk::RenderPassCommandBuffer::ExecutesInline() || !isSamsung)); - // The VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT behavior is used by // ANGLE, which requires the robustBufferAccess feature to be available. 
ANGLE_FEATURE_CONDITION(&mFeatures, supportsPipelineRobustness, diff --git a/src/libANGLE/renderer/vulkan/RendererVk.h b/src/libANGLE/renderer/vulkan/RendererVk.h index a2afb45b67..e19d64e041 100644 --- a/src/libANGLE/renderer/vulkan/RendererVk.h +++ b/src/libANGLE/renderer/vulkan/RendererVk.h @@ -555,6 +555,12 @@ class RendererVk : angle::NonCopyable return mSupportedFragmentShadingRates.test(shadingRate); } + VkExtent2D getMaxFragmentShadingRateAttachmentTexelSize() const + { + ASSERT(mFeatures.supportsFoveatedRendering.enabled); + return mFragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize; + } + void addBufferBlockToOrphanList(vk::BufferBlock *block) { mOrphanedBufferBlockList.add(block); } VkDeviceSize getSuballocationDestroyedSize() const @@ -768,7 +774,11 @@ class RendererVk : angle::NonCopyable angle::Result initializeMemoryAllocator(DisplayVk *displayVk); // Query and cache supported fragment shading rates - bool canSupportFragmentShadingRate(const vk::ExtensionNameList &deviceExtensionNames); + void queryAndCacheFragmentShadingRates(); + // Determine support for shading rate based rendering + bool canSupportFragmentShadingRate() const; + // Determine support for foveated rendering + bool canSupportFoveatedRendering() const; // Prefer host visible device local via device local based on device type and heap size. 
bool canPreferDeviceLocalMemoryHostVisible(VkPhysicalDeviceType deviceType); @@ -863,6 +873,7 @@ class RendererVk : angle::NonCopyable VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT mGraphicsPipelineLibraryProperties; VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT mVertexInputDynamicStateFeatures; VkPhysicalDeviceFragmentShadingRateFeaturesKHR mFragmentShadingRateFeatures; + VkPhysicalDeviceFragmentShadingRatePropertiesKHR mFragmentShadingRateProperties; VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT mFragmentShaderInterlockFeatures; VkPhysicalDeviceImagelessFramebufferFeaturesKHR mImagelessFramebufferFeatures; VkPhysicalDevicePipelineRobustnessFeaturesEXT mPipelineRobustnessFeatures; @@ -883,6 +894,8 @@ class RendererVk : angle::NonCopyable #endif angle::PackedEnumBitSet<gl::ShadingRate, uint8_t> mSupportedFragmentShadingRates; + angle::PackedEnumMap<gl::ShadingRate, VkSampleCountFlags> + mSupportedFragmentShadingRateSampleCounts; std::vector<VkQueueFamilyProperties> mQueueFamilyProperties; uint32_t mMaxVertexAttribDivisor; uint32_t mCurrentQueueFamilyIndex; diff --git a/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp b/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp index 8263e6725d..1ba9691f96 100644 --- a/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp +++ b/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp @@ -28,13 +28,6 @@ namespace rx namespace { -// For DesciptorSetUpdates -constexpr size_t kDescriptorBufferInfosInitialSize = 8; -constexpr size_t kDescriptorImageInfosInitialSize = 4; -constexpr size_t kDescriptorWriteInfosInitialSize = - kDescriptorBufferInfosInitialSize + kDescriptorImageInfosInitialSize; -constexpr size_t kDescriptorBufferViewsInitialSize = 0; - // How often monolithic pipelines should be created, if preferMonolithicPipelinesOverLibraries is // enabled. Pipeline creation is typically O(hundreds of microseconds). 
A value of 2ms is chosen // arbitrarily; it ensures that there is always at most a single pipeline job in progress, while @@ -289,103 +282,6 @@ void TextureUpload::onTextureRelease(TextureVk *textureVk) } } -// UpdateDescriptorSetsBuilder implementation. -UpdateDescriptorSetsBuilder::UpdateDescriptorSetsBuilder() -{ - // Reserve reasonable amount of spaces so that for majority of apps we don't need to grow at all - mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize); - mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize); - mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize); - mBufferViews.reserve(kDescriptorBufferViewsInitialSize); -} - -UpdateDescriptorSetsBuilder::~UpdateDescriptorSetsBuilder() = default; - -template <typename T, const T *VkWriteDescriptorSet::*pInfo> -void UpdateDescriptorSetsBuilder::growDescriptorCapacity(std::vector<T> *descriptorVector, - size_t newSize) -{ - const T *const oldInfoStart = descriptorVector->empty() ? nullptr : &(*descriptorVector)[0]; - size_t newCapacity = std::max(descriptorVector->capacity() << 1, newSize); - descriptorVector->reserve(newCapacity); - - if (oldInfoStart) - { - // patch mWriteInfo with new BufferInfo/ImageInfo pointers - for (VkWriteDescriptorSet &set : mWriteDescriptorSets) - { - if (set.*pInfo) - { - size_t index = set.*pInfo - oldInfoStart; - set.*pInfo = &(*descriptorVector)[index]; - } - } - } -} - -template <typename T, const T *VkWriteDescriptorSet::*pInfo> -T *UpdateDescriptorSetsBuilder::allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count) -{ - size_t oldSize = descriptorVector->size(); - size_t newSize = oldSize + count; - if (newSize > descriptorVector->capacity()) - { - // If we have reached capacity, grow the storage and patch the descriptor set with new - // buffer info pointer - growDescriptorCapacity<T, pInfo>(descriptorVector, newSize); - } - descriptorVector->resize(newSize); - return &(*descriptorVector)[oldSize]; -} - 
-VkDescriptorBufferInfo *UpdateDescriptorSetsBuilder::allocDescriptorBufferInfos(size_t count) -{ - return allocDescriptorInfos<VkDescriptorBufferInfo, &VkWriteDescriptorSet::pBufferInfo>( - &mDescriptorBufferInfos, count); -} - -VkDescriptorImageInfo *UpdateDescriptorSetsBuilder::allocDescriptorImageInfos(size_t count) -{ - return allocDescriptorInfos<VkDescriptorImageInfo, &VkWriteDescriptorSet::pImageInfo>( - &mDescriptorImageInfos, count); -} - -VkWriteDescriptorSet *UpdateDescriptorSetsBuilder::allocWriteDescriptorSets(size_t count) -{ - size_t oldSize = mWriteDescriptorSets.size(); - size_t newSize = oldSize + count; - mWriteDescriptorSets.resize(newSize); - return &mWriteDescriptorSets[oldSize]; -} - -VkBufferView *UpdateDescriptorSetsBuilder::allocBufferViews(size_t count) -{ - return allocDescriptorInfos<VkBufferView, &VkWriteDescriptorSet::pTexelBufferView>( - &mBufferViews, count); -} - -uint32_t UpdateDescriptorSetsBuilder::flushDescriptorSetUpdates(VkDevice device) -{ - if (mWriteDescriptorSets.empty()) - { - ASSERT(mDescriptorBufferInfos.empty()); - ASSERT(mDescriptorImageInfos.empty()); - return 0; - } - - vkUpdateDescriptorSets(device, static_cast<uint32_t>(mWriteDescriptorSets.size()), - mWriteDescriptorSets.data(), 0, nullptr); - - uint32_t retVal = static_cast<uint32_t>(mWriteDescriptorSets.size()); - - mWriteDescriptorSets.clear(); - mDescriptorBufferInfos.clear(); - mDescriptorImageInfos.clear(); - mBufferViews.clear(); - - return retVal; -} - vk::BufferPool *ShareGroupVk::getDefaultBufferPool(RendererVk *renderer, VkDeviceSize size, uint32_t memoryTypeIndex, diff --git a/src/libANGLE/renderer/vulkan/ShareGroupVk.h b/src/libANGLE/renderer/vulkan/ShareGroupVk.h index 071ad9c154..4169f4fa0e 100644 --- a/src/libANGLE/renderer/vulkan/ShareGroupVk.h +++ b/src/libANGLE/renderer/vulkan/ShareGroupVk.h @@ -36,37 +36,6 @@ class TextureUpload TextureVk *mPrevUploadedMutableTexture; }; -class UpdateDescriptorSetsBuilder final : angle::NonCopyable -{ - 
public: - UpdateDescriptorSetsBuilder(); - ~UpdateDescriptorSetsBuilder(); - - VkDescriptorBufferInfo *allocDescriptorBufferInfos(size_t count); - VkDescriptorImageInfo *allocDescriptorImageInfos(size_t count); - VkWriteDescriptorSet *allocWriteDescriptorSets(size_t count); - VkBufferView *allocBufferViews(size_t count); - - VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); } - VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); } - VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); } - VkBufferView &allocBufferView() { return *allocBufferViews(1); } - - // Returns the number of written descriptor sets. - uint32_t flushDescriptorSetUpdates(VkDevice device); - - private: - template <typename T, const T *VkWriteDescriptorSet::*pInfo> - T *allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count); - template <typename T, const T *VkWriteDescriptorSet::*pInfo> - void growDescriptorCapacity(std::vector<T> *descriptorVector, size_t newSize); - - std::vector<VkDescriptorBufferInfo> mDescriptorBufferInfos; - std::vector<VkDescriptorImageInfo> mDescriptorImageInfos; - std::vector<VkWriteDescriptorSet> mWriteDescriptorSets; - std::vector<VkBufferView> mBufferViews; -}; - class ShareGroupVk : public ShareGroupImpl { public: diff --git a/src/libANGLE/renderer/vulkan/TextureVk.cpp b/src/libANGLE/renderer/vulkan/TextureVk.cpp index 68705e6942..d05dafdbb8 100644 --- a/src/libANGLE/renderer/vulkan/TextureVk.cpp +++ b/src/libANGLE/renderer/vulkan/TextureVk.cpp @@ -2254,8 +2254,15 @@ angle::Result TextureVk::generateMipmapsWithCompute(ContextVk *contextVk) { vk::CommandBufferAccess access; + // For mipmap generation, we should make sure that there is no pending write for the source + // mip level. If there is, a barrier should be inserted before the source mip being used. 
+ const vk::LevelIndex srcLevelVk = dstBaseLevelVk - 1; uint32_t writeLevelCount = std::min(maxGenerateLevels.get(), dstMaxLevelVk.get() + 1 - dstBaseLevelVk.get()); + + access.onImageComputeMipmapGenerationRead(mImage->toGLLevel(srcLevelVk), 1, 0, + mImage->getLayerCount(), + VK_IMAGE_ASPECT_COLOR_BIT, mImage); access.onImageComputeShaderWrite(mImage->toGLLevel(dstBaseLevelVk), writeLevelCount, 0, mImage->getLayerCount(), VK_IMAGE_ASPECT_COLOR_BIT, mImage); @@ -2270,7 +2277,6 @@ angle::Result TextureVk::generateMipmapsWithCompute(ContextVk *contextVk) const vk::ImageView *srcView = nullptr; UtilsVk::GenerateMipmapDestLevelViews destLevelViews = {}; - const vk::LevelIndex srcLevelVk = dstBaseLevelVk - 1; ANGLE_TRY(getImageViews().getLevelLayerDrawImageView( contextVk, *mImage, srcLevelVk, layer, gl::SrgbWriteControlMode::Default, &srcView)); diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp index 1030ff52b8..a18ce01066 100644 --- a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp +++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp @@ -6334,6 +6334,109 @@ void PipelineCacheAccess::merge(RendererVk *renderer, const vk::PipelineCache &p } } // namespace vk +// UpdateDescriptorSetsBuilder implementation. 
+UpdateDescriptorSetsBuilder::UpdateDescriptorSetsBuilder() +{ + // Reserve reasonable amount of spaces so that for majority of apps we don't need to grow at all + constexpr size_t kDescriptorBufferInfosInitialSize = 8; + constexpr size_t kDescriptorImageInfosInitialSize = 4; + constexpr size_t kDescriptorWriteInfosInitialSize = + kDescriptorBufferInfosInitialSize + kDescriptorImageInfosInitialSize; + constexpr size_t kDescriptorBufferViewsInitialSize = 0; + + mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize); + mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize); + mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize); + mBufferViews.reserve(kDescriptorBufferViewsInitialSize); +} + +UpdateDescriptorSetsBuilder::~UpdateDescriptorSetsBuilder() = default; + +template <typename T, const T *VkWriteDescriptorSet::*pInfo> +void UpdateDescriptorSetsBuilder::growDescriptorCapacity(std::vector<T> *descriptorVector, + size_t newSize) +{ + const T *const oldInfoStart = descriptorVector->empty() ? 
nullptr : &(*descriptorVector)[0]; + size_t newCapacity = std::max(descriptorVector->capacity() << 1, newSize); + descriptorVector->reserve(newCapacity); + + if (oldInfoStart) + { + // patch mWriteInfo with new BufferInfo/ImageInfo pointers + for (VkWriteDescriptorSet &set : mWriteDescriptorSets) + { + if (set.*pInfo) + { + size_t index = set.*pInfo - oldInfoStart; + set.*pInfo = &(*descriptorVector)[index]; + } + } + } +} + +template <typename T, const T *VkWriteDescriptorSet::*pInfo> +T *UpdateDescriptorSetsBuilder::allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count) +{ + size_t oldSize = descriptorVector->size(); + size_t newSize = oldSize + count; + if (newSize > descriptorVector->capacity()) + { + // If we have reached capacity, grow the storage and patch the descriptor set with new + // buffer info pointer + growDescriptorCapacity<T, pInfo>(descriptorVector, newSize); + } + descriptorVector->resize(newSize); + return &(*descriptorVector)[oldSize]; +} + +VkDescriptorBufferInfo *UpdateDescriptorSetsBuilder::allocDescriptorBufferInfos(size_t count) +{ + return allocDescriptorInfos<VkDescriptorBufferInfo, &VkWriteDescriptorSet::pBufferInfo>( + &mDescriptorBufferInfos, count); +} + +VkDescriptorImageInfo *UpdateDescriptorSetsBuilder::allocDescriptorImageInfos(size_t count) +{ + return allocDescriptorInfos<VkDescriptorImageInfo, &VkWriteDescriptorSet::pImageInfo>( + &mDescriptorImageInfos, count); +} + +VkWriteDescriptorSet *UpdateDescriptorSetsBuilder::allocWriteDescriptorSets(size_t count) +{ + size_t oldSize = mWriteDescriptorSets.size(); + size_t newSize = oldSize + count; + mWriteDescriptorSets.resize(newSize); + return &mWriteDescriptorSets[oldSize]; +} + +VkBufferView *UpdateDescriptorSetsBuilder::allocBufferViews(size_t count) +{ + return allocDescriptorInfos<VkBufferView, &VkWriteDescriptorSet::pTexelBufferView>( + &mBufferViews, count); +} + +uint32_t UpdateDescriptorSetsBuilder::flushDescriptorSetUpdates(VkDevice device) +{ + if 
(mWriteDescriptorSets.empty()) + { + ASSERT(mDescriptorBufferInfos.empty()); + ASSERT(mDescriptorImageInfos.empty()); + return 0; + } + + vkUpdateDescriptorSets(device, static_cast<uint32_t>(mWriteDescriptorSets.size()), + mWriteDescriptorSets.data(), 0, nullptr); + + uint32_t retVal = static_cast<uint32_t>(mWriteDescriptorSets.size()); + + mWriteDescriptorSets.clear(); + mDescriptorBufferInfos.clear(); + mDescriptorImageInfos.clear(); + mBufferViews.clear(); + + return retVal; +} + // FramebufferCache implementation. void FramebufferCache::destroy(RendererVk *rendererVk) { diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.h b/src/libANGLE/renderer/vulkan/vk_cache_utils.h index 554abeb884..fe9c4e2d08 100644 --- a/src/libANGLE/renderer/vulkan/vk_cache_utils.h +++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.h @@ -2657,6 +2657,38 @@ class DescriptorSetCache final : angle::NonCopyable // There is 1 default uniform binding used per stage. constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1; + +class UpdateDescriptorSetsBuilder final : angle::NonCopyable +{ + public: + UpdateDescriptorSetsBuilder(); + ~UpdateDescriptorSetsBuilder(); + + VkDescriptorBufferInfo *allocDescriptorBufferInfos(size_t count); + VkDescriptorImageInfo *allocDescriptorImageInfos(size_t count); + VkWriteDescriptorSet *allocWriteDescriptorSets(size_t count); + VkBufferView *allocBufferViews(size_t count); + + VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); } + VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); } + VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); } + VkBufferView &allocBufferView() { return *allocBufferViews(1); } + + // Returns the number of written descriptor sets. 
+ uint32_t flushDescriptorSetUpdates(VkDevice device); + + private: + template <typename T, const T *VkWriteDescriptorSet::*pInfo> + T *allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count); + template <typename T, const T *VkWriteDescriptorSet::*pInfo> + void growDescriptorCapacity(std::vector<T> *descriptorVector, size_t newSize); + + std::vector<VkDescriptorBufferInfo> mDescriptorBufferInfos; + std::vector<VkDescriptorImageInfo> mDescriptorImageInfos; + std::vector<VkWriteDescriptorSet> mWriteDescriptorSets; + std::vector<VkBufferView> mBufferViews; +}; + } // namespace rx #endif // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_ diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.cpp b/src/libANGLE/renderer/vulkan/vk_helpers.cpp index a6cec0f035..aede21b4e7 100644 --- a/src/libANGLE/renderer/vulkan/vk_helpers.cpp +++ b/src/libANGLE/renderer/vulkan/vk_helpers.cpp @@ -1035,6 +1035,15 @@ gl::TexLevelMask AggregateSkipLevels(const gl::CubeFaceArray<gl::TexLevelMask> & } return skipLevelsAllFaces; } + +// Get layer mask for a particular image level. +ImageLayerWriteMask GetImageLayerWriteMask(uint32_t layerStart, uint32_t layerCount) +{ + ImageLayerWriteMask layerMask = angle::BitMask<uint64_t>(layerCount); + uint32_t rotateShift = layerStart % kMaxParallelLayerWrites; + layerMask = (layerMask << rotateShift) | (layerMask >> (kMaxParallelLayerWrites - rotateShift)); + return layerMask; +} } // anonymous namespace // This is an arbitrary max. We can change this later if necessary. 
@@ -1654,8 +1663,10 @@ void CommandBufferHelperCommon::imageWriteImpl(ContextVk *contextVk, ImageHelper *image) { image->onWrite(level, 1, layerStart, layerCount, aspectFlags); - // Write always requires a barrier - updateImageLayoutAndBarrier(contextVk, image, aspectFlags, imageLayout); + if (image->isWriteBarrierNecessary(imageLayout, level, 1, layerStart, layerCount)) + { + updateImageLayoutAndBarrier(contextVk, image, aspectFlags, imageLayout); + } } void CommandBufferHelperCommon::updateImageLayoutAndBarrier(Context *context, @@ -6580,6 +6591,65 @@ bool ImageHelper::isReadBarrierNecessary(ImageLayout newLayout) const return HasResourceWriteAccess(layoutData.type); } +bool ImageHelper::isReadSubresourceBarrierNecessary(ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount) const +{ + // In case an image has both read and write permissions, the written subresources since the last + // barrier should be checked to avoid RAW and WAR hazards. However, if a layout change is + // necessary regardless, there is no need to check the written subresources. + if (mCurrentLayout != newLayout) + { + return true; + } + + ImageLayerWriteMask layerMask = GetImageLayerWriteMask(layerStart, layerCount); + for (uint32_t levelOffset = 0; levelOffset < levelCount; levelOffset++) + { + uint32_t level = levelStart.get() + levelOffset; + if (areLevelSubresourcesWrittenWithinMaskRange(level, layerMask)) + { + return true; + } + } + + return false; +} + +bool ImageHelper::isWriteBarrierNecessary(ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount) const +{ + // If transitioning to a different layout, we always need a barrier. + if (mCurrentLayout != newLayout) + { + return true; + } + + if (layerCount >= kMaxParallelLayerWrites) + { + return true; + } + + // If we are writing to the same parts of the image (level/layer), we need a barrier.
Otherwise, + // it can be done in parallel. + ImageLayerWriteMask layerMask = GetImageLayerWriteMask(layerStart, layerCount); + for (uint32_t levelOffset = 0; levelOffset < levelCount; levelOffset++) + { + uint32_t level = levelStart.get() + levelOffset; + if (areLevelSubresourcesWrittenWithinMaskRange(level, layerMask)) + { + return true; + } + } + + return false; +} + void ImageHelper::changeLayoutAndQueue(Context *context, VkImageAspectFlags aspectMask, ImageLayout newLayout, @@ -6740,6 +6810,7 @@ void ImageHelper::barrierImpl(Context *context, mCurrentLayout = newLayout; mCurrentQueueFamilyIndex = newQueueFamilyIndex; + resetSubresourcesWrittenSinceBarrier(); } template void ImageHelper::barrierImpl<priv::CommandBuffer>( @@ -6750,19 +6821,84 @@ template void ImageHelper::barrierImpl<priv::CommandBuffer>( priv::CommandBuffer *commandBuffer, VkSemaphore *acquireNextImageSemaphoreOut); +void ImageHelper::setSubresourcesWrittenSinceBarrier(gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount) +{ + for (uint32_t levelOffset = 0; levelOffset < levelCount; levelOffset++) + { + uint32_t level = levelStart.get() + levelOffset; + if (layerCount >= kMaxParallelLayerWrites) + { + mSubresourcesWrittenSinceBarrier[level].set(); + } + else + { + ImageLayerWriteMask layerMask = GetImageLayerWriteMask(layerStart, layerCount); + mSubresourcesWrittenSinceBarrier[level] |= layerMask; + } + } +} + +void ImageHelper::resetSubresourcesWrittenSinceBarrier() +{ + for (auto &layerWriteMask : mSubresourcesWrittenSinceBarrier) + { + layerWriteMask.reset(); + } +} + void ImageHelper::recordWriteBarrier(Context *context, VkImageAspectFlags aspectMask, ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, OutsideRenderPassCommandBufferHelper *commands) { - VkSemaphore acquireNextImageSemaphore; - barrierImpl(context, aspectMask, newLayout, 
context->getRenderer()->getQueueFamilyIndex(), - &commands->getCommandBuffer(), &acquireNextImageSemaphore); + if (isWriteBarrierNecessary(newLayout, levelStart, levelCount, layerStart, layerCount)) + { + VkSemaphore acquireNextImageSemaphore; + barrierImpl(context, aspectMask, newLayout, context->getRenderer()->getQueueFamilyIndex(), + &commands->getCommandBuffer(), &acquireNextImageSemaphore); - if (acquireNextImageSemaphore != VK_NULL_HANDLE) + if (acquireNextImageSemaphore != VK_NULL_HANDLE) + { + commands->setAcquireNextImageSemaphore(acquireNextImageSemaphore); + } + } + + setSubresourcesWrittenSinceBarrier(levelStart, levelCount, layerStart, layerCount); +} + +void ImageHelper::recordReadSubresourceBarrier(Context *context, + VkImageAspectFlags aspectMask, + ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, + OutsideRenderPassCommandBufferHelper *commands) +{ + // This barrier is used for an image with both read/write permissions, including during mipmap + // generation and self-copy. + if (isReadSubresourceBarrierNecessary(newLayout, levelStart, levelCount, layerStart, + layerCount)) { - commands->setAcquireNextImageSemaphore(acquireNextImageSemaphore); + VkSemaphore acquireNextImageSemaphore; + barrierImpl(context, aspectMask, newLayout, context->getRenderer()->getQueueFamilyIndex(), + &commands->getCommandBuffer(), &acquireNextImageSemaphore); + + if (acquireNextImageSemaphore != VK_NULL_HANDLE) + { + commands->setAcquireNextImageSemaphore(acquireNextImageSemaphore); + } } + + // Levels/layers being read from are also registered to avoid RAW and WAR hazards. 
+ setSubresourcesWrittenSinceBarrier(levelStart, levelCount, layerStart, layerCount); } void ImageHelper::recordReadBarrier(Context *context, @@ -7091,7 +7227,9 @@ angle::Result ImageHelper::CopyImageSubData(const gl::Context *context, CommandBufferAccess access; if (srcImage == dstImage) { - access.onImageSelfCopy(dstLevelGL, 1, region.dstSubresource.baseArrayLayer, + access.onImageSelfCopy(srcLevelGL, 1, region.srcSubresource.baseArrayLayer, + region.srcSubresource.layerCount, dstLevelGL, 1, + region.dstSubresource.baseArrayLayer, region.dstSubresource.layerCount, aspectFlags, srcImage); } else @@ -8797,15 +8935,6 @@ angle::Result ImageHelper::flushStagedUpdatesImpl(ContextVk *contextVk, const gl::TexLevelMask &skipLevelsAllFaces) { RendererVk *renderer = contextVk->getRenderer(); - // For each level, upload layers that don't conflict in parallel. The layer is hashed to - // `layer % 64` and used to track whether that subresource is currently in transfer. If so, a - // barrier is inserted. If mLayerCount > 64, there will be a few unnecessary barriers. - // - // Note: when a barrier is necessary when uploading updates to a level, we could instead move to - // the next level and continue uploads in parallel. Once all levels need a barrier, a single - // barrier can be issued and we could continue with the rest of the updates from the first - // level. - constexpr uint32_t kMaxParallelSubresourceUpload = 64; const angle::FormatID &actualformat = getActualFormatID(); const angle::FormatID &intendedFormat = getIntendedFormatID(); @@ -8843,9 +8972,6 @@ angle::Result ImageHelper::flushStagedUpdatesImpl(ContextVk *contextVk, std::vector<SubresourceUpdate> updatesToKeep; ASSERT(levelUpdates != nullptr); - // Hash map of uploads in progress. See comment on kMaxParallelSubresourceUpload. 
- uint64_t subresourceUploadsInProgress = 0; - for (SubresourceUpdate &update : *levelUpdates) { ASSERT(IsClearOfAllChannels(update.updateSource) || @@ -8920,35 +9046,34 @@ angle::Result ImageHelper::flushStagedUpdatesImpl(ContextVk *contextVk, } } - // In case of multiple layer updates within the same level, a barrier might be needed if - // there are multiple updates in the same parts of the image. - if (updateLayerCount >= kMaxParallelSubresourceUpload) + // When a barrier is necessary when uploading updates to a level, we could instead move + // to the next level and continue uploads in parallel. Once all levels need a barrier, + // a single barrier can be issued and we could continue with the rest of the updates + // from the first level. In case of multiple layer updates within the same level, a + // barrier might be needed if there are multiple updates in the same parts of the image. + ImageLayout barrierLayout = + transCoding ? ImageLayout::TransferDstAndComputeWrite : ImageLayout::TransferDst; + if (updateLayerCount >= kMaxParallelLayerWrites) { // If there are more subresources than bits we can track, always insert a barrier. - recordWriteBarrier(contextVk, aspectFlags, - transCoding ? 
ImageLayout::TransferDstAndComputeWrite - : ImageLayout::TransferDst, - commandBuffer); - subresourceUploadsInProgress = std::numeric_limits<uint64_t>::max(); + recordWriteBarrier(contextVk, aspectFlags, barrierLayout, updateMipLevelGL, 1, + updateBaseLayer, updateLayerCount, commandBuffer); + mSubresourcesWrittenSinceBarrier[updateMipLevelGL.get()].set(); } else { - const uint64_t subresourceHashRange = angle::BitMask<uint64_t>(updateLayerCount); - const uint32_t subresourceHashOffset = - updateBaseLayer % kMaxParallelSubresourceUpload; - const uint64_t subresourceHash = - ANGLE_ROTL64(subresourceHashRange, subresourceHashOffset); + ImageLayerWriteMask subresourceHash = + GetImageLayerWriteMask(updateBaseLayer, updateLayerCount); - if ((subresourceUploadsInProgress & subresourceHash) != 0) + if (areLevelSubresourcesWrittenWithinMaskRange(updateMipLevelGL.get(), + subresourceHash)) { // If there's overlap in subresource upload, issue a barrier. - recordWriteBarrier(contextVk, aspectFlags, - transCoding ? ImageLayout::TransferDstAndComputeWrite - : ImageLayout::TransferDst, - commandBuffer); - subresourceUploadsInProgress = 0; + recordWriteBarrier(contextVk, aspectFlags, barrierLayout, updateMipLevelGL, 1, + updateBaseLayer, updateLayerCount, commandBuffer); + mSubresourcesWrittenSinceBarrier[updateMipLevelGL.get()].reset(); } - subresourceUploadsInProgress |= subresourceHash; + mSubresourcesWrittenSinceBarrier[updateMipLevelGL.get()] |= subresourceHash; } // Add the necessary commands to the outside command buffer. 
@@ -11477,6 +11602,20 @@ void CommandBufferAccess::onImageWrite(gl::LevelIndex levelStart, levelCount, layerStart, layerCount); } +void CommandBufferAccess::onImageReadSubresources(gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, + VkImageAspectFlags aspectFlags, + ImageLayout imageLayout, + ImageHelper *image) +{ + ASSERT(!image->isReleasedToExternal()); + ASSERT(image->getImageSerial().valid()); + mReadImageSubresources.emplace_back(CommandBufferImageAccess{image, aspectFlags, imageLayout}, + levelStart, levelCount, layerStart, layerCount); +} + void CommandBufferAccess::onBufferExternalAcquireRelease(BufferHelper *buffer) { mExternalAcquireReleaseBuffers.emplace_back(CommandBufferBufferExternalAcquireRelease{buffer}); diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.h b/src/libANGLE/renderer/vulkan/vk_helpers.h index 073c1b2e62..19f51af647 100644 --- a/src/libANGLE/renderer/vulkan/vk_helpers.h +++ b/src/libANGLE/renderer/vulkan/vk_helpers.h @@ -51,6 +51,14 @@ constexpr VkPipelineStageFlags kSwapchainAcquireImageWaitStageFlags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | // First use is a draw command. VK_PIPELINE_STAGE_TRANSFER_BIT; // First use is a clear without scissor. +// For each level, write layers that don't conflict in parallel. The layer is hashed to +// `layer % kMaxParallelLayerWrites` and used to track whether that subresource is currently +// being written. If so, a barrier is inserted; otherwise, the barrier is avoided. If the updated +// layer count is greater than kMaxParallelLayerWrites, there will be a few unnecessary +// barriers. +constexpr uint32_t kMaxParallelLayerWrites = 64; +using ImageLayerWriteMask = std::bitset<kMaxParallelLayerWrites>; + using StagingBufferOffsetArray = std::array<VkDeviceSize, 2>; // A dynamic buffer is conceptually an infinitely long buffer. 
Each time you write to the buffer, @@ -2317,8 +2325,21 @@ class ImageHelper final : public Resource, public angle::Subject void recordWriteBarrier(Context *context, VkImageAspectFlags aspectMask, ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, OutsideRenderPassCommandBufferHelper *commands); + void recordReadSubresourceBarrier(Context *context, + VkImageAspectFlags aspectMask, + ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, + OutsideRenderPassCommandBufferHelper *commands); + void recordWriteBarrierOneOff(Context *context, ImageLayout newLayout, PrimaryCommandBuffer *commandBuffer, @@ -2330,6 +2351,16 @@ class ImageHelper final : public Resource, public angle::Subject // This function can be used to prevent issuing redundant layout transition commands. bool isReadBarrierNecessary(ImageLayout newLayout) const; + bool isReadSubresourceBarrierNecessary(ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount) const; + bool isWriteBarrierNecessary(ImageLayout newLayout, + gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount) const; void recordReadBarrier(Context *context, VkImageAspectFlags aspectMask, @@ -2648,6 +2679,18 @@ class ImageHelper final : public Resource, public angle::Subject CommandBufferT *commandBuffer, VkSemaphore *acquireNextImageSemaphoreOut); + void setSubresourcesWrittenSinceBarrier(gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount); + + void resetSubresourcesWrittenSinceBarrier(); + bool areLevelSubresourcesWrittenWithinMaskRange(uint32_t level, + ImageLayerWriteMask &layerMask) const + { + return (mSubresourcesWrittenSinceBarrier[level] & layerMask) != 0; + } + // If the image has emulated channels, we clear them once so as not to leave garbage on those 
// channels. VkColorComponentFlags getEmulatedChannelsMask() const; @@ -2906,6 +2949,11 @@ class ImageHelper final : public Resource, public angle::Subject // Only used for swapChain images. This is set when an image is acquired and is waited on // by the next submission (which uses this image), at which point it is released. Semaphore mAcquireNextImageSemaphore; + + // Used to track subresource writes per level/layer. This can help parallelize writes to + // different levels or layers of the image, such as data uploads. + // See comment on kMaxParallelLayerWrites. + gl::TexLevelArray<ImageLayerWriteMask> mSubresourcesWrittenSinceBarrier; }; ANGLE_INLINE bool RenderPassCommandBufferHelper::usesImage(const ImageHelper &image) const @@ -3372,7 +3420,7 @@ struct CommandBufferImageAccess VkImageAspectFlags aspectFlags; ImageLayout imageLayout; }; -struct CommandBufferImageWrite +struct CommandBufferImageSubresourceAccess { CommandBufferImageAccess access; gl::LevelIndex levelStart; @@ -3431,13 +3479,19 @@ class CommandBufferAccess : angle::NonCopyable onImageWrite(levelStart, levelCount, layerStart, layerCount, aspectFlags, ImageLayout::TransferDst, image); } - void onImageSelfCopy(gl::LevelIndex writeLevelStart, + void onImageSelfCopy(gl::LevelIndex readLevelStart, + uint32_t readLevelCount, + uint32_t readLayerStart, + uint32_t readLayerCount, + gl::LevelIndex writeLevelStart, uint32_t writeLevelCount, uint32_t writeLayerStart, uint32_t writeLayerCount, VkImageAspectFlags aspectFlags, ImageHelper *image) { + onImageReadSubresources(readLevelStart, readLevelCount, readLayerStart, readLayerCount, + aspectFlags, ImageLayout::TransferSrcDst, image); onImageWrite(writeLevelStart, writeLevelCount, writeLayerStart, writeLayerCount, aspectFlags, ImageLayout::TransferSrcDst, image); } @@ -3445,6 +3499,16 @@ class CommandBufferAccess : angle::NonCopyable { onImageRead(aspectFlags, ImageLayout::ComputeShaderReadOnly, image); } + void 
onImageComputeMipmapGenerationRead(gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, + VkImageAspectFlags aspectFlags, + ImageHelper *image) + { + onImageReadSubresources(levelStart, levelCount, layerStart, layerCount, aspectFlags, + ImageLayout::ComputeShaderWrite, image); + } void onImageComputeShaderWrite(gl::LevelIndex levelStart, uint32_t levelCount, uint32_t layerStart, @@ -3471,10 +3535,12 @@ class CommandBufferAccess : angle::NonCopyable // The limits reflect the current maximum concurrent usage of each resource type. ASSERTs will // fire if this limit is exceeded in the future. - using ReadBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>; - using WriteBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>; - using ReadImages = angle::FixedVector<CommandBufferImageAccess, 2>; - using WriteImages = angle::FixedVector<CommandBufferImageWrite, 1>; + using ReadBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>; + using WriteBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>; + using ReadImages = angle::FixedVector<CommandBufferImageAccess, 2>; + using WriteImages = angle::FixedVector<CommandBufferImageSubresourceAccess, 1>; + using ReadImageSubresources = angle::FixedVector<CommandBufferImageSubresourceAccess, 1>; + using ExternalAcquireReleaseBuffers = angle::FixedVector<CommandBufferBufferExternalAcquireRelease, 1>; using AccessResources = angle::FixedVector<CommandBufferResourceAccess, 1>; @@ -3483,6 +3549,7 @@ class CommandBufferAccess : angle::NonCopyable const WriteBuffers &getWriteBuffers() const { return mWriteBuffers; } const ReadImages &getReadImages() const { return mReadImages; } const WriteImages &getWriteImages() const { return mWriteImages; } + const ReadImageSubresources &getReadImageSubresources() const { return mReadImageSubresources; } const ExternalAcquireReleaseBuffers &getExternalAcquireReleaseBuffers() const { return mExternalAcquireReleaseBuffers; @@ -3503,12 
+3570,22 @@ class CommandBufferAccess : angle::NonCopyable VkImageAspectFlags aspectFlags, ImageLayout imageLayout, ImageHelper *image); + + void onImageReadSubresources(gl::LevelIndex levelStart, + uint32_t levelCount, + uint32_t layerStart, + uint32_t layerCount, + VkImageAspectFlags aspectFlags, + ImageLayout imageLayout, + ImageHelper *image); + void onResourceAccess(Resource *resource); ReadBuffers mReadBuffers; WriteBuffers mWriteBuffers; ReadImages mReadImages; WriteImages mWriteImages; + ReadImageSubresources mReadImageSubresources; ExternalAcquireReleaseBuffers mExternalAcquireReleaseBuffers; AccessResources mAccessResources; }; diff --git a/src/libANGLE/validationCL.cpp b/src/libANGLE/validationCL.cpp index 9d12a3f0b4..e4675cb18d 100644 --- a/src/libANGLE/validationCL.cpp +++ b/src/libANGLE/validationCL.cpp @@ -1163,6 +1163,37 @@ cl_int ValidateBuildProgram(cl_program program, return CL_INVALID_OPERATION; } + // If program was created with clCreateProgramWithBinary and device does not have a valid + // program binary loaded + std::vector<size_t> binSizes{prog.getDevices().size()}; + std::vector<std::vector<unsigned char *>> bins{prog.getDevices().size()}; + if (IsError(prog.getInfo(ProgramInfo::BinarySizes, binSizes.size() * sizeof(size_t), + binSizes.data(), nullptr))) + { + return CL_INVALID_PROGRAM; + } + for (size_t i = 0; i < prog.getDevices().size(); ++i) + { + cl_program_binary_type binType; + bins.at(i).resize(binSizes[i]); + + if (IsError(prog.getInfo(ProgramInfo::Binaries, sizeof(unsigned char *) * bins.size(), + bins.data(), nullptr))) + { + return CL_INVALID_VALUE; + } + if (IsError(prog.getBuildInfo(prog.getDevices()[i]->getNative(), + ProgramBuildInfo::BinaryType, sizeof(cl_program_binary_type), + &binType, nullptr))) + { + return CL_INVALID_VALUE; + } + if ((binType != CL_PROGRAM_BINARY_TYPE_NONE) && bins[i].empty()) + { + return CL_INVALID_BINARY; + } + } + return CL_SUCCESS; } @@ -1201,6 +1232,18 @@ cl_int 
ValidateGetProgramInfo(cl_program program, break; } + // CL_INVALID_VALUE if size in bytes specified by param_value_size is < size of return type + // as described in the Program Object Queries table and param_value is not NULL. + if (param_value != nullptr) + { + size_t valueSizeRet = 0; + if (IsError(prog.getInfo(param_name, 0, nullptr, &valueSizeRet)) || + param_value_size < valueSizeRet) + { + return CL_INVALID_VALUE; + } + } + return CL_SUCCESS; } @@ -1241,6 +1284,18 @@ cl_int ValidateGetProgramBuildInfo(cl_program program, break; } + // CL_INVALID_VALUE if size in bytes specified by param_value_size is < size of return type + // as described in the Program Object Queries table and param_value is not NULL. + if (param_value != nullptr) + { + size_t valueSizeRet = 0; + if (IsError(prog.getBuildInfo(device, param_name, 0, nullptr, &valueSizeRet)) || + param_value_size < valueSizeRet) + { + return CL_INVALID_VALUE; + } + } + return CL_SUCCESS; } |