Diffstat (limited to 'src/libANGLE')
-rw-r--r--  src/libANGLE/CLPlatform.cpp                      |   3
-rw-r--r--  src/libANGLE/CLPlatform.h                        |  11
-rw-r--r--  src/libANGLE/CLProgram.h                         |  19
-rw-r--r--  src/libANGLE/Display.h                           |   1
-rw-r--r--  src/libANGLE/capture/FrameCapture.cpp            | 275
-rw-r--r--  src/libANGLE/capture/FrameCapture.h              |  26
-rw-r--r--  src/libANGLE/cl_types.h                          |   2
-rw-r--r--  src/libANGLE/renderer/CLKernelImpl.h             |   4
-rw-r--r--  src/libANGLE/renderer/vulkan/BUILD.gn            |  13
-rw-r--r--  src/libANGLE/renderer/vulkan/CLContextVk.cpp     |  23
-rw-r--r--  src/libANGLE/renderer/vulkan/CLKernelVk.h        |  45
-rw-r--r--  src/libANGLE/renderer/vulkan/CLProgramVk.cpp     | 750
-rw-r--r--  src/libANGLE/renderer/vulkan/CLProgramVk.h       | 160
-rw-r--r--  src/libANGLE/renderer/vulkan/ContextVk.cpp       |  46
-rw-r--r--  src/libANGLE/renderer/vulkan/RendererVk.cpp      | 205
-rw-r--r--  src/libANGLE/renderer/vulkan/RendererVk.h        |  15
-rw-r--r--  src/libANGLE/renderer/vulkan/ShareGroupVk.cpp    | 104
-rw-r--r--  src/libANGLE/renderer/vulkan/ShareGroupVk.h      |  31
-rw-r--r--  src/libANGLE/renderer/vulkan/TextureVk.cpp       |   8
-rw-r--r--  src/libANGLE/renderer/vulkan/vk_cache_utils.cpp  | 103
-rw-r--r--  src/libANGLE/renderer/vulkan/vk_cache_utils.h    |  32
-rw-r--r--  src/libANGLE/renderer/vulkan/vk_helpers.cpp      | 219
-rw-r--r--  src/libANGLE/renderer/vulkan/vk_helpers.h        |  89
-rw-r--r--  src/libANGLE/validationCL.cpp                    |  55
24 files changed, 1868 insertions(+), 371 deletions(-)
diff --git a/src/libANGLE/CLPlatform.cpp b/src/libANGLE/CLPlatform.cpp
index 9997f51dec..29c5db1ecb 100644
--- a/src/libANGLE/CLPlatform.cpp
+++ b/src/libANGLE/CLPlatform.cpp
@@ -254,7 +254,8 @@ Platform::~Platform() = default;
Platform::Platform(const rx::CLPlatformImpl::CreateFunc &createFunc)
: mImpl(createFunc(*this)),
mInfo(mImpl->createInfo()),
- mDevices(createDevices(mImpl->createDevices()))
+ mDevices(createDevices(mImpl->createDevices())),
+ mMultiThreadPool(angle::WorkerThreadPool::Create(0, ANGLEPlatformCurrent()))
{}
DevicePtrs Platform::createDevices(rx::CLDeviceImpl::CreateDatas &&createDatas)
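The pool created above is what the Vulkan CL backend later uses to run program builds off-thread (see CLProgramVk::build further down). A minimal standalone sketch of that post-and-wait pattern; BuildTask and the std::async plumbing are stand-ins here, not the angle::WorkerThreadPool API, of which only Create() and postWorkerTask() appear in this diff:

#include <future>
#include <iostream>
#include <memory>

// Stand-in for an angle::Closure-style task (cf. CLAsyncBuildTask later in this diff).
struct BuildTask
{
    void operator()() const { std::cout << "building program asynchronously\n"; }
};

int main()
{
    auto task = std::make_shared<BuildTask>();
    // postWorkerTask() analogue: run the closure on a worker thread and hand back
    // a waitable handle (cf. the std::shared_ptr<angle::WaitableEvent> it returns).
    std::future<void> waitable = std::async(std::launch::async, [task] { (*task)(); });
    waitable.wait();
    return 0;
}
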
diff --git a/src/libANGLE/CLPlatform.h b/src/libANGLE/CLPlatform.h
index e19ed2a072..7540b96e28 100644
--- a/src/libANGLE/CLPlatform.h
+++ b/src/libANGLE/CLPlatform.h
@@ -10,6 +10,9 @@
#define LIBANGLE_CLPLATFORM_H_
#include "libANGLE/CLObject.h"
+
+#include "common/WorkerThread.h"
+
#include "libANGLE/renderer/CLPlatformImpl.h"
#include "anglebase/no_destructor.h"
@@ -71,6 +74,8 @@ class Platform final : public _cl_platform_id, public Object
static constexpr const char *GetVendor();
+ const std::shared_ptr<angle::WorkerThreadPool> &getMultiThreadPool() const;
+
private:
explicit Platform(const rx::CLPlatformImpl::CreateFunc &createFunc);
@@ -81,6 +86,7 @@ class Platform final : public _cl_platform_id, public Object
const rx::CLPlatformImpl::Ptr mImpl;
const rx::CLPlatformImpl::Info mInfo;
const DevicePtrs mDevices;
+ std::shared_ptr<angle::WorkerThreadPool> mMultiThreadPool;
static constexpr char kVendor[] = "ANGLE";
static constexpr char kIcdSuffix[] = "ANGLE";
@@ -139,6 +145,11 @@ constexpr const char *Platform::GetVendor()
return kVendor;
}
+inline const std::shared_ptr<angle::WorkerThreadPool> &Platform::getMultiThreadPool() const
+{
+ return mMultiThreadPool;
+}
+
inline PlatformPtrs &Platform::GetPointers()
{
static angle::base::NoDestructor<PlatformPtrs> sPointers;
diff --git a/src/libANGLE/CLProgram.h b/src/libANGLE/CLProgram.h
index b4ff886672..bcd7de6840 100644
--- a/src/libANGLE/CLProgram.h
+++ b/src/libANGLE/CLProgram.h
@@ -10,6 +10,7 @@
#include "libANGLE/CLDevice.h"
#include "libANGLE/CLKernel.h"
+#include "libANGLE/cl_utils.h"
#include "libANGLE/renderer/CLProgramImpl.h"
#include "common/Spinlock.h"
@@ -61,6 +62,7 @@ class Program final : public _cl_program, public Object
Context &getContext();
const Context &getContext() const;
const DevicePtrs &getDevices() const;
+ const std::string &getSource() const;
bool hasDevice(const _cl_device_id *device) const;
bool isBuilding() const;
@@ -123,6 +125,11 @@ inline const DevicePtrs &Program::getDevices() const
return mDevices;
}
+inline const std::string &Program::getSource() const
+{
+ return mSource;
+}
+
inline bool Program::hasDevice(const _cl_device_id *device) const
{
return std::find(mDevices.cbegin(), mDevices.cend(), device) != mDevices.cend();
@@ -130,7 +137,17 @@ inline bool Program::hasDevice(const _cl_device_id *device) const
inline bool Program::isBuilding() const
{
- return mCallback->first != nullptr;
+ for (const DevicePtr &device : getDevices())
+ {
+ cl_build_status buildStatus;
+ ANGLE_CL_IMPL_TRY(getBuildInfo(device->getNative(), ProgramBuildInfo::Status,
+ sizeof(cl_build_status), &buildStatus, nullptr));
+ if ((mCallback->first != nullptr) || (buildStatus == CL_BUILD_IN_PROGRESS))
+ {
+ return true;
+ }
+ }
+ return false;
}
inline bool Program::hasAttachedKernels() const
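The reworked isBuilding() above now also consults the per-device build status. For reference, that is the same status a client observes through the standard OpenCL C API; a minimal sketch (the helper name is hypothetical):

#include <CL/cl.h>

bool IsBuildInProgress(cl_program program, cl_device_id device)
{
    cl_build_status status = CL_BUILD_NONE;
    // CL_BUILD_IN_PROGRESS is reported while an asynchronous (callback) build runs.
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, sizeof(status),
                          &status, nullptr);
    return status == CL_BUILD_IN_PROGRESS;
}
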
diff --git a/src/libANGLE/Display.h b/src/libANGLE/Display.h
index 536347df54..af7be36587 100644
--- a/src/libANGLE/Display.h
+++ b/src/libANGLE/Display.h
@@ -309,6 +309,7 @@ class Display final : public LabeledObject,
egl::Sync *getSync(egl::SyncID syncID);
const SyncMap &getSyncsForCapture() const { return mSyncMap; }
+ const ImageMap &getImagesForCapture() const { return mImageMap; }
// Initialize thread-local variables used by the Display and its backing implementations. This
// includes:
diff --git a/src/libANGLE/capture/FrameCapture.cpp b/src/libANGLE/capture/FrameCapture.cpp
index c10574bac6..79ffcedcfd 100644
--- a/src/libANGLE/capture/FrameCapture.cpp
+++ b/src/libANGLE/capture/FrameCapture.cpp
@@ -1464,6 +1464,43 @@ void MaybeResetResources(gl::ContextID contextID,
}
break;
}
+ case ResourceIDType::Image:
+ {
+ TrackedResource &trackedEGLImages =
+ resourceTracker->getTrackedResource(contextID, ResourceIDType::Image);
+ ResourceSet &newEGLImages = trackedEGLImages.getNewResources();
+ ResourceSet &eglImagesToDelete = trackedEGLImages.getResourcesToDelete();
+ ResourceSet &eglImagesToRegen = trackedEGLImages.getResourcesToRegen();
+ ResourceCalls &eglImageRegenCalls = trackedEGLImages.getResourceRegenCalls();
+
+ if (!newEGLImages.empty() || !eglImagesToDelete.empty())
+ {
+ for (GLuint oldResource : eglImagesToDelete)
+ {
+ out << " DestroyEGLImageKHR(gEGLDisplay, gEGLImageMap2[" << oldResource
+ << "], " << oldResource << ");\n";
+ }
+
+ for (GLuint newResource : newEGLImages)
+ {
+ out << " DestroyEGLImageKHR(gEGLDisplay, gEGLImageMap2[" << newResource
+ << "], " << newResource << ");\n";
+ }
+ }
+ // If any of our starting EGLImages were deleted during the run, recreate them
+ for (GLuint id : eglImagesToRegen)
+ {
+ // Emit their regen calls
+ for (CallCapture &call : eglImageRegenCalls[id])
+ {
+ out << " ";
+ WriteCppReplayForCall(call, replayWriter, out, header, binaryData,
+ maxResourceIDBufferSize);
+ out << ";\n";
+ }
+ }
+ break;
+ }
default:
// TODO (http://anglebug.com/4599): Reset more resource types
break;
@@ -3419,18 +3456,6 @@ void CaptureTextureContents(std::vector<CallCapture> *setupCalls,
return;
}
- if (index.getType() == gl::TextureType::External)
- {
- // The generated glTexImage2D call is for creating the staging texture
- Capture(setupCalls,
- CaptureTexImage2D(*replayState, true, gl::TextureTarget::_2D, index.getLevelIndex(),
- format.internalFormat, desc.size.width, desc.size.height, 0,
- format.format, format.type, data));
-
- // For external textures, we're done
- return;
- }
-
bool is3D =
(index.getType() == gl::TextureType::_3D || index.getType() == gl::TextureType::_2DArray ||
index.getType() == gl::TextureType::CubeMapArray);
@@ -3564,15 +3589,54 @@ void CaptureCustomFenceSync(CallCapture &call, std::vector<CallCapture> &callsOu
callsOut.emplace_back(std::move(call));
}
-void CaptureCustomCreateEGLImage(const char *name,
+const egl::Image *GetImageFromParam(const gl::Context *context, const ParamCapture &param)
+{
+ const egl::ImageID eglImageID = egl::PackParam<egl::ImageID>(param.value.EGLImageVal);
+ const egl::Image *eglImage = context->getDisplay()->getImage(eglImageID);
+ ASSERT(eglImage != nullptr);
+ return eglImage;
+}
+
+void CaptureCustomCreateEGLImage(const gl::Context *context,
+ const char *name,
+ size_t width,
+ size_t height,
CallCapture &call,
std::vector<CallCapture> &callsOut)
{
- ParamBuffer &&params = std::move(call.params);
- EGLImage returnVal = params.getReturnValue().value.EGLImageVal;
- egl::ImageID imageID = egl::PackParam<egl::ImageID>(returnVal);
+ ParamBuffer &&params = std::move(call.params);
+ EGLImage returnVal = params.getReturnValue().value.EGLImageVal;
+ egl::ImageID imageID = egl::PackParam<egl::ImageID>(returnVal);
+ call.customFunctionName = name;
+
+    // Clear the client buffer value if it is a pointer to a hardware buffer. It is
+    // not used by replay and will not be portable to 32-bit builds.
+ if (params.getParam("target", ParamType::TEGLenum, 2).value.EGLenumVal ==
+ EGL_NATIVE_BUFFER_ANDROID)
+ {
+ params.setValueParamAtIndex("buffer", ParamType::TEGLClientBuffer,
+ reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(0)),
+ 3);
+ }
+
+ // Record image dimensions in case a backing resource needs to be created during replay
+ params.addValueParam("width", ParamType::TGLsizei, static_cast<GLsizei>(width));
+ params.addValueParam("height", ParamType::TGLsizei, static_cast<GLsizei>(height));
+
params.addValueParam("image", ParamType::TGLuint, imageID.value);
+ callsOut.emplace_back(std::move(call));
+}
+
+void CaptureCustomDestroyEGLImage(const char *name,
+ CallCapture &call,
+ std::vector<CallCapture> &callsOut)
+{
call.customFunctionName = name;
+ ParamBuffer &&params = std::move(call.params);
+
+ const ParamCapture &imageID = params.getParam("imagePacked", ParamType::TImageID, 1);
+ params.addValueParam("imageID", ParamType::TGLuint, imageID.value.ImageIDVal.value);
+
callsOut.emplace_back(std::move(call));
}
@@ -4150,6 +4214,36 @@ void CaptureShareGroupMidExecutionSetup(
replayState.getMutablePrivateStateForCapture()->setUnpackAlignment(1);
}
+ const egl::ImageMap eglImageMap = context->getDisplay()->getImagesForCapture();
+ for (const auto &[eglImageID, eglImage] : eglImageMap)
+ {
+ // Track this as a starting resource that may need to be restored.
+ TrackedResource &trackedImages =
+ resourceTracker->getTrackedResource(context->id(), ResourceIDType::Image);
+ trackedImages.getStartingResources().insert(eglImageID);
+
+ ResourceCalls &imageRegenCalls = trackedImages.getResourceRegenCalls();
+ CallVector imageGenCalls({setupCalls, &imageRegenCalls[eglImageID]});
+
+ auto eglImageAttribIter = resourceTracker->getImageToAttribTable().find(
+ reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID)));
+ ASSERT(eglImageAttribIter != resourceTracker->getImageToAttribTable().end());
+ const egl::AttributeMap &attribs = eglImageAttribIter->second;
+
+ for (std::vector<CallCapture> *calls : imageGenCalls)
+ {
+ // Create the image on demand with the same attrib retrieved above
+ CallCapture eglCreateImageKHRCall = egl::CaptureCreateImageKHR(
+ nullptr, true, nullptr, context->id(), EGL_GL_TEXTURE_2D,
+ reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(0)), attribs,
+ reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID)));
+
+ // Convert the CaptureCreateImageKHR CallCapture to the customized CallCapture
+ CaptureCustomCreateEGLImage(context, "CreateEGLImageKHR", eglImage->getWidth(),
+ eglImage->getHeight(), eglCreateImageKHRCall, *calls);
+ }
+ }
+
// Capture Texture setup and data.
const gl::TextureManager &textures = apiState.getTextureManagerForCapture();
@@ -4358,21 +4452,40 @@ void CaptureShareGroupMidExecutionSetup(
continue;
}
- // create a staging GL_TEXTURE_2D texture to create the eglImage with
- gl::TextureID stagingTexId = {maxAccessedResourceIDs[ResourceIDType::Texture] + 1};
if (index.getType() == gl::TextureType::External)
{
- Capture(setupCalls, CaptureGenTextures(replayState, true, 1, &stagingTexId));
- MaybeCaptureUpdateResourceIDs(context, resourceTracker, setupCalls);
- Capture(setupCalls,
- CaptureBindTexture(replayState, true, gl::TextureType::_2D, stagingTexId));
- Capture(setupCalls, CaptureTexParameteri(replayState, true, gl::TextureType::_2D,
- GL_TEXTURE_MIN_FILTER, GL_NEAREST));
- Capture(setupCalls, CaptureTexParameteri(replayState, true, gl::TextureType::_2D,
- GL_TEXTURE_MAG_FILTER, GL_NEAREST));
+                // Look up the eglImage ID associated with this texture when the app issued
+ // glEGLImageTargetTexture2DOES()
+ auto eglImageIter = resourceTracker->getTextureIDToImageTable().find(id.value);
+ egl::ImageID eglImageID;
+ if (eglImageIter != resourceTracker->getTextureIDToImageTable().end())
+ {
+ eglImageID = eglImageIter->second;
+ }
+ else
+ {
+ // Original image was deleted and needs to be recreated first
+ eglImageID = {maxAccessedResourceIDs[ResourceIDType::Image] + 1};
+ for (std::vector<CallCapture> *calls : texSetupCalls)
+ {
+ egl::AttributeMap attribs = egl::AttributeMap::CreateFromIntArray(nullptr);
+ CallCapture eglCreateImageKHRCall = egl::CaptureCreateImageKHR(
+ nullptr, true, nullptr, context->id(), EGL_GL_TEXTURE_2D,
+ reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(0)), attribs,
+ reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID.value)));
+ CaptureCustomCreateEGLImage(context, "CreateEGLImageKHR", desc.size.width,
+ desc.size.height, eglCreateImageKHRCall,
+ *calls);
+ }
+ }
+ // Pass the eglImage to the texture that is bound to GL_TEXTURE_EXTERNAL_OES target
+ for (std::vector<CallCapture> *calls : texSetupCalls)
+ {
+ Capture(calls, CaptureEGLImageTargetTexture2DOES(
+ replayState, true, gl::TextureType::External, eglImageID));
+ }
}
-
- if (context->getExtensions().getImageANGLE)
+ else if (context->getExtensions().getImageANGLE)
{
// Use ANGLE_get_image to read back pixel data.
angle::MemoryBuffer data;
@@ -4438,54 +4551,6 @@ void CaptureShareGroupMidExecutionSetup(
CaptureTextureContents(calls, &replayState, texture, index, desc,
static_cast<GLuint>(data.size()), data.data());
}
-
- if (index.getType() == gl::TextureType::External)
- {
- // Look up the attribs used when the image was created
- // Firstly, lookup the eglImage ID associated with this texture when the app
- // issued glEGLImageTargetTexture2DOES()
- auto eglImageIter = resourceTracker->getTextureIDToImageTable().find(id.value);
- ASSERT(eglImageIter != resourceTracker->getTextureIDToImageTable().end());
-
- const egl::ImageID eglImageID = eglImageIter->second;
- const EGLImage eglImage =
- reinterpret_cast<EGLImage>(static_cast<uintptr_t>(eglImageID.value));
-
- // Secondly, lookup the attrib we used to create the eglImage
- auto eglImageAttribIter =
- resourceTracker->getImageToAttribTable().find(eglImage);
- ASSERT(eglImageAttribIter != resourceTracker->getImageToAttribTable().end());
-
- const egl::AttributeMap &retrievedAttribs = eglImageAttribIter->second;
-
- // Create the image on demand with the same attrib retrieved above
- CallCapture eglCreateImageKHRCall = egl::CaptureCreateImageKHR(
- nullptr, true, nullptr, context->id(), EGL_GL_TEXTURE_2D_KHR,
- reinterpret_cast<EGLClientBuffer>(
- static_cast<GLuint64>(stagingTexId.value)),
- retrievedAttribs, eglImage);
-
- // Convert the CaptureCreateImageKHR CallCapture to the customized CallCapture
- std::vector<CallCapture> eglCustomCreateImageKHRCall;
- CaptureCustomCreateEGLImage("CreateEGLImageKHR", eglCreateImageKHRCall,
- eglCustomCreateImageKHRCall);
- ASSERT(eglCustomCreateImageKHRCall.size() > 0);
-
- // Append the customized CallCapture to the setupCalls list
- Capture(setupCalls, std::move(eglCustomCreateImageKHRCall[0]));
-
- // Pass the eglImage to the texture that is bound to GL_TEXTURE_EXTERNAL_OES
- // target
- for (std::vector<CallCapture> *calls : texSetupCalls)
- {
- Capture(calls,
- CaptureEGLImageTargetTexture2DOES(
- replayState, true, gl::TextureType::External, eglImageID));
- }
-
- // Delete the staging texture
- Capture(setupCalls, CaptureDeleteTextures(replayState, true, 1, &stagingTexId));
- }
}
else
{
@@ -7161,12 +7226,26 @@ void FrameCaptureShared::maybeOverrideEntryPoint(const gl::Context *context,
}
case EntryPoint::EGLCreateImage:
{
- CaptureCustomCreateEGLImage("CreateEGLImage", inCall, outCalls);
+ const egl::Image *eglImage = GetImageFromParam(context, inCall.params.getReturnValue());
+ CaptureCustomCreateEGLImage(context, "CreateEGLImage", eglImage->getWidth(),
+ eglImage->getHeight(), inCall, outCalls);
break;
}
case EntryPoint::EGLCreateImageKHR:
{
- CaptureCustomCreateEGLImage("CreateEGLImageKHR", inCall, outCalls);
+ const egl::Image *eglImage = GetImageFromParam(context, inCall.params.getReturnValue());
+ CaptureCustomCreateEGLImage(context, "CreateEGLImageKHR", eglImage->getWidth(),
+ eglImage->getHeight(), inCall, outCalls);
+ break;
+ }
+ case EntryPoint::EGLDestroyImage:
+ {
+ CaptureCustomDestroyEGLImage("DestroyEGLImage", inCall, outCalls);
+ break;
+ }
+ case EntryPoint::EGLDestroyImageKHR:
+ {
+ CaptureCustomDestroyEGLImage("DestroyEGLImageKHR", inCall, outCalls);
break;
}
case EntryPoint::EGLCreateSync:
@@ -7916,12 +7995,52 @@ void FrameCaptureShared::maybeCapturePreCallUpdates(
CreateEGLImagePreCallUpdate<EGLAttrib>(call, mResourceTracker,
ParamType::TEGLAttribPointer,
egl::AttributeMap::CreateFromAttribArray);
+ if (isCaptureActive())
+ {
+ EGLImage eglImage = call.params.getReturnValue().value.EGLImageVal;
+ egl::ImageID imageID = egl::PackParam<egl::ImageID>(eglImage);
+ handleGennedResource(context, imageID);
+ }
break;
}
case EntryPoint::EGLCreateImageKHR:
{
CreateEGLImagePreCallUpdate<EGLint>(call, mResourceTracker, ParamType::TEGLintPointer,
egl::AttributeMap::CreateFromIntArray);
+ if (isCaptureActive())
+ {
+ EGLImageKHR eglImage = call.params.getReturnValue().value.EGLImageKHRVal;
+ egl::ImageID imageID = egl::PackParam<egl::ImageID>(eglImage);
+ handleGennedResource(context, imageID);
+ }
+ break;
+ }
+ case EntryPoint::EGLDestroyImage:
+ case EntryPoint::EGLDestroyImageKHR:
+ {
+ egl::ImageID eglImageID =
+ call.params.getParam("imagePacked", ParamType::TImageID, 1).value.ImageIDVal;
+
+ // Clear any texture->image mappings that involve this image
+ for (auto texImageIter = mResourceTracker.getTextureIDToImageTable().begin();
+ texImageIter != mResourceTracker.getTextureIDToImageTable().end();)
+ {
+ if (texImageIter->second == eglImageID)
+ {
+ texImageIter = mResourceTracker.getTextureIDToImageTable().erase(texImageIter);
+ }
+ else
+ {
+ ++texImageIter;
+ }
+ }
+
+ FrameCaptureShared *frameCaptureShared =
+ context->getShareGroup()->getFrameCaptureShared();
+ if (frameCaptureShared->isCaptureActive())
+ {
+ handleDeletedResource(context, eglImageID);
+ }
break;
}
case EntryPoint::EGLCreateSync:
diff --git a/src/libANGLE/capture/FrameCapture.h b/src/libANGLE/capture/FrameCapture.h
index b6c047a6a0..5986a9ce73 100644
--- a/src/libANGLE/capture/FrameCapture.h
+++ b/src/libANGLE/capture/FrameCapture.h
@@ -837,6 +837,16 @@ void CaptureGLCallToFrameCapture(CaptureFuncT captureFunc,
frameCaptureShared->captureCall(context, std::move(call), isCallValid);
}
+template <typename FirstT, typename... OthersT>
+egl::Display *GetEGLDisplayArg(FirstT display, OthersT... others)
+{
+ if constexpr (std::is_same<egl::Display *, FirstT>::value)
+ {
+ return display;
+ }
+ return nullptr;
+}
+
template <typename CaptureFuncT, typename... ArgsT>
void CaptureEGLCallToFrameCapture(CaptureFuncT captureFunc,
bool isCallValid,
@@ -846,7 +856,21 @@ void CaptureEGLCallToFrameCapture(CaptureFuncT captureFunc,
gl::Context *context = thread->getContext();
if (!context)
{
- return;
+ // Get a valid context from the display argument if no context is associated with this
+ // thread
+ egl::Display *display = GetEGLDisplayArg(captureParams...);
+ if (display)
+ {
+ for (const auto &contextIter : display->getState().contextMap)
+ {
+ context = contextIter.second;
+ break;
+ }
+ }
+ if (!context)
+ {
+ return;
+ }
}
std::lock_guard<egl::ContextMutex> lock(context->getContextMutex());
diff --git a/src/libANGLE/cl_types.h b/src/libANGLE/cl_types.h
index 7808f43a85..a18f595283 100644
--- a/src/libANGLE/cl_types.h
+++ b/src/libANGLE/cl_types.h
@@ -60,6 +60,8 @@ using MemoryPtrs = std::vector<MemoryPtr>;
using PlatformPtrs = std::vector<PlatformPtr>;
using ProgramPtrs = std::vector<ProgramPtr>;
+using CompiledWorkgroupSize = std::array<uint32_t, 3>;
+
struct ImageDescriptor
{
MemObjectType type;
diff --git a/src/libANGLE/renderer/CLKernelImpl.h b/src/libANGLE/renderer/CLKernelImpl.h
index f9047c249f..9d57f895d0 100644
--- a/src/libANGLE/renderer/CLKernelImpl.h
+++ b/src/libANGLE/renderer/CLKernelImpl.h
@@ -44,8 +44,8 @@ class CLKernelImpl : angle::NonCopyable
ArgInfo();
~ArgInfo();
- ArgInfo(const ArgInfo &) = delete;
- ArgInfo &operator=(const ArgInfo &) = delete;
+ ArgInfo(const ArgInfo &) = default;
+ ArgInfo &operator=(const ArgInfo &) = default;
ArgInfo(ArgInfo &&);
ArgInfo &operator=(ArgInfo &&);
diff --git a/src/libANGLE/renderer/vulkan/BUILD.gn b/src/libANGLE/renderer/vulkan/BUILD.gn
index 5a860d0bcd..3f60a4bd6f 100644
--- a/src/libANGLE/renderer/vulkan/BUILD.gn
+++ b/src/libANGLE/renderer/vulkan/BUILD.gn
@@ -115,6 +115,19 @@ template("angle_vulkan_backend_template") {
deps += [ "$angle_root:angle_version_info" ]
}
+  # OpenCL on ANGLE needs both spirv-tools and clspv for the compiler
+ if (angle_enable_cl) {
+ deps += [
+ "$angle_root/third_party/clspv/:clspv_core_shared",
+ "$angle_root/third_party/vulkan-deps/spirv-tools/src/:spvtools",
+ "$angle_root/third_party/vulkan-deps/spirv-tools/src/:spvtools_opt",
+ ]
+ include_dirs = [
+ "$angle_root/third_party/vulkan-deps/spirv-tools/src/include",
+ "$angle_root/third_party/clspv/src/include",
+ ]
+ }
+
public_deps = [
"$angle_root:libANGLE_headers",
"$angle_root/src/common/vulkan",
diff --git a/src/libANGLE/renderer/vulkan/CLContextVk.cpp b/src/libANGLE/renderer/vulkan/CLContextVk.cpp
index 709bfa55db..d4857b8169 100644
--- a/src/libANGLE/renderer/vulkan/CLContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/CLContextVk.cpp
@@ -7,6 +7,7 @@
#include "libANGLE/renderer/vulkan/CLContextVk.h"
#include "libANGLE/renderer/vulkan/CLCommandQueueVk.h"
+#include "libANGLE/renderer/vulkan/CLProgramVk.h"
#include "libANGLE/renderer/vulkan/DisplayVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
@@ -118,8 +119,15 @@ angle::Result CLContextVk::createProgramWithSource(const cl::Program &program,
const std::string &source,
CLProgramImpl::Ptr *programOut)
{
- UNIMPLEMENTED();
- ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
+ CLProgramVk *programVk = new (std::nothrow) CLProgramVk(program);
+ if (programVk == nullptr)
+ {
+ ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY);
+ }
+ ANGLE_TRY(programVk->init());
+ *programOut = CLProgramImpl::Ptr(std::move(programVk));
+
+ return angle::Result::Continue;
}
angle::Result CLContextVk::createProgramWithIL(const cl::Program &program,
@@ -137,8 +145,15 @@ angle::Result CLContextVk::createProgramWithBinary(const cl::Program &program,
cl_int *binaryStatus,
CLProgramImpl::Ptr *programOut)
{
- UNIMPLEMENTED();
- ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
+ CLProgramVk *programVk = new (std::nothrow) CLProgramVk(program);
+ if (programVk == nullptr)
+ {
+ ANGLE_CL_RETURN_ERROR(CL_OUT_OF_HOST_MEMORY);
+ }
+ ANGLE_TRY(programVk->init(lengths, binaries, binaryStatus));
+ *programOut = CLProgramImpl::Ptr(std::move(programVk));
+
+ return angle::Result::Continue;
}
angle::Result CLContextVk::createProgramWithBuiltInKernels(const cl::Program &program,
diff --git a/src/libANGLE/renderer/vulkan/CLKernelVk.h b/src/libANGLE/renderer/vulkan/CLKernelVk.h
index 0d6a26bd3c..1161594235 100644
--- a/src/libANGLE/renderer/vulkan/CLKernelVk.h
+++ b/src/libANGLE/renderer/vulkan/CLKernelVk.h
@@ -26,6 +26,51 @@ class CLKernelVk : public CLKernelImpl
angle::Result createInfo(CLKernelImpl::Info *infoOut) const override;
};
+struct CLKernelArgument
+{
+ CLKernelImpl::ArgInfo info{};
+ uint32_t type = 0;
+ uint32_t ordinal = 0;
+ size_t handleSize = 0;
+ void *handle = nullptr;
+ bool used = false;
+
+ // Shared operand words/regions for "OpExtInst" type spv instructions
+ // (starts from spv word index/offset 7 and onward)
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpExtInst
+ // https://github.com/google/clspv/blob/main/docs/OpenCLCOnVulkan.md#kernels
+ union
+ {
+ uint32_t op3;
+ uint32_t descriptorSet;
+ uint32_t pushConstOffset;
+ uint32_t workgroupSpecId;
+ };
+ union
+ {
+ uint32_t op4;
+ uint32_t descriptorBinding;
+ uint32_t pushConstantSize;
+ uint32_t workgroupSize;
+ };
+ union
+ {
+ uint32_t op5;
+ uint32_t podStorageBufferOffset;
+ uint32_t podUniformOffset;
+ uint32_t pointerUniformOffset;
+ };
+ union
+ {
+ uint32_t op6;
+ uint32_t podStorageBufferSize;
+ uint32_t podUniformSize;
+ uint32_t pointerUniformSize;
+ };
+};
+using CLKernelArguments = std::vector<CLKernelArgument>;
+using CLKernelArgsMap = angle::HashMap<std::string, CLKernelArguments>;
+
} // namespace rx
#endif // LIBANGLE_RENDERER_VULKAN_CLKERNELVK_H_
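The anonymous unions above overlay the positional OpExtInst operand words (op3..op6) with their meaning for each reflection instruction type. A hedged sketch of the decode step, mirroring the word layout ParseReflection() in CLProgramVk.cpp uses for NonSemanticClspvReflectionArgumentStorageBuffer; the struct and helper names here are hypothetical:

#include <cstdint>
#include <unordered_map>

// Values gathered from earlier OpConstant instructions (spvIntLookup in this change).
using IdToValue = std::unordered_map<uint32_t, uint32_t>;

struct DecodedArg
{
    uint32_t ordinal;
    uint32_t descriptorSet;      // the "op3" slot for a storage-buffer argument
    uint32_t descriptorBinding;  // the "op4" slot
};

// words[] holds the parsed OpExtInst instruction; operand words 6..8 carry the
// ordinal and the two shared slots, each as a SPIR-V result ID that must be
// resolved to its constant value first.
DecodedArg DecodeStorageBufferArg(const uint32_t *words, const IdToValue &constants)
{
    DecodedArg arg{};
    arg.ordinal           = constants.at(words[6]);
    arg.descriptorSet     = constants.at(words[7]);
    arg.descriptorBinding = constants.at(words[8]);
    return arg;
}
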
diff --git a/src/libANGLE/renderer/vulkan/CLProgramVk.cpp b/src/libANGLE/renderer/vulkan/CLProgramVk.cpp
index 82fc540a17..624348bc35 100644
--- a/src/libANGLE/renderer/vulkan/CLProgramVk.cpp
+++ b/src/libANGLE/renderer/vulkan/CLProgramVk.cpp
@@ -6,22 +6,428 @@
// CLProgramVk.cpp: Implements the class methods for CLProgramVk.
#include "libANGLE/renderer/vulkan/CLProgramVk.h"
+#include "libANGLE/renderer/vulkan/CLContextVk.h"
+#include "libANGLE/CLContext.h"
+#include "libANGLE/CLProgram.h"
#include "libANGLE/cl_utils.h"
+#include "clspv/Compiler.h"
+
+#include "spirv/unified1/NonSemanticClspvReflection.h"
+#include "spirv/unified1/spirv.hpp"
+
+#include "spirv-tools/libspirv.hpp"
+#include "spirv-tools/optimizer.hpp"
+
+#include "common/string_utils.h"
+
namespace rx
{
-CLProgramVk::CLProgramVk(const cl::Program &program) : CLProgramImpl(program) {}
+namespace
+{
+#if defined(ANGLE_ENABLE_ASSERTS)
+constexpr bool kAngleDebug = true;
+#else
+constexpr bool kAngleDebug = false;
+#endif
+
+// Used by SPIRV-Tools to parse reflection info
+spv_result_t ParseReflection(CLProgramVk::SpvReflectionData &reflectionData,
+ const spv_parsed_instruction_t &spvInstr)
+{
+    // Parse SPIR-V opcodes
+ switch (spvInstr.opcode)
+ {
+ // --- Clspv specific parsing for below cases ---
+ case spv::OpExtInst:
+ {
+ switch (spvInstr.words[4])
+ {
+ case NonSemanticClspvReflectionKernel:
+ {
+ // Extract kernel name and args - add to kernel args map
+ std::string functionName = reflectionData.spvStrLookup[spvInstr.words[6]];
+ uint32_t numArgs = reflectionData.spvIntLookup[spvInstr.words[7]];
+ reflectionData.kernelArgsMap[functionName] = CLKernelArguments();
+ reflectionData.kernelArgsMap[functionName].resize(numArgs);
+
+ // Store kernel flags and attributes
+ reflectionData.kernelFlags[functionName] =
+ reflectionData.spvIntLookup[spvInstr.words[8]];
+ reflectionData.kernelAttributes[functionName] =
+ reflectionData.spvStrLookup[spvInstr.words[9]];
+
+ // Save kernel name to reflection table for later use/lookup in parser routine
+ reflectionData.spvStrLookup[spvInstr.words[2]] = std::string(functionName);
+ break;
+ }
+ case NonSemanticClspvReflectionArgumentInfo:
+ {
+ CLKernelVk::ArgInfo kernelArgInfo;
+ kernelArgInfo.name = reflectionData.spvStrLookup[spvInstr.words[5]];
+                    // If the instruction has more than 5 operands (excluding the instruction
+                    // name/opcode), the argument has qualifiers. ArgumentInfo itself also
+                    // counts as an operand of OpExtInst. In the example below, [ %e %f %g %h ]
+                    // are the arg qualifier operands.
+ //
+ // %a = OpExtInst %b %c ArgumentInfo %d [ %e %f %g %h ]
+ if (spvInstr.num_operands > 5)
+ {
+ kernelArgInfo.typeName = reflectionData.spvStrLookup[spvInstr.words[6]];
+ kernelArgInfo.addressQualifier =
+ reflectionData.spvIntLookup[spvInstr.words[7]];
+ kernelArgInfo.accessQualifier =
+ reflectionData.spvIntLookup[spvInstr.words[8]];
+ kernelArgInfo.typeQualifier =
+ reflectionData.spvIntLookup[spvInstr.words[9]];
+ }
+                // Store the kernel arg for later lookup
+ reflectionData.kernelArgInfos[spvInstr.words[2]] = std::move(kernelArgInfo);
+ break;
+ }
+ case NonSemanticClspvReflectionArgumentPodUniform:
+ case NonSemanticClspvReflectionArgumentPointerUniform:
+ case NonSemanticClspvReflectionArgumentPodStorageBuffer:
+ {
+ CLKernelArgument kernelArg;
+ if (spvInstr.num_operands == 11)
+ {
+ const CLKernelVk::ArgInfo &kernelArgInfo =
+ reflectionData.kernelArgInfos[spvInstr.words[11]];
+ kernelArg.info.name = kernelArgInfo.name;
+ kernelArg.info.typeName = kernelArgInfo.typeName;
+ kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
+ kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier;
+ kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier;
+ }
+ CLKernelArguments &kernelArgs =
+ reflectionData
+ .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
+ kernelArg.type = spvInstr.words[4];
+ kernelArg.used = true;
+ kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
+ kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]];
+ kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]];
+ kernelArg.op5 = reflectionData.spvIntLookup[spvInstr.words[9]];
+ kernelArg.op6 = reflectionData.spvIntLookup[spvInstr.words[10]];
+
+ if (!kernelArgs.empty())
+ {
+ kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
+ }
+ break;
+ }
+ case NonSemanticClspvReflectionArgumentUniform:
+ case NonSemanticClspvReflectionArgumentWorkgroup:
+ case NonSemanticClspvReflectionArgumentStorageBuffer:
+ case NonSemanticClspvReflectionArgumentPodPushConstant:
+ case NonSemanticClspvReflectionArgumentPointerPushConstant:
+ {
+ CLKernelArgument kernelArg;
+ if (spvInstr.num_operands == 9)
+ {
+ const CLKernelVk::ArgInfo &kernelArgInfo =
+ reflectionData.kernelArgInfos[spvInstr.words[9]];
+ kernelArg.info.name = kernelArgInfo.name;
+ kernelArg.info.typeName = kernelArgInfo.typeName;
+ kernelArg.info.addressQualifier = kernelArgInfo.addressQualifier;
+ kernelArg.info.accessQualifier = kernelArgInfo.accessQualifier;
+ kernelArg.info.typeQualifier = kernelArgInfo.typeQualifier;
+ }
+ CLKernelArguments &kernelArgs =
+ reflectionData
+ .kernelArgsMap[reflectionData.spvStrLookup[spvInstr.words[5]]];
+ kernelArg.type = spvInstr.words[4];
+ kernelArg.used = true;
+ kernelArg.ordinal = reflectionData.spvIntLookup[spvInstr.words[6]];
+ kernelArg.op3 = reflectionData.spvIntLookup[spvInstr.words[7]];
+ kernelArg.op4 = reflectionData.spvIntLookup[spvInstr.words[8]];
+ kernelArgs.at(kernelArg.ordinal) = std::move(kernelArg);
+ break;
+ }
+ case NonSemanticClspvReflectionPushConstantGlobalOffset:
+ case NonSemanticClspvReflectionPushConstantRegionOffset:
+ {
+ uint32_t offset = reflectionData.spvIntLookup[spvInstr.words[5]];
+ uint32_t size = reflectionData.spvIntLookup[spvInstr.words[6]];
+ reflectionData.pushConstants[spvInstr.words[4]] = {
+ .stageFlags = 0, .offset = offset, .size = size};
+ break;
+ }
+ case NonSemanticClspvReflectionSpecConstantWorkgroupSize:
+ {
+ reflectionData.specConstantWGS = {
+ reflectionData.spvIntLookup[spvInstr.words[5]],
+ reflectionData.spvIntLookup[spvInstr.words[6]],
+ reflectionData.spvIntLookup[spvInstr.words[7]]};
+ break;
+ }
+ case NonSemanticClspvReflectionPropertyRequiredWorkgroupSize:
+ {
+ reflectionData
+ .kernelCompileWGS[reflectionData.spvStrLookup[spvInstr.words[5]]] = {
+ reflectionData.spvIntLookup[spvInstr.words[6]],
+ reflectionData.spvIntLookup[spvInstr.words[7]],
+ reflectionData.spvIntLookup[spvInstr.words[8]]};
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+ }
+ // --- Regular SPIR-V opcode parsing for below cases ---
+ case spv::OpString:
+ {
+ reflectionData.spvStrLookup[spvInstr.words[1]] =
+ reinterpret_cast<const char *>(&spvInstr.words[2]);
+ break;
+ }
+ case spv::OpConstant:
+ {
+ reflectionData.spvIntLookup[spvInstr.words[2]] = spvInstr.words[3];
+ break;
+ }
+ default:
+ break;
+ }
+ return SPV_SUCCESS;
+}
+
+class CLAsyncBuildTask : public angle::Closure
+{
+ public:
+ CLAsyncBuildTask(CLProgramVk *programVk,
+ const cl::DevicePtrs &devices,
+ std::string options,
+ std::string internalOptions,
+ CLProgramVk::BuildType buildType,
+ const CLProgramVk::DeviceProgramDatas &inputProgramDatas,
+ cl::Program *notify)
+ : mProgramVk(programVk),
+ mDevices(devices),
+ mOptions(options),
+ mInternalOptions(internalOptions),
+ mBuildType(buildType),
+ mDeviceProgramDatas(inputProgramDatas),
+ mNotify(notify)
+ {}
+
+ void operator()() override
+ {
+ ANGLE_TRACE_EVENT0("gpu.angle", "CLProgramVk::buildInternal (async)");
+ CLProgramVk::ScopedProgramCallback spc(mNotify);
+ if (!mProgramVk->buildInternal(mDevices, mOptions, mInternalOptions, mBuildType,
+ mDeviceProgramDatas))
+ {
+ ERR() << "Async build failed for program (" << mProgramVk
+ << ")! Check the build status or build log for details.";
+ }
+ }
+
+ private:
+ CLProgramVk *mProgramVk;
+ const cl::DevicePtrs mDevices;
+ std::string mOptions;
+ std::string mInternalOptions;
+ CLProgramVk::BuildType mBuildType;
+ const CLProgramVk::DeviceProgramDatas mDeviceProgramDatas;
+ cl::Program *mNotify;
+};
+
+std::string ProcessBuildOptions(const std::vector<std::string> &optionTokens,
+ CLProgramVk::BuildType buildType)
+{
+ std::string processedOptions;
+
+ // Need to remove/replace options that are not 1-1 mapped to clspv
+ for (const std::string &optionToken : optionTokens)
+ {
+ if (optionToken == "-create-library" && buildType == CLProgramVk::BuildType::LINK)
+ {
+ processedOptions += " --output-format=bc";
+ continue;
+ }
+ processedOptions += optionToken;
+ }
+
+ switch (buildType)
+ {
+ case CLProgramVk::BuildType::COMPILE:
+ processedOptions += " --output-format=bc";
+ break;
+ case CLProgramVk::BuildType::LINK:
+ processedOptions += " -x ir";
+ break;
+ default:
+ break;
+ }
+
+    // Other internal clspv compiler flags that are required
+ processedOptions += " --long-vector";
-CLProgramVk::~CLProgramVk() = default;
+ return processedOptions;
+}
+
+} // namespace
+
+CLProgramVk::CLProgramVk(const cl::Program &program)
+ : CLProgramImpl(program), mContext(&program.getContext().getImpl<CLContextVk>())
+{}
+
+angle::Result CLProgramVk::init()
+{
+ cl::DevicePtrs devices;
+ ANGLE_TRY(mContext->getDevices(&devices));
+
+    // The devices associated with the program object are those associated with the context
+ for (const cl::RefPointer<cl::Device> &device : devices)
+ {
+ mAssociatedDevicePrograms[device->getNative()] = DeviceProgramData{};
+ }
+
+ return angle::Result::Continue;
+}
+
+angle::Result CLProgramVk::init(const size_t *lengths,
+ const unsigned char **binaries,
+ cl_int *binaryStatus)
+{
+    // The devices associated with the program come from the device_list param of
+    // clCreateProgramWithBinary
+ for (const cl::DevicePtr &device : mProgram.getDevices())
+ {
+ const unsigned char *binaryHandle = *binaries++;
+ size_t binarySize = *lengths++;
+
+ // Check for header
+ if (binarySize < sizeof(ProgramBinaryOutputHeader))
+ {
+ if (binaryStatus)
+ {
+ *binaryStatus++ = CL_INVALID_BINARY;
+ }
+ ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
+ }
+ binarySize -= sizeof(ProgramBinaryOutputHeader);
+
+ // Check for valid binary version from header
+ const ProgramBinaryOutputHeader *binaryHeader =
+ reinterpret_cast<const ProgramBinaryOutputHeader *>(binaryHandle);
+ if (binaryHeader == nullptr)
+ {
+ ERR() << "NULL binary header!";
+ if (binaryStatus)
+ {
+ *binaryStatus++ = CL_INVALID_BINARY;
+ }
+ ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
+ }
+ else if (binaryHeader->headerVersion < LatestSupportedBinaryVersion)
+ {
+ ERR() << "Binary version not compatible with runtime!";
+ if (binaryStatus)
+ {
+ *binaryStatus++ = CL_INVALID_BINARY;
+ }
+ ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
+ }
+ binaryHandle += sizeof(ProgramBinaryOutputHeader);
+
+ // See what kind of binary we have (i.e. SPIR-V or LLVM Bitcode)
+ // https://llvm.org/docs/BitCodeFormat.html#llvm-ir-magic-number
+ // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_magic_number
+ constexpr uint32_t LLVM_BC_MAGIC = 0xDEC04342;
+ constexpr uint32_t SPIRV_MAGIC = 0x07230203;
+ const uint32_t &firstWord = reinterpret_cast<const uint32_t *>(binaryHandle)[0];
+ bool isBC = firstWord == LLVM_BC_MAGIC;
+ bool isSPV = firstWord == SPIRV_MAGIC;
+ if (!isBC && !isSPV)
+ {
+ ERR() << "Binary is neither SPIR-V nor LLVM Bitcode!";
+ if (binaryStatus)
+ {
+ *binaryStatus++ = CL_INVALID_BINARY;
+ }
+ ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
+ }
+
+ // Add device binary to program
+ DeviceProgramData deviceBinary;
+ deviceBinary.binaryType = binaryHeader->binaryType;
+ switch (deviceBinary.binaryType)
+ {
+ case CL_PROGRAM_BINARY_TYPE_EXECUTABLE:
+ deviceBinary.binary.assign(binarySize / sizeof(uint32_t), 0);
+ std::memcpy(deviceBinary.binary.data(), binaryHandle, binarySize);
+ break;
+ case CL_PROGRAM_BINARY_TYPE_LIBRARY:
+ case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT:
+ deviceBinary.IR.assign(binarySize, 0);
+ std::memcpy(deviceBinary.IR.data(), binaryHandle, binarySize);
+ break;
+ default:
+ UNREACHABLE();
+ ERR() << "Invalid binary type!";
+ if (binaryStatus)
+ {
+ *binaryStatus++ = CL_INVALID_BINARY;
+ }
+ ANGLE_CL_RETURN_ERROR(CL_INVALID_BINARY);
+ }
+ mAssociatedDevicePrograms[device->getNative()] = std::move(deviceBinary);
+ if (binaryStatus)
+ {
+ *binaryStatus++ = CL_SUCCESS;
+ }
+ }
+
+ return angle::Result::Continue;
+}
+
+CLProgramVk::~CLProgramVk()
+{
+ for (vk::BindingPointer<vk::DescriptorSetLayout, vk::AtomicRefCounted<vk::DescriptorSetLayout>>
+ &dsLayouts : mDescriptorSetLayouts)
+ {
+ dsLayouts.reset();
+ }
+ for (vk::BindingPointer<rx::vk::DynamicDescriptorPool> &pool : mDescriptorPools)
+ {
+ pool.reset();
+ }
+ mMetaDescriptorPool.destroy(mContext->getRenderer());
+ mDescSetLayoutCache.destroy(mContext->getRenderer());
+ mPipelineLayoutCache.destroy(mContext->getRenderer());
+}
angle::Result CLProgramVk::build(const cl::DevicePtrs &devices,
const char *options,
cl::Program *notify)
{
- UNIMPLEMENTED();
- ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
+ BuildType buildType = !mProgram.getSource().empty() ? BuildType::BUILD : BuildType::BINARY;
+ const cl::DevicePtrs &devicePtrs = !devices.empty() ? devices : mProgram.getDevices();
+
+ if (notify)
+ {
+ std::shared_ptr<angle::WaitableEvent> asyncEvent =
+ mProgram.getContext().getPlatform().getMultiThreadPool()->postWorkerTask(
+ std::make_shared<CLAsyncBuildTask>(this, devicePtrs,
+ std::string(options ? options : ""), "",
+ buildType, DeviceProgramDatas{}, notify));
+ ASSERT(asyncEvent != nullptr);
+ }
+ else
+ {
+ if (!buildInternal(devicePtrs, std::string(options ? options : ""), "", buildType,
+ DeviceProgramDatas{}))
+ {
+ ANGLE_CL_RETURN_ERROR(CL_BUILD_PROGRAM_FAILURE);
+ }
+ }
+ return angle::Result::Continue;
}
angle::Result CLProgramVk::compile(const cl::DevicePtrs &devices,
@@ -39,8 +445,94 @@ angle::Result CLProgramVk::getInfo(cl::ProgramInfo name,
void *value,
size_t *valueSizeRet) const
{
- UNIMPLEMENTED();
- ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
+ cl_uint valUInt = 0u;
+ void *valPointer = nullptr;
+ const void *copyValue = nullptr;
+ size_t copySize = 0u;
+ unsigned char **outputBins = reinterpret_cast<unsigned char **>(value);
+ std::string kernelNamesList;
+ std::vector<size_t> vBinarySizes;
+
+ switch (name)
+ {
+ case cl::ProgramInfo::NumKernels:
+ for (const auto &deviceProgram : mAssociatedDevicePrograms)
+ {
+ valUInt += static_cast<decltype(valUInt)>(deviceProgram.second.numKernels());
+ }
+ copyValue = &valUInt;
+ copySize = sizeof(valUInt);
+ break;
+ case cl::ProgramInfo::BinarySizes:
+ {
+ for (const auto &deviceProgram : mAssociatedDevicePrograms)
+ {
+ vBinarySizes.push_back(
+ sizeof(ProgramBinaryOutputHeader) +
+ (deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
+ ? deviceProgram.second.binary.size() * sizeof(uint32_t)
+ : deviceProgram.second.IR.size()));
+ }
+ valPointer = vBinarySizes.data();
+ copyValue = valPointer;
+ copySize = vBinarySizes.size() * sizeof(size_t);
+ break;
+ }
+ case cl::ProgramInfo::Binaries:
+ for (const auto &deviceProgram : mAssociatedDevicePrograms)
+ {
+ const void *bin =
+ deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
+ ? reinterpret_cast<const void *>(deviceProgram.second.binary.data())
+ : reinterpret_cast<const void *>(deviceProgram.second.IR.data());
+ size_t binSize =
+ deviceProgram.second.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE
+ ? deviceProgram.second.binary.size() * sizeof(uint32_t)
+ : deviceProgram.second.IR.size();
+ ProgramBinaryOutputHeader header{.headerVersion = LatestSupportedBinaryVersion,
+ .binaryType = deviceProgram.second.binaryType};
+
+ if (outputBins != nullptr)
+ {
+ if (*outputBins != nullptr)
+ {
+ std::memcpy(*outputBins, &header, sizeof(ProgramBinaryOutputHeader));
+ std::memcpy((*outputBins) + sizeof(ProgramBinaryOutputHeader), bin,
+ binSize);
+ }
+ outputBins++;
+ }
+
+ // Spec just wants pointer size here
+ copySize += sizeof(unsigned char *);
+ }
+ // We already copied the (headers + binaries) over - nothing else left to copy
+ copyValue = nullptr;
+ break;
+ case cl::ProgramInfo::KernelNames:
+ for (const auto &deviceProgram : mAssociatedDevicePrograms)
+ {
+ kernelNamesList = deviceProgram.second.getKernelNames();
+ }
+ valPointer = kernelNamesList.data();
+ copyValue = valPointer;
+ copySize = kernelNamesList.size() + 1;
+ break;
+ default:
+ UNREACHABLE();
+ }
+
+ if ((value != nullptr) && (copyValue != nullptr))
+ {
+ std::memcpy(value, copyValue, copySize);
+ }
+
+ if (valueSizeRet != nullptr)
+ {
+ *valueSizeRet = copySize;
+ }
+
+ return angle::Result::Continue;
}
angle::Result CLProgramVk::getBuildInfo(const cl::Device &device,
@@ -49,8 +541,53 @@ angle::Result CLProgramVk::getBuildInfo(const cl::Device &device,
void *value,
size_t *valueSizeRet) const
{
- UNIMPLEMENTED();
- ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
+ cl_uint valUInt = 0;
+ cl_build_status valStatus = 0;
+ const void *copyValue = nullptr;
+ size_t copySize = 0;
+ const DeviceProgramData *deviceProgramData = getDeviceProgramData(device.getNative());
+
+ switch (name)
+ {
+ case cl::ProgramBuildInfo::Status:
+ valStatus = deviceProgramData->buildStatus;
+ copyValue = &valStatus;
+ copySize = sizeof(valStatus);
+ break;
+ case cl::ProgramBuildInfo::Log:
+ copyValue = deviceProgramData->buildLog.c_str();
+ copySize = deviceProgramData->buildLog.size() + 1;
+ break;
+ case cl::ProgramBuildInfo::Options:
+ copyValue = mProgramOpts.c_str();
+ copySize = mProgramOpts.size() + 1;
+ break;
+ case cl::ProgramBuildInfo::BinaryType:
+ valUInt = deviceProgramData->binaryType;
+ copyValue = &valUInt;
+ copySize = sizeof(valUInt);
+ break;
+ case cl::ProgramBuildInfo::GlobalVariableTotalSize:
+            // Returns 0 if the device does not support program-scope global variables.
+ valUInt = 0;
+ copyValue = &valUInt;
+ copySize = sizeof(valUInt);
+ break;
+ default:
+ UNREACHABLE();
+ }
+
+ if ((value != nullptr) && (copyValue != nullptr))
+ {
+ memcpy(value, copyValue, std::min(valueSize, copySize));
+ }
+
+ if (valueSizeRet != nullptr)
+ {
+ *valueSizeRet = copySize;
+ }
+
+ return angle::Result::Continue;
}
angle::Result CLProgramVk::createKernel(const cl::Kernel &kernel,
@@ -69,4 +606,201 @@ angle::Result CLProgramVk::createKernels(cl_uint numKernels,
ANGLE_CL_RETURN_ERROR(CL_OUT_OF_RESOURCES);
}
+const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
+ const _cl_device_id *device) const
+{
+ if (!mAssociatedDevicePrograms.contains(device))
+ {
+ WARN() << "Device (" << device << ") is not associated with program (" << this << ") !";
+ return nullptr;
+ }
+ return &mAssociatedDevicePrograms.at(device);
+}
+
+const CLProgramVk::DeviceProgramData *CLProgramVk::getDeviceProgramData(
+ const char *kernelName) const
+{
+ for (const auto &deviceProgram : mAssociatedDevicePrograms)
+ {
+ if (deviceProgram.second.containsKernel(kernelName))
+ {
+ return &deviceProgram.second;
+ }
+ }
+ WARN() << "Kernel name (" << kernelName << ") is not associated with program (" << this
+ << ") !";
+ return nullptr;
+}
+
+bool CLProgramVk::buildInternal(const cl::DevicePtrs &devices,
+ std::string options,
+ std::string internalOptions,
+ BuildType buildType,
+ const DeviceProgramDatas &inputProgramDatas)
+{
+ std::scoped_lock<std::mutex> sl(mProgramMutex);
+
+ // Cache original options string
+ mProgramOpts = options;
+
+ // Process options and append any other internal (required) options for clspv
+ std::vector<std::string> optionTokens;
+ angle::SplitStringAlongWhitespace(options + " " + internalOptions, &optionTokens);
+ const bool createLibrary = std::find(optionTokens.begin(), optionTokens.end(),
+ "-create-library") != optionTokens.end();
+ std::string processedOptions = ProcessBuildOptions(optionTokens, buildType);
+
+ // Build for each associated device
+ for (const cl::RefPointer<cl::Device> &device : devices)
+ {
+ DeviceProgramData &deviceProgramData = mAssociatedDevicePrograms[device->getNative()];
+ deviceProgramData.buildStatus = CL_BUILD_IN_PROGRESS;
+
+ if (buildType != BuildType::BINARY)
+ {
+ // Invoke clspv
+ switch (buildType)
+ {
+ case BuildType::BUILD:
+ case BuildType::COMPILE:
+ {
+ ScopedClspvContext clspvCtx;
+ const char *clSrc = mProgram.getSource().c_str();
+ ClspvError clspvRet = clspvCompileFromSourcesString(
+ 1, NULL, static_cast<const char **>(&clSrc), processedOptions.c_str(),
+ &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize, &clspvCtx.mOutputBuildLog);
+ deviceProgramData.buildLog =
+ clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
+ if (clspvRet != CLSPV_SUCCESS)
+ {
+ ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
+ deviceProgramData.buildStatus = CL_BUILD_ERROR;
+ return false;
+ }
+
+ if (buildType == BuildType::COMPILE)
+ {
+ deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
+ std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
+ clspvCtx.mOutputBinSize);
+ deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
+ }
+ else
+ {
+ deviceProgramData.binary.assign(clspvCtx.mOutputBinSize / sizeof(uint32_t),
+ 0);
+ std::memcpy(deviceProgramData.binary.data(), clspvCtx.mOutputBin,
+ clspvCtx.mOutputBinSize);
+ deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
+ }
+ break;
+ }
+ case BuildType::LINK:
+ {
+ ScopedClspvContext clspvCtx;
+ std::vector<size_t> vSizes;
+ std::vector<const char *> vBins;
+ for (const CLProgramVk::DeviceProgramData *inputProgramData : inputProgramDatas)
+ {
+ vSizes.push_back(inputProgramData->IR.size());
+ vBins.push_back(inputProgramData->IR.data());
+ }
+ ClspvError clspvRet = clspvCompileFromSourcesString(
+ inputProgramDatas.size(), vSizes.data(), vBins.data(),
+ processedOptions.c_str(), &clspvCtx.mOutputBin, &clspvCtx.mOutputBinSize,
+ &clspvCtx.mOutputBuildLog);
+ deviceProgramData.buildLog =
+ clspvCtx.mOutputBuildLog != nullptr ? clspvCtx.mOutputBuildLog : "";
+ if (clspvRet != CLSPV_SUCCESS)
+ {
+ ERR() << "OpenCL build failed with: ClspvError(" << clspvRet << ")!";
+ deviceProgramData.buildStatus = CL_BUILD_ERROR;
+ return false;
+ }
+
+ deviceProgramData.IR.assign(clspvCtx.mOutputBinSize, 0);
+ std::memcpy(deviceProgramData.IR.data(), clspvCtx.mOutputBin,
+ clspvCtx.mOutputBinSize);
+
+ if (createLibrary)
+ {
+ deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
+ }
+ else
+ {
+ deviceProgramData.binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
+ }
+ break;
+ }
+ default:
+ UNREACHABLE();
+ return false;
+ }
+ }
+
+        // Extract reflection info from the SPIR-V binary and populate reflection data
+ if (deviceProgramData.binaryType == CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
+ {
+ spvtools::SpirvTools spvTool(SPV_ENV_UNIVERSAL_1_5);
+ bool parseRet = spvTool.Parse(
+ deviceProgramData.binary,
+ [](const spv_endianness_t endianess, const spv_parsed_header_t &instruction) {
+ return SPV_SUCCESS;
+ },
+ [&deviceProgramData](const spv_parsed_instruction_t &instruction) {
+ return ParseReflection(deviceProgramData.reflectionData, instruction);
+ });
+ if (!parseRet)
+ {
+ ERR() << "Failed to parse reflection info from SPIR-V!";
+ return false;
+ }
+
+            // Set up the initial push constant range
+ uint32_t pushConstantMinOffet = UINT32_MAX, pushConstantMaxOffset = 0,
+ pushConstantMaxSize = 0;
+ for (const auto &pushConstant : deviceProgramData.reflectionData.pushConstants)
+ {
+ pushConstantMinOffet = pushConstant.second.offset < pushConstantMinOffet
+ ? pushConstant.second.offset
+ : pushConstantMinOffet;
+ if (pushConstant.second.offset >= pushConstantMaxOffset)
+ {
+ pushConstantMaxOffset = pushConstant.second.offset;
+ pushConstantMaxSize = pushConstant.second.size;
+ }
+ }
+ deviceProgramData.pushConstRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+ deviceProgramData.pushConstRange.offset =
+ pushConstantMinOffet == UINT32_MAX ? 0 : pushConstantMinOffet;
+ deviceProgramData.pushConstRange.size = pushConstantMaxOffset + pushConstantMaxSize;
+
+ if (kAngleDebug)
+ {
+ if (mContext->getFeatures().clDumpVkSpirv.enabled)
+ {
+ angle::spirv::Print(deviceProgramData.binary);
+ }
+ }
+ }
+ deviceProgramData.buildStatus = CL_BUILD_SUCCESS;
+ }
+ return true;
+}
+
+angle::spirv::Blob CLProgramVk::stripReflection(const DeviceProgramData *deviceProgramData)
+{
+ angle::spirv::Blob binaryStripped;
+ spvtools::Optimizer opt(SPV_ENV_UNIVERSAL_1_5);
+ opt.RegisterPass(spvtools::CreateStripReflectInfoPass());
+ spvtools::OptimizerOptions optOptions;
+ optOptions.set_run_validator(false);
+ if (!opt.Run(deviceProgramData->binary.data(), deviceProgramData->binary.size(),
+ &binaryStripped, optOptions))
+ {
+ ERR() << "Could not strip reflection data from binary!";
+ }
+ return binaryStripped;
+}
+
} // namespace rx
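Binaries returned for CL_PROGRAM_BINARIES are a ProgramBinaryOutputHeader followed by the raw payload, and init() above distinguishes SPIR-V from LLVM bitcode by the payload's first word. A standalone sketch of consuming such a binary; the struct mirrors the one declared in CLProgramVk.h, and the magic constants are the ones used in init():

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Mirror of CLProgramVk::ProgramBinaryOutputHeader (binaryType is a
// cl_program_binary_type, i.e. a 32-bit value, in the real header).
struct ProgramBinaryOutputHeader
{
    uint32_t headerVersion;
    uint32_t binaryType;
};

void InspectBinary(const unsigned char *binary, std::size_t size)
{
    if (size < sizeof(ProgramBinaryOutputHeader) + sizeof(uint32_t))
    {
        std::puts("too small for header + payload: CL_INVALID_BINARY");
        return;
    }

    ProgramBinaryOutputHeader header;
    std::memcpy(&header, binary, sizeof(header));

    uint32_t firstWord = 0;
    std::memcpy(&firstWord, binary + sizeof(header), sizeof(firstWord));
    if (firstWord == 0x07230203u)  // SPIR-V magic number
    {
        std::puts("payload is a SPIR-V executable");
    }
    else if (firstWord == 0xDEC04342u)  // LLVM bitcode magic, little-endian
    {
        std::puts("payload is LLVM IR (compiled object or library)");
    }
    else
    {
        std::puts("unrecognized payload: CL_INVALID_BINARY");
    }
}
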
diff --git a/src/libANGLE/renderer/vulkan/CLProgramVk.h b/src/libANGLE/renderer/vulkan/CLProgramVk.h
index 177bdbf513..4c8033097d 100644
--- a/src/libANGLE/renderer/vulkan/CLProgramVk.h
+++ b/src/libANGLE/renderer/vulkan/CLProgramVk.h
@@ -8,19 +8,158 @@
#ifndef LIBANGLE_RENDERER_VULKAN_CLPROGRAMVK_H_
#define LIBANGLE_RENDERER_VULKAN_CLPROGRAMVK_H_
+#include "libANGLE/renderer/vulkan/CLKernelVk.h"
#include "libANGLE/renderer/vulkan/cl_types.h"
+#include "libANGLE/renderer/vulkan/vk_cache_utils.h"
+#include "libANGLE/renderer/vulkan/vk_helpers.h"
#include "libANGLE/renderer/CLProgramImpl.h"
+#include "libANGLE/CLProgram.h"
+
+#include "clspv/Compiler.h"
+
+#include "vulkan/vulkan_core.h"
+
+#include "spirv-tools/libspirv.h"
+
namespace rx
{
class CLProgramVk : public CLProgramImpl
{
public:
+ struct SpvReflectionData
+ {
+ angle::HashMap<uint32_t, uint32_t> spvIntLookup;
+ angle::HashMap<uint32_t, std::string> spvStrLookup;
+ angle::HashMap<uint32_t, CLKernelVk::ArgInfo> kernelArgInfos;
+ angle::HashMap<std::string, uint32_t> kernelFlags;
+ angle::HashMap<std::string, std::string> kernelAttributes;
+ angle::HashMap<std::string, std::array<uint32_t, 3>> kernelCompileWGS;
+ angle::HashMap<uint32_t, VkPushConstantRange> pushConstants;
+ std::array<uint32_t, 3> specConstantWGS{0, 0, 0};
+ CLKernelArgsMap kernelArgsMap;
+ };
+
+ // Output binary structure (for CL_PROGRAM_BINARIES query)
+ struct ProgramBinaryOutputHeader
+ {
+ uint32_t headerVersion{1};
+ cl_program_binary_type binaryType{CL_PROGRAM_BINARY_TYPE_NONE};
+ };
+ static constexpr uint32_t LatestSupportedBinaryVersion = 1;
+
+ struct ScopedClspvContext : angle::NonCopyable
+ {
+ ScopedClspvContext() = default;
+ ~ScopedClspvContext() { clspvFreeOutputBuildObjs(mOutputBin, mOutputBuildLog); }
+
+ size_t mOutputBinSize{0};
+ char *mOutputBin{nullptr};
+ char *mOutputBuildLog{nullptr};
+ };
+
+ struct ScopedProgramCallback : angle::NonCopyable
+ {
+ ScopedProgramCallback() = delete;
+ ScopedProgramCallback(cl::Program *notify) : mNotify(notify) {}
+ ~ScopedProgramCallback()
+ {
+ if (mNotify)
+ {
+ mNotify->callback();
+ }
+ }
+
+ cl::Program *mNotify{nullptr};
+ };
+
+ enum class BuildType
+ {
+ BUILD = 0,
+ COMPILE,
+ LINK,
+ BINARY
+ };
+
+ struct DeviceProgramData
+ {
+ std::vector<char> IR;
+ std::string buildLog;
+ angle::spirv::Blob binary;
+ SpvReflectionData reflectionData;
+ VkPushConstantRange pushConstRange{};
+ cl_build_status buildStatus{CL_BUILD_NONE};
+ cl_program_binary_type binaryType{CL_PROGRAM_BINARY_TYPE_NONE};
+
+ size_t numKernels() const { return reflectionData.kernelArgsMap.size(); }
+
+ size_t numKernelArgs(const std::string &kernelName) const
+ {
+ return containsKernel(kernelName) ? getKernelArgsMap().at(kernelName).size() : 0;
+ }
+
+ const CLKernelArgsMap &getKernelArgsMap() const { return reflectionData.kernelArgsMap; }
+
+ bool containsKernel(const std::string &name) const
+ {
+ return reflectionData.kernelArgsMap.contains(name);
+ }
+
+ std::string getKernelNames() const
+ {
+ std::string names;
+ for (auto name = getKernelArgsMap().begin(); name != getKernelArgsMap().end(); ++name)
+ {
+ names += name->first + (std::next(name) != getKernelArgsMap().end() ? ";" : "\0");
+ }
+ return names;
+ }
+
+ CLKernelArguments getKernelArguments(const std::string &kernelName) const
+ {
+ CLKernelArguments kargsCopy;
+ if (containsKernel(kernelName))
+ {
+ const CLKernelArguments &kargs = getKernelArgsMap().at(kernelName);
+ for (const CLKernelArgument &karg : kargs)
+ {
+ kargsCopy.push_back(karg);
+ }
+ }
+ return kargsCopy;
+ }
+
+ cl::CompiledWorkgroupSize getCompiledWGS(const std::string &kernelName) const
+ {
+ cl::CompiledWorkgroupSize compiledWGS{0, 0, 0};
+ if (reflectionData.kernelCompileWGS.contains(kernelName))
+ {
+ compiledWGS = reflectionData.kernelCompileWGS.at(kernelName);
+ }
+ return compiledWGS;
+ }
+
+ std::string getKernelAttributes(const std::string &kernelName) const
+ {
+ if (containsKernel(kernelName))
+ {
+ return reflectionData.kernelAttributes.at(kernelName.c_str());
+ }
+ return std::string{};
+ }
+ };
+ using DevicePrograms = angle::HashMap<const _cl_device_id *, DeviceProgramData>;
+ using DeviceProgramDatas = std::vector<const DeviceProgramData *>;
+
CLProgramVk(const cl::Program &program);
+
~CLProgramVk() override;
+ angle::Result init();
+ angle::Result init(const size_t *lengths, const unsigned char **binaries, cl_int *binaryStatus);
+
angle::Result build(const cl::DevicePtrs &devices,
const char *options,
cl::Program *notify) override;
@@ -49,6 +188,27 @@ class CLProgramVk : public CLProgramImpl
angle::Result createKernels(cl_uint numKernels,
CLKernelImpl::CreateFuncs &createFuncs,
cl_uint *numKernelsRet) override;
+
+ const DeviceProgramData *getDeviceProgramData(const char *kernelName) const;
+ const DeviceProgramData *getDeviceProgramData(const _cl_device_id *device) const;
+
+ bool buildInternal(const cl::DevicePtrs &devices,
+ std::string options,
+ std::string internalOptions,
+ BuildType buildType,
+ const DeviceProgramDatas &inputProgramDatas);
+ angle::spirv::Blob stripReflection(const DeviceProgramData *deviceProgramData);
+
+ private:
+ CLContextVk *mContext;
+ std::string mProgramOpts;
+ DevicePrograms mAssociatedDevicePrograms;
+ PipelineLayoutCache mPipelineLayoutCache;
+ vk::MetaDescriptorPool mMetaDescriptorPool;
+ DescriptorSetLayoutCache mDescSetLayoutCache;
+ vk::DescriptorSetLayoutPointerArray mDescriptorSetLayouts;
+ vk::DescriptorSetArray<vk::DescriptorPoolPointer> mDescriptorPools;
+ std::mutex mProgramMutex;
};
} // namespace rx
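For reference, the CL_PROGRAM_KERNEL_NAMES query that getKernelNames() above serves is specified as one semicolon-separated string (note the trailing + "\0" in that helper appends an empty C string, so it adds nothing). A minimal standalone equivalent of the join:

#include <string>
#include <vector>

// Joins kernel names into the semicolon-separated form CL_PROGRAM_KERNEL_NAMES expects.
std::string JoinKernelNames(const std::vector<std::string> &names)
{
    std::string out;
    for (const std::string &name : names)
    {
        if (!out.empty())
        {
            out += ';';
        }
        out += name;
    }
    return out;
}
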
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index d063eb7760..0c7a7f0ef3 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -8596,17 +8596,31 @@ angle::Result ContextVk::onResourceAccess(const vk::CommandBufferAccess &access)
mOutsideRenderPassCommands->retainResource(imageAccess.image);
}
- for (const vk::CommandBufferImageWrite &imageWrite : access.getWriteImages())
+ for (const vk::CommandBufferImageSubresourceAccess &imageReadAccess :
+ access.getReadImageSubresources())
{
- ASSERT(!isRenderPassStartedAndUsesImage(*imageWrite.access.image));
+ vk::ImageHelper *image = imageReadAccess.access.image;
+ ASSERT(!isRenderPassStartedAndUsesImage(*image));
- imageWrite.access.image->recordWriteBarrier(this, imageWrite.access.aspectFlags,
- imageWrite.access.imageLayout,
- mOutsideRenderPassCommands);
- mOutsideRenderPassCommands->retainResource(imageWrite.access.image);
- imageWrite.access.image->onWrite(imageWrite.levelStart, imageWrite.levelCount,
- imageWrite.layerStart, imageWrite.layerCount,
- imageWrite.access.aspectFlags);
+ image->recordReadSubresourceBarrier(
+ this, imageReadAccess.access.aspectFlags, imageReadAccess.access.imageLayout,
+ imageReadAccess.levelStart, imageReadAccess.levelCount, imageReadAccess.layerStart,
+ imageReadAccess.layerCount, mOutsideRenderPassCommands);
+ mOutsideRenderPassCommands->retainResource(image);
+ }
+
+ for (const vk::CommandBufferImageSubresourceAccess &imageWrite : access.getWriteImages())
+ {
+ vk::ImageHelper *image = imageWrite.access.image;
+ ASSERT(!isRenderPassStartedAndUsesImage(*image));
+
+ image->recordWriteBarrier(this, imageWrite.access.aspectFlags,
+ imageWrite.access.imageLayout, imageWrite.levelStart,
+ imageWrite.levelCount, imageWrite.layerStart,
+ imageWrite.layerCount, mOutsideRenderPassCommands);
+ mOutsideRenderPassCommands->retainResource(image);
+ image->onWrite(imageWrite.levelStart, imageWrite.levelCount, imageWrite.layerStart,
+ imageWrite.layerCount, imageWrite.access.aspectFlags);
}
for (const vk::CommandBufferBufferAccess &bufferAccess : access.getReadBuffers())
@@ -8663,8 +8677,20 @@ angle::Result ContextVk::flushCommandBuffersIfNecessary(const vk::CommandBufferA
}
}
+ // In cases where the image has both read and write permissions, the render pass should be
+ // closed if there is a read from a previously written subresource (in a specific level/layer),
+ // or a write to a previously read one.
+ for (const vk::CommandBufferImageSubresourceAccess &imageSubresourceAccess :
+ access.getReadImageSubresources())
+ {
+ if (isRenderPassStartedAndUsesImage(*imageSubresourceAccess.access.image))
+ {
+ return flushCommandsAndEndRenderPass(RenderPassClosureReason::ImageUseThenOutOfRPRead);
+ }
+ }
+
// Write images only need to close the render pass if they need a layout transition.
- for (const vk::CommandBufferImageWrite &imageWrite : access.getWriteImages())
+ for (const vk::CommandBufferImageSubresourceAccess &imageWrite : access.getWriteImages())
{
if (isRenderPassStartedAndUsesImage(*imageWrite.access.image))
{
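The split into read-subresource and write accesses is what lets onResourceAccess() insert barriers only on genuine subresource overlap. A hedged sketch of how a same-image copy now declares both ranges (level and layer values are illustrative; onImageSelfCopy() is defined in vk_helpers.h further below):

    vk::CommandBufferAccess access;
    // Read mip 3, write mip 0 of the same image; only an overlap between the
    // two ranges (or a pending write to mip 3) will force a barrier.
    access.onImageSelfCopy(gl::LevelIndex(3), 1, /*readLayerStart*/ 0, /*readLayerCount*/ 1,
                           gl::LevelIndex(0), 1, /*writeLayerStart*/ 0, /*writeLayerCount*/ 1,
                           VK_IMAGE_ASPECT_COLOR_BIT, image);
    ANGLE_TRY(contextVk->onResourceAccess(access));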
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index 4da4dec02b..eb2b63934d 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -137,22 +137,6 @@ bool IsQualcommOpenSource(uint32_t vendorId, uint32_t driverId, const char *devi
return strstr(deviceName, "Venus") != nullptr || strstr(deviceName, "Turnip") != nullptr;
}
-bool IsPixel()
-{
- if (!IsAndroid())
- {
- return false;
- }
-
- angle::SystemInfo info;
- if (!angle::GetSystemInfo(&info))
- {
- return false;
- }
-
- return strstr(info.machineModelName.c_str(), "Pixel") != nullptr;
-}
-
angle::vk::ICD ChooseICDFromAttribs(const egl::AttributeMap &attribs)
{
#if !defined(ANGLE_PLATFORM_ANDROID)
@@ -296,6 +280,7 @@ constexpr const char *kSkippedMessages[] = {
// https://issuetracker.google.com/319228278
"VUID-vkCmdDrawIndexed-format-07753",
"VUID-vkCmdDraw-format-07753",
+ "Undefined-Value-ShaderFragmentOutputMismatch",
};
// Validation messages that should be ignored only when VK_EXT_primitive_topology_list_restart is
@@ -1333,28 +1318,41 @@ constexpr char kEnableDebugMarkersPropertyName[] = "debug.angle.markers";
ANGLE_INLINE gl::ShadingRate GetShadingRateFromVkExtent(const VkExtent2D &extent)
{
- if (extent.width == 1 && extent.height == 2)
- {
- return gl::ShadingRate::_1x2;
- }
- else if (extent.width == 2 && extent.height == 1)
- {
- return gl::ShadingRate::_2x1;
- }
- else if (extent.width == 2 && extent.height == 2)
+ if (extent.width == 1)
{
- return gl::ShadingRate::_2x2;
+ if (extent.height == 1)
+ {
+ return gl::ShadingRate::_1x1;
+ }
+ else if (extent.height == 2)
+ {
+ return gl::ShadingRate::_1x2;
+ }
}
- else if (extent.width == 4 && extent.height == 2)
+ else if (extent.width == 2)
{
- return gl::ShadingRate::_4x2;
+ if (extent.height == 1)
+ {
+ return gl::ShadingRate::_2x1;
+ }
+ else if (extent.height == 2)
+ {
+ return gl::ShadingRate::_2x2;
+ }
}
- else if (extent.width == 4 && extent.height == 4)
+ else if (extent.width == 4)
{
- return gl::ShadingRate::_4x4;
+ if (extent.height == 2)
+ {
+ return gl::ShadingRate::_4x2;
+ }
+ else if (extent.height == 4)
+ {
+ return gl::ShadingRate::_4x4;
+ }
}
- return gl::ShadingRate::_1x1;
+ return gl::ShadingRate::Undefined;
}
} // namespace
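With this restructuring, extents that ANGLE does not model (for example {4, 1}) now map to ShadingRate::Undefined instead of being silently folded into 1x1, so callers can tell "untracked rate" apart from "no shading rate". Two illustrative expectations:

    ASSERT(GetShadingRateFromVkExtent({2, 2}) == gl::ShadingRate::_2x2);
    ASSERT(GetShadingRateFromVkExtent({4, 1}) == gl::ShadingRate::Undefined);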
@@ -2258,6 +2256,7 @@ void RendererVk::appendDeviceExtensionFeaturesNotPromoted(
if (ExtensionFound(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, deviceExtensionNames))
{
vk::AddToPNextChain(deviceFeatures, &mFragmentShadingRateFeatures);
+ vk::AddToPNextChain(deviceProperties, &mFragmentShadingRateProperties);
}
if (ExtensionFound(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, deviceExtensionNames))
@@ -2591,6 +2590,10 @@ void RendererVk::queryDeviceExtensionFeatures(const vk::ExtensionNameList &devic
mFragmentShadingRateFeatures.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR;
+ mFragmentShadingRateProperties = {};
+ mFragmentShadingRateProperties.sType =
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
+
mFragmentShaderInterlockFeatures = {};
mFragmentShaderInterlockFeatures.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT;
@@ -3835,21 +3838,8 @@ uint32_t RendererVk::getDeviceVersion()
return mDeviceVersion == 0 ? mInstanceVersion : mDeviceVersion;
}
-bool RendererVk::canSupportFragmentShadingRate(const vk::ExtensionNameList &deviceExtensionNames)
+void RendererVk::queryAndCacheFragmentShadingRates()
{
- // VK_KHR_create_renderpass2 is required for VK_KHR_fragment_shading_rate
- if (!mFeatures.supportsRenderpass2.enabled)
- {
- return false;
- }
-
- // Device needs to support VK_KHR_fragment_shading_rate and specifically
- // pipeline fragment shading rate.
- if (mFragmentShadingRateFeatures.pipelineFragmentShadingRate != VK_TRUE)
- {
- return false;
- }
-
// Init required functions
#if !defined(ANGLE_SHARED_LIBVULKAN)
InitFragmentShadingRateKHRInstanceFunction(mInstance);
@@ -3874,15 +3864,36 @@ bool RendererVk::canSupportFragmentShadingRate(const vk::ExtensionNameList &devi
// Cache supported fragment shading rates
mSupportedFragmentShadingRates.reset();
+ mSupportedFragmentShadingRateSampleCounts.fill(0u);
for (const VkPhysicalDeviceFragmentShadingRateKHR &shadingRate : shadingRates)
{
if (shadingRate.sampleCounts == 0)
{
continue;
}
- mSupportedFragmentShadingRates.set(GetShadingRateFromVkExtent(shadingRate.fragmentSize));
+ const gl::ShadingRate rate = GetShadingRateFromVkExtent(shadingRate.fragmentSize);
+ mSupportedFragmentShadingRates.set(rate);
+ mSupportedFragmentShadingRateSampleCounts[rate] = shadingRate.sampleCounts;
+ }
+}
+
+bool RendererVk::canSupportFragmentShadingRate() const
+{
+ // VK_KHR_create_renderpass2 is required for VK_KHR_fragment_shading_rate
+ if (!mFeatures.supportsRenderpass2.enabled)
+ {
+ return false;
+ }
+
+ // Device needs to support VK_KHR_fragment_shading_rate and specifically
+ // pipeline fragment shading rate.
+ if (mFragmentShadingRateFeatures.pipelineFragmentShadingRate != VK_TRUE)
+ {
+ return false;
}
+ ASSERT(mSupportedFragmentShadingRates.any());
+
// To implement GL_QCOM_shading_rate extension the Vulkan ICD needs to support at least the
// following shading rates -
// {1, 1}
@@ -3895,6 +3906,38 @@ bool RendererVk::canSupportFragmentShadingRate(const vk::ExtensionNameList &devi
mSupportedFragmentShadingRates.test(gl::ShadingRate::_2x2);
}
+bool RendererVk::canSupportFoveatedRendering() const
+{
+ // Device needs to support attachment fragment shading rate.
+ if (mFragmentShadingRateFeatures.attachmentFragmentShadingRate != VK_TRUE)
+ {
+ return false;
+ }
+
+ ASSERT(mSupportedFragmentShadingRates.any());
+ ASSERT(!mSupportedFragmentShadingRateSampleCounts.empty());
+
+ // To implement QCOM foveated rendering extensions the Vulkan ICD needs to support all sample
+ // count bits listed in VkPhysicalDeviceLimits::framebufferColorSampleCounts for these shading
+ // rates -
+ // {1, 1}
+ // {1, 2}
+ // {2, 1}
+ // {2, 2}
+ VkSampleCountFlags framebufferSampleCounts =
+ getPhysicalDeviceProperties().limits.framebufferColorSampleCounts &
+ vk_gl::kSupportedSampleCounts;
+
+ return (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_1x1] &
+ framebufferSampleCounts) == framebufferSampleCounts &&
+ (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_1x2] &
+ framebufferSampleCounts) == framebufferSampleCounts &&
+ (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_2x1] &
+ framebufferSampleCounts) == framebufferSampleCounts &&
+ (mSupportedFragmentShadingRateSampleCounts[gl::ShadingRate::_2x2] &
+ framebufferSampleCounts) == framebufferSampleCounts;
+}
+
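The four masked comparisons amount to: each of the 1x1, 1x2, 2x1, and 2x2 rates must support every color sample count the framebuffer can use. A worked example with assumed values:

    // Hypothetical device: framebuffer supports 1x and 4x MSAA.
    VkSampleCountFlags framebufferSampleCounts =
        VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;  // 0b0101
    // The 2x2 rate is only reported for 1x sampling on this device.
    VkSampleCountFlags rate2x2Counts = VK_SAMPLE_COUNT_1_BIT;  // 0b0001
    // (rate2x2Counts & framebufferSampleCounts) == 0b0001 != 0b0101,
    // so canSupportFoveatedRendering() returns false here.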
bool RendererVk::canPreferDeviceLocalMemoryHostVisible(VkPhysicalDeviceType deviceType)
{
if (deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU)
@@ -3977,9 +4020,6 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
const bool isRADV = IsRADV(mPhysicalDeviceProperties.vendorID, mDriverProperties.driverID,
mPhysicalDeviceProperties.deviceName);
- // Identify Google Pixel brand Android devices
- const bool isPixel = IsPixel();
-
angle::VersionInfo nvidiaVersion;
if (isNvidia)
{
@@ -4591,27 +4631,14 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
// can root cause it.
ANGLE_FEATURE_CONDITION(&mFeatures, requireCachedBitForStagingBuffer, !isARM);
- bool dynamicStateWorks = true;
- if (isARM)
- {
- // Multiple dynamic state issues on ARM have been fixed.
- // http://issuetracker.google.com/285124778
- // http://issuetracker.google.com/285196249
- // http://issuetracker.google.com/286224923
- // http://issuetracker.google.com/287318431
-
- // Use it on drivers/devices known to work.
- if (isPixel)
- {
- // Pixel devices are working after r44
- dynamicStateWorks = armDriverVersion >= ARMDriverVersion(44, 0, 0);
- }
- else
- {
- // Others should work after r44p1
- dynamicStateWorks = armDriverVersion >= ARMDriverVersion(44, 1, 0);
- }
- }
+ // Multiple dynamic state issues on ARM have been fixed.
+ // http://issuetracker.google.com/285124778
+ // http://issuetracker.google.com/285196249
+ // http://issuetracker.google.com/286224923
+ // http://issuetracker.google.com/287318431
+ //
+ // On Pixel devices, the issues have been fixed since r44, but on others since r44p1.
+ const bool isArm44OrLess = isARM && armDriverVersion < ARMDriverVersion(44, 1, 0);
// Intel driver has issues with VK_EXT_vertex_input_dynamic_state
// http://anglebug.com/7162#c8
@@ -4621,7 +4648,7 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
ANGLE_FEATURE_CONDITION(
&mFeatures, supportsExtendedDynamicState,
- mExtendedDynamicStateFeatures.extendedDynamicState == VK_TRUE && dynamicStateWorks);
+ mExtendedDynamicStateFeatures.extendedDynamicState == VK_TRUE && !isArm44OrLess);
// VK_EXT_vertex_input_dynamic_state enables dynamic state for the full vertex input state. As
// such, when available use supportsVertexInputDynamicState instead of
@@ -4629,15 +4656,15 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
ANGLE_FEATURE_CONDITION(&mFeatures, useVertexInputBindingStrideDynamicState,
mFeatures.supportsExtendedDynamicState.enabled &&
!mFeatures.supportsVertexInputDynamicState.enabled &&
- dynamicStateWorks);
+ !isArm44OrLess);
ANGLE_FEATURE_CONDITION(&mFeatures, useCullModeDynamicState,
- mFeatures.supportsExtendedDynamicState.enabled && dynamicStateWorks);
+ mFeatures.supportsExtendedDynamicState.enabled && !isArm44OrLess);
ANGLE_FEATURE_CONDITION(&mFeatures, useDepthCompareOpDynamicState,
mFeatures.supportsExtendedDynamicState.enabled);
ANGLE_FEATURE_CONDITION(&mFeatures, useDepthTestEnableDynamicState,
mFeatures.supportsExtendedDynamicState.enabled);
ANGLE_FEATURE_CONDITION(&mFeatures, useDepthWriteEnableDynamicState,
- mFeatures.supportsExtendedDynamicState.enabled && dynamicStateWorks);
+ mFeatures.supportsExtendedDynamicState.enabled && !isArm44OrLess);
ANGLE_FEATURE_CONDITION(&mFeatures, useFrontFaceDynamicState,
mFeatures.supportsExtendedDynamicState.enabled);
ANGLE_FEATURE_CONDITION(&mFeatures, useStencilOpDynamicState,
@@ -4647,10 +4674,10 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
ANGLE_FEATURE_CONDITION(
&mFeatures, supportsExtendedDynamicState2,
- mExtendedDynamicState2Features.extendedDynamicState2 == VK_TRUE && dynamicStateWorks);
+ mExtendedDynamicState2Features.extendedDynamicState2 == VK_TRUE && !isArm44OrLess);
ANGLE_FEATURE_CONDITION(&mFeatures, usePrimitiveRestartEnableDynamicState,
- mFeatures.supportsExtendedDynamicState2.enabled && dynamicStateWorks);
+ mFeatures.supportsExtendedDynamicState2.enabled && !isArm44OrLess);
ANGLE_FEATURE_CONDITION(&mFeatures, useRasterizerDiscardEnableDynamicState,
mFeatures.supportsExtendedDynamicState2.enabled);
ANGLE_FEATURE_CONDITION(&mFeatures, useDepthBiasEnableDynamicState,
@@ -4667,21 +4694,33 @@ void RendererVk::initFeatures(DisplayVk *displayVk,
mExtendedDynamicState2Features.extendedDynamicState2LogicOp == VK_TRUE &&
!(IsLinux() && isIntel && isMesaLessThan22_2) && !(IsAndroid() && isGalaxyS23));
+ // Samsung Vulkan driver crashes in vkCmdClearAttachments() when imageless Framebuffer
+ // is used to begin Secondary Command Buffer before the corresponding vkCmdBeginRenderPass().
+ ANGLE_FEATURE_CONDITION(&mFeatures, supportsImagelessFramebuffer,
+ mImagelessFramebufferFeatures.imagelessFramebuffer == VK_TRUE &&
+ (vk::RenderPassCommandBuffer::ExecutesInline() || !isSamsung));
+
+ if (ExtensionFound(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, deviceExtensionNames))
+ {
+ queryAndCacheFragmentShadingRates();
+ }
+
// Support GL_QCOM_shading_rate extension
ANGLE_FEATURE_CONDITION(&mFeatures, supportsFragmentShadingRate,
- canSupportFragmentShadingRate(deviceExtensionNames));
+ canSupportFragmentShadingRate());
+
+ // Support QCOM foveated rendering extensions.
+ // Gated on imageless framebuffer to reduce code complexity
+ ANGLE_FEATURE_CONDITION(&mFeatures, supportsFoveatedRendering,
+ mFeatures.supportsImagelessFramebuffer.enabled &&
+ mFeatures.supportsFragmentShadingRate.enabled &&
+ canSupportFoveatedRendering());
// We can use the interlock to support GL_ANGLE_shader_pixel_local_storage_coherent.
ANGLE_FEATURE_CONDITION(
&mFeatures, supportsFragmentShaderPixelInterlock,
mFragmentShaderInterlockFeatures.fragmentShaderPixelInterlock == VK_TRUE);
- // Samsung Vulkan driver crashes in vkCmdClearAttachments() when imageless Framebuffer
- // is used to begin Secondary Command Buffer before the corresponding vkCmdBeginRenderPass().
- ANGLE_FEATURE_CONDITION(&mFeatures, supportsImagelessFramebuffer,
- mImagelessFramebufferFeatures.imagelessFramebuffer == VK_TRUE &&
- (vk::RenderPassCommandBuffer::ExecutesInline() || !isSamsung));
-
// The VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT behavior is used by
// ANGLE, which requires the robustBufferAccess feature to be available.
ANGLE_FEATURE_CONDITION(&mFeatures, supportsPipelineRobustness,
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.h b/src/libANGLE/renderer/vulkan/RendererVk.h
index a2afb45b67..e19d64e041 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@@ -555,6 +555,12 @@ class RendererVk : angle::NonCopyable
return mSupportedFragmentShadingRates.test(shadingRate);
}
+ VkExtent2D getMaxFragmentShadingRateAttachmentTexelSize() const
+ {
+ ASSERT(mFeatures.supportsFoveatedRendering.enabled);
+ return mFragmentShadingRateProperties.maxFragmentShadingRateAttachmentTexelSize;
+ }
+
void addBufferBlockToOrphanList(vk::BufferBlock *block) { mOrphanedBufferBlockList.add(block); }
VkDeviceSize getSuballocationDestroyedSize() const
@@ -768,7 +774,11 @@ class RendererVk : angle::NonCopyable
angle::Result initializeMemoryAllocator(DisplayVk *displayVk);
// Query and cache supported fragment shading rates
- bool canSupportFragmentShadingRate(const vk::ExtensionNameList &deviceExtensionNames);
+ void queryAndCacheFragmentShadingRates();
+ // Determine support for shading rate based rendering
+ bool canSupportFragmentShadingRate() const;
+ // Determine support for foveated rendering
+ bool canSupportFoveatedRendering() const;
// Prefer host visible device local via device local based on device type and heap size.
bool canPreferDeviceLocalMemoryHostVisible(VkPhysicalDeviceType deviceType);
@@ -863,6 +873,7 @@ class RendererVk : angle::NonCopyable
VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT mGraphicsPipelineLibraryProperties;
VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT mVertexInputDynamicStateFeatures;
VkPhysicalDeviceFragmentShadingRateFeaturesKHR mFragmentShadingRateFeatures;
+ VkPhysicalDeviceFragmentShadingRatePropertiesKHR mFragmentShadingRateProperties;
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT mFragmentShaderInterlockFeatures;
VkPhysicalDeviceImagelessFramebufferFeaturesKHR mImagelessFramebufferFeatures;
VkPhysicalDevicePipelineRobustnessFeaturesEXT mPipelineRobustnessFeatures;
@@ -883,6 +894,8 @@ class RendererVk : angle::NonCopyable
#endif
angle::PackedEnumBitSet<gl::ShadingRate, uint8_t> mSupportedFragmentShadingRates;
+ angle::PackedEnumMap<gl::ShadingRate, VkSampleCountFlags>
+ mSupportedFragmentShadingRateSampleCounts;
std::vector<VkQueueFamilyProperties> mQueueFamilyProperties;
uint32_t mMaxVertexAttribDivisor;
uint32_t mCurrentQueueFamilyIndex;
diff --git a/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp b/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp
index 8263e6725d..1ba9691f96 100644
--- a/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ShareGroupVk.cpp
@@ -28,13 +28,6 @@ namespace rx
namespace
{
-// For DesciptorSetUpdates
-constexpr size_t kDescriptorBufferInfosInitialSize = 8;
-constexpr size_t kDescriptorImageInfosInitialSize = 4;
-constexpr size_t kDescriptorWriteInfosInitialSize =
- kDescriptorBufferInfosInitialSize + kDescriptorImageInfosInitialSize;
-constexpr size_t kDescriptorBufferViewsInitialSize = 0;
-
// How often monolithic pipelines should be created, if preferMonolithicPipelinesOverLibraries is
// enabled. Pipeline creation is typically O(hundreds of microseconds). A value of 2ms is chosen
// arbitrarily; it ensures that there is always at most a single pipeline job in progress, while
@@ -289,103 +282,6 @@ void TextureUpload::onTextureRelease(TextureVk *textureVk)
}
}
-// UpdateDescriptorSetsBuilder implementation.
-UpdateDescriptorSetsBuilder::UpdateDescriptorSetsBuilder()
-{
- // Reserve reasonable amount of spaces so that for majority of apps we don't need to grow at all
- mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
- mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
- mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
- mBufferViews.reserve(kDescriptorBufferViewsInitialSize);
-}
-
-UpdateDescriptorSetsBuilder::~UpdateDescriptorSetsBuilder() = default;
-
-template <typename T, const T *VkWriteDescriptorSet::*pInfo>
-void UpdateDescriptorSetsBuilder::growDescriptorCapacity(std::vector<T> *descriptorVector,
- size_t newSize)
-{
- const T *const oldInfoStart = descriptorVector->empty() ? nullptr : &(*descriptorVector)[0];
- size_t newCapacity = std::max(descriptorVector->capacity() << 1, newSize);
- descriptorVector->reserve(newCapacity);
-
- if (oldInfoStart)
- {
- // patch mWriteInfo with new BufferInfo/ImageInfo pointers
- for (VkWriteDescriptorSet &set : mWriteDescriptorSets)
- {
- if (set.*pInfo)
- {
- size_t index = set.*pInfo - oldInfoStart;
- set.*pInfo = &(*descriptorVector)[index];
- }
- }
- }
-}
-
-template <typename T, const T *VkWriteDescriptorSet::*pInfo>
-T *UpdateDescriptorSetsBuilder::allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count)
-{
- size_t oldSize = descriptorVector->size();
- size_t newSize = oldSize + count;
- if (newSize > descriptorVector->capacity())
- {
- // If we have reached capacity, grow the storage and patch the descriptor set with new
- // buffer info pointer
- growDescriptorCapacity<T, pInfo>(descriptorVector, newSize);
- }
- descriptorVector->resize(newSize);
- return &(*descriptorVector)[oldSize];
-}
-
-VkDescriptorBufferInfo *UpdateDescriptorSetsBuilder::allocDescriptorBufferInfos(size_t count)
-{
- return allocDescriptorInfos<VkDescriptorBufferInfo, &VkWriteDescriptorSet::pBufferInfo>(
- &mDescriptorBufferInfos, count);
-}
-
-VkDescriptorImageInfo *UpdateDescriptorSetsBuilder::allocDescriptorImageInfos(size_t count)
-{
- return allocDescriptorInfos<VkDescriptorImageInfo, &VkWriteDescriptorSet::pImageInfo>(
- &mDescriptorImageInfos, count);
-}
-
-VkWriteDescriptorSet *UpdateDescriptorSetsBuilder::allocWriteDescriptorSets(size_t count)
-{
- size_t oldSize = mWriteDescriptorSets.size();
- size_t newSize = oldSize + count;
- mWriteDescriptorSets.resize(newSize);
- return &mWriteDescriptorSets[oldSize];
-}
-
-VkBufferView *UpdateDescriptorSetsBuilder::allocBufferViews(size_t count)
-{
- return allocDescriptorInfos<VkBufferView, &VkWriteDescriptorSet::pTexelBufferView>(
- &mBufferViews, count);
-}
-
-uint32_t UpdateDescriptorSetsBuilder::flushDescriptorSetUpdates(VkDevice device)
-{
- if (mWriteDescriptorSets.empty())
- {
- ASSERT(mDescriptorBufferInfos.empty());
- ASSERT(mDescriptorImageInfos.empty());
- return 0;
- }
-
- vkUpdateDescriptorSets(device, static_cast<uint32_t>(mWriteDescriptorSets.size()),
- mWriteDescriptorSets.data(), 0, nullptr);
-
- uint32_t retVal = static_cast<uint32_t>(mWriteDescriptorSets.size());
-
- mWriteDescriptorSets.clear();
- mDescriptorBufferInfos.clear();
- mDescriptorImageInfos.clear();
- mBufferViews.clear();
-
- return retVal;
-}
-
vk::BufferPool *ShareGroupVk::getDefaultBufferPool(RendererVk *renderer,
VkDeviceSize size,
uint32_t memoryTypeIndex,
diff --git a/src/libANGLE/renderer/vulkan/ShareGroupVk.h b/src/libANGLE/renderer/vulkan/ShareGroupVk.h
index 071ad9c154..4169f4fa0e 100644
--- a/src/libANGLE/renderer/vulkan/ShareGroupVk.h
+++ b/src/libANGLE/renderer/vulkan/ShareGroupVk.h
@@ -36,37 +36,6 @@ class TextureUpload
TextureVk *mPrevUploadedMutableTexture;
};
-class UpdateDescriptorSetsBuilder final : angle::NonCopyable
-{
- public:
- UpdateDescriptorSetsBuilder();
- ~UpdateDescriptorSetsBuilder();
-
- VkDescriptorBufferInfo *allocDescriptorBufferInfos(size_t count);
- VkDescriptorImageInfo *allocDescriptorImageInfos(size_t count);
- VkWriteDescriptorSet *allocWriteDescriptorSets(size_t count);
- VkBufferView *allocBufferViews(size_t count);
-
- VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); }
- VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); }
- VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); }
- VkBufferView &allocBufferView() { return *allocBufferViews(1); }
-
- // Returns the number of written descriptor sets.
- uint32_t flushDescriptorSetUpdates(VkDevice device);
-
- private:
- template <typename T, const T *VkWriteDescriptorSet::*pInfo>
- T *allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count);
- template <typename T, const T *VkWriteDescriptorSet::*pInfo>
- void growDescriptorCapacity(std::vector<T> *descriptorVector, size_t newSize);
-
- std::vector<VkDescriptorBufferInfo> mDescriptorBufferInfos;
- std::vector<VkDescriptorImageInfo> mDescriptorImageInfos;
- std::vector<VkWriteDescriptorSet> mWriteDescriptorSets;
- std::vector<VkBufferView> mBufferViews;
-};
-
class ShareGroupVk : public ShareGroupImpl
{
public:
diff --git a/src/libANGLE/renderer/vulkan/TextureVk.cpp b/src/libANGLE/renderer/vulkan/TextureVk.cpp
index 68705e6942..d05dafdbb8 100644
--- a/src/libANGLE/renderer/vulkan/TextureVk.cpp
+++ b/src/libANGLE/renderer/vulkan/TextureVk.cpp
@@ -2254,8 +2254,15 @@ angle::Result TextureVk::generateMipmapsWithCompute(ContextVk *contextVk)
{
vk::CommandBufferAccess access;
+ // For mipmap generation, we should make sure that there is no pending write for the source
+ // mip level. If there is, a barrier should be inserted before the source mip is used.
+ const vk::LevelIndex srcLevelVk = dstBaseLevelVk - 1;
uint32_t writeLevelCount =
std::min(maxGenerateLevels.get(), dstMaxLevelVk.get() + 1 - dstBaseLevelVk.get());
+
+ access.onImageComputeMipmapGenerationRead(mImage->toGLLevel(srcLevelVk), 1, 0,
+ mImage->getLayerCount(),
+ VK_IMAGE_ASPECT_COLOR_BIT, mImage);
access.onImageComputeShaderWrite(mImage->toGLLevel(dstBaseLevelVk), writeLevelCount, 0,
mImage->getLayerCount(), VK_IMAGE_ASPECT_COLOR_BIT,
mImage);
@@ -2270,7 +2277,6 @@ angle::Result TextureVk::generateMipmapsWithCompute(ContextVk *contextVk)
const vk::ImageView *srcView = nullptr;
UtilsVk::GenerateMipmapDestLevelViews destLevelViews = {};
- const vk::LevelIndex srcLevelVk = dstBaseLevelVk - 1;
ANGLE_TRY(getImageViews().getLevelLayerDrawImageView(
contextVk, *mImage, srcLevelVk, layer, gl::SrgbWriteControlMode::Default,
&srcView));
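Declaring the source level as an explicit subresource read is what allows the per-subresource tracking (added in vk_helpers below) to insert a barrier only when that level was actually written earlier in the same command buffer. A sketch of the resulting access pattern, with illustrative level values:

    vk::CommandBufferAccess access;
    // Compute pass reads mip 0 and writes mips 1..4 of the same image.
    access.onImageComputeMipmapGenerationRead(gl::LevelIndex(0), 1, 0, layerCount,
                                              VK_IMAGE_ASPECT_COLOR_BIT, image);
    access.onImageComputeShaderWrite(gl::LevelIndex(1), 4, 0, layerCount,
                                     VK_IMAGE_ASPECT_COLOR_BIT, image);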
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
index 1030ff52b8..a18ce01066 100644
--- a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
@@ -6334,6 +6334,109 @@ void PipelineCacheAccess::merge(RendererVk *renderer, const vk::PipelineCache &p
}
} // namespace vk
+// UpdateDescriptorSetsBuilder implementation.
+UpdateDescriptorSetsBuilder::UpdateDescriptorSetsBuilder()
+{
+ // Reserve a reasonable amount of space so that for the majority of apps we don't need to grow at all
+ constexpr size_t kDescriptorBufferInfosInitialSize = 8;
+ constexpr size_t kDescriptorImageInfosInitialSize = 4;
+ constexpr size_t kDescriptorWriteInfosInitialSize =
+ kDescriptorBufferInfosInitialSize + kDescriptorImageInfosInitialSize;
+ constexpr size_t kDescriptorBufferViewsInitialSize = 0;
+
+ mDescriptorBufferInfos.reserve(kDescriptorBufferInfosInitialSize);
+ mDescriptorImageInfos.reserve(kDescriptorImageInfosInitialSize);
+ mWriteDescriptorSets.reserve(kDescriptorWriteInfosInitialSize);
+ mBufferViews.reserve(kDescriptorBufferViewsInitialSize);
+}
+
+UpdateDescriptorSetsBuilder::~UpdateDescriptorSetsBuilder() = default;
+
+template <typename T, const T *VkWriteDescriptorSet::*pInfo>
+void UpdateDescriptorSetsBuilder::growDescriptorCapacity(std::vector<T> *descriptorVector,
+ size_t newSize)
+{
+ const T *const oldInfoStart = descriptorVector->empty() ? nullptr : &(*descriptorVector)[0];
+ size_t newCapacity = std::max(descriptorVector->capacity() << 1, newSize);
+ descriptorVector->reserve(newCapacity);
+
+ if (oldInfoStart)
+ {
+ // patch mWriteDescriptorSets with the new BufferInfo/ImageInfo pointers
+ for (VkWriteDescriptorSet &set : mWriteDescriptorSets)
+ {
+ if (set.*pInfo)
+ {
+ size_t index = set.*pInfo - oldInfoStart;
+ set.*pInfo = &(*descriptorVector)[index];
+ }
+ }
+ }
+}
+
+template <typename T, const T *VkWriteDescriptorSet::*pInfo>
+T *UpdateDescriptorSetsBuilder::allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count)
+{
+ size_t oldSize = descriptorVector->size();
+ size_t newSize = oldSize + count;
+ if (newSize > descriptorVector->capacity())
+ {
+ // If we have reached capacity, grow the storage and patch the write descriptor sets
+ // with the new info pointers
+ growDescriptorCapacity<T, pInfo>(descriptorVector, newSize);
+ }
+ descriptorVector->resize(newSize);
+ return &(*descriptorVector)[oldSize];
+}
+
+VkDescriptorBufferInfo *UpdateDescriptorSetsBuilder::allocDescriptorBufferInfos(size_t count)
+{
+ return allocDescriptorInfos<VkDescriptorBufferInfo, &VkWriteDescriptorSet::pBufferInfo>(
+ &mDescriptorBufferInfos, count);
+}
+
+VkDescriptorImageInfo *UpdateDescriptorSetsBuilder::allocDescriptorImageInfos(size_t count)
+{
+ return allocDescriptorInfos<VkDescriptorImageInfo, &VkWriteDescriptorSet::pImageInfo>(
+ &mDescriptorImageInfos, count);
+}
+
+VkWriteDescriptorSet *UpdateDescriptorSetsBuilder::allocWriteDescriptorSets(size_t count)
+{
+ size_t oldSize = mWriteDescriptorSets.size();
+ size_t newSize = oldSize + count;
+ mWriteDescriptorSets.resize(newSize);
+ return &mWriteDescriptorSets[oldSize];
+}
+
+VkBufferView *UpdateDescriptorSetsBuilder::allocBufferViews(size_t count)
+{
+ return allocDescriptorInfos<VkBufferView, &VkWriteDescriptorSet::pTexelBufferView>(
+ &mBufferViews, count);
+}
+
+uint32_t UpdateDescriptorSetsBuilder::flushDescriptorSetUpdates(VkDevice device)
+{
+ if (mWriteDescriptorSets.empty())
+ {
+ ASSERT(mDescriptorBufferInfos.empty());
+ ASSERT(mDescriptorImageInfos.empty());
+ return 0;
+ }
+
+ vkUpdateDescriptorSets(device, static_cast<uint32_t>(mWriteDescriptorSets.size()),
+ mWriteDescriptorSets.data(), 0, nullptr);
+
+ uint32_t retVal = static_cast<uint32_t>(mWriteDescriptorSets.size());
+
+ mWriteDescriptorSets.clear();
+ mDescriptorBufferInfos.clear();
+ mDescriptorImageInfos.clear();
+ mBufferViews.clear();
+
+ return retVal;
+}
+
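The builder batches descriptor writes so a frame's worth of updates becomes a single vkUpdateDescriptorSets call; growDescriptorCapacity() re-points pBufferInfo/pImageInfo after a vector reallocation so previously batched VkWriteDescriptorSet entries stay valid. A minimal usage sketch, assuming a valid device, descriptorSet, and buffer:

    UpdateDescriptorSetsBuilder builder;

    VkDescriptorBufferInfo &bufferInfo = builder.allocDescriptorBufferInfo();
    bufferInfo = {buffer, 0, VK_WHOLE_SIZE};

    VkWriteDescriptorSet &write = builder.allocWriteDescriptorSet();
    write                 = {};
    write.sType           = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    write.dstSet          = descriptorSet;
    write.dstBinding      = 0;
    write.descriptorCount = 1;
    write.descriptorType  = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    write.pBufferInfo     = &bufferInfo;

    // Issues one vkUpdateDescriptorSets() for everything batched so far.
    uint32_t writtenCount = builder.flushDescriptorSetUpdates(device);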
// FramebufferCache implementation.
void FramebufferCache::destroy(RendererVk *rendererVk)
{
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.h b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
index 554abeb884..fe9c4e2d08 100644
--- a/src/libANGLE/renderer/vulkan/vk_cache_utils.h
+++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
@@ -2657,6 +2657,38 @@ class DescriptorSetCache final : angle::NonCopyable
// There is 1 default uniform binding used per stage.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
+
+class UpdateDescriptorSetsBuilder final : angle::NonCopyable
+{
+ public:
+ UpdateDescriptorSetsBuilder();
+ ~UpdateDescriptorSetsBuilder();
+
+ VkDescriptorBufferInfo *allocDescriptorBufferInfos(size_t count);
+ VkDescriptorImageInfo *allocDescriptorImageInfos(size_t count);
+ VkWriteDescriptorSet *allocWriteDescriptorSets(size_t count);
+ VkBufferView *allocBufferViews(size_t count);
+
+ VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); }
+ VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); }
+ VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); }
+ VkBufferView &allocBufferView() { return *allocBufferViews(1); }
+
+ // Returns the number of written descriptor sets.
+ uint32_t flushDescriptorSetUpdates(VkDevice device);
+
+ private:
+ template <typename T, const T *VkWriteDescriptorSet::*pInfo>
+ T *allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count);
+ template <typename T, const T *VkWriteDescriptorSet::*pInfo>
+ void growDescriptorCapacity(std::vector<T> *descriptorVector, size_t newSize);
+
+ std::vector<VkDescriptorBufferInfo> mDescriptorBufferInfos;
+ std::vector<VkDescriptorImageInfo> mDescriptorImageInfos;
+ std::vector<VkWriteDescriptorSet> mWriteDescriptorSets;
+ std::vector<VkBufferView> mBufferViews;
+};
+
} // namespace rx
#endif // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.cpp b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
index a6cec0f035..aede21b4e7 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
@@ -1035,6 +1035,15 @@ gl::TexLevelMask AggregateSkipLevels(const gl::CubeFaceArray<gl::TexLevelMask> &
}
return skipLevelsAllFaces;
}
+
+// Compute the layer write mask covering a range of layers within an image level.
+ImageLayerWriteMask GetImageLayerWriteMask(uint32_t layerStart, uint32_t layerCount)
+{
+ ImageLayerWriteMask layerMask = angle::BitMask<uint64_t>(layerCount);
+ uint32_t rotateShift = layerStart % kMaxParallelLayerWrites;
+ layerMask = (layerMask << rotateShift) | (layerMask >> (kMaxParallelLayerWrites - rotateShift));
+ return layerMask;
+}
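Because the mask is built with a rotate rather than a plain shift, a layer range that crosses the 64-bit boundary wraps around instead of being dropped. For example, layerStart 62 with layerCount 4:

    // BitMask(4) = 0b1111, rotated left by 62: bits 0 and 1 of the range land
    // on bits 62 and 63, while bits 2 and 3 wrap to bits 0 and 1.
    ImageLayerWriteMask mask = GetImageLayerWriteMask(62, 4);
    ASSERT(mask.test(62) && mask.test(63) && mask.test(0) && mask.test(1));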
} // anonymous namespace
// This is an arbitrary max. We can change this later if necessary.
@@ -1654,8 +1663,10 @@ void CommandBufferHelperCommon::imageWriteImpl(ContextVk *contextVk,
ImageHelper *image)
{
image->onWrite(level, 1, layerStart, layerCount, aspectFlags);
- // Write always requires a barrier
- updateImageLayoutAndBarrier(contextVk, image, aspectFlags, imageLayout);
+ if (image->isWriteBarrierNecessary(imageLayout, level, 1, layerStart, layerCount))
+ {
+ updateImageLayoutAndBarrier(contextVk, image, aspectFlags, imageLayout);
+ }
}
void CommandBufferHelperCommon::updateImageLayoutAndBarrier(Context *context,
@@ -6580,6 +6591,65 @@ bool ImageHelper::isReadBarrierNecessary(ImageLayout newLayout) const
return HasResourceWriteAccess(layoutData.type);
}
+bool ImageHelper::isReadSubresourceBarrierNecessary(ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount) const
+{
+ // In case an image has both read and write permissions, the subresources written since the
+ // last barrier should be checked to avoid RAW and WAR hazards. However, if a layout change is
+ // necessary regardless, there is no need to check the written subresources.
+ if (mCurrentLayout != newLayout)
+ {
+ return true;
+ }
+
+ ImageLayerWriteMask layerMask = GetImageLayerWriteMask(layerStart, layerCount);
+ for (uint32_t levelOffset = 0; levelOffset < levelCount; levelOffset++)
+ {
+ uint32_t level = levelStart.get() + levelOffset;
+ if (areLevelSubresourcesWrittenWithinMaskRange(level, layerMask))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool ImageHelper::isWriteBarrierNecessary(ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount) const
+{
+ // If transitioning to a different layout, we always need a barrier.
+ if (mCurrentLayout != newLayout)
+ {
+ return true;
+ }
+
+ if (layerCount >= kMaxParallelLayerWrites)
+ {
+ return true;
+ }
+
+ // If we are writing to the same parts of the image (level/layer), we need a barrier. Otherwise,
+ // it can be done in parallel.
+ ImageLayerWriteMask layerMask = GetImageLayerWriteMask(layerStart, layerCount);
+ for (uint32_t levelOffset = 0; levelOffset < levelCount; levelOffset++)
+ {
+ uint32_t level = levelStart.get() + levelOffset;
+ if (areLevelSubresourcesWrittenWithinMaskRange(level, layerMask))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
void ImageHelper::changeLayoutAndQueue(Context *context,
VkImageAspectFlags aspectMask,
ImageLayout newLayout,
@@ -6740,6 +6810,7 @@ void ImageHelper::barrierImpl(Context *context,
mCurrentLayout = newLayout;
mCurrentQueueFamilyIndex = newQueueFamilyIndex;
+ resetSubresourcesWrittenSinceBarrier();
}
template void ImageHelper::barrierImpl<priv::CommandBuffer>(
@@ -6750,19 +6821,84 @@ template void ImageHelper::barrierImpl<priv::CommandBuffer>(
priv::CommandBuffer *commandBuffer,
VkSemaphore *acquireNextImageSemaphoreOut);
+void ImageHelper::setSubresourcesWrittenSinceBarrier(gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount)
+{
+ for (uint32_t levelOffset = 0; levelOffset < levelCount; levelOffset++)
+ {
+ uint32_t level = levelStart.get() + levelOffset;
+ if (layerCount >= kMaxParallelLayerWrites)
+ {
+ mSubresourcesWrittenSinceBarrier[level].set();
+ }
+ else
+ {
+ ImageLayerWriteMask layerMask = GetImageLayerWriteMask(layerStart, layerCount);
+ mSubresourcesWrittenSinceBarrier[level] |= layerMask;
+ }
+ }
+}
+
+void ImageHelper::resetSubresourcesWrittenSinceBarrier()
+{
+ for (auto &layerWriteMask : mSubresourcesWrittenSinceBarrier)
+ {
+ layerWriteMask.reset();
+ }
+}
+
void ImageHelper::recordWriteBarrier(Context *context,
VkImageAspectFlags aspectMask,
ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
OutsideRenderPassCommandBufferHelper *commands)
{
- VkSemaphore acquireNextImageSemaphore;
- barrierImpl(context, aspectMask, newLayout, context->getRenderer()->getQueueFamilyIndex(),
- &commands->getCommandBuffer(), &acquireNextImageSemaphore);
+ if (isWriteBarrierNecessary(newLayout, levelStart, levelCount, layerStart, layerCount))
+ {
+ VkSemaphore acquireNextImageSemaphore;
+ barrierImpl(context, aspectMask, newLayout, context->getRenderer()->getQueueFamilyIndex(),
+ &commands->getCommandBuffer(), &acquireNextImageSemaphore);
- if (acquireNextImageSemaphore != VK_NULL_HANDLE)
+ if (acquireNextImageSemaphore != VK_NULL_HANDLE)
+ {
+ commands->setAcquireNextImageSemaphore(acquireNextImageSemaphore);
+ }
+ }
+
+ setSubresourcesWrittenSinceBarrier(levelStart, levelCount, layerStart, layerCount);
+}
+
+void ImageHelper::recordReadSubresourceBarrier(Context *context,
+ VkImageAspectFlags aspectMask,
+ ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
+ OutsideRenderPassCommandBufferHelper *commands)
+{
+ // This barrier is used for an image with both read/write permissions, including during mipmap
+ // generation and self-copy.
+ if (isReadSubresourceBarrierNecessary(newLayout, levelStart, levelCount, layerStart,
+ layerCount))
{
- commands->setAcquireNextImageSemaphore(acquireNextImageSemaphore);
+ VkSemaphore acquireNextImageSemaphore;
+ barrierImpl(context, aspectMask, newLayout, context->getRenderer()->getQueueFamilyIndex(),
+ &commands->getCommandBuffer(), &acquireNextImageSemaphore);
+
+ if (acquireNextImageSemaphore != VK_NULL_HANDLE)
+ {
+ commands->setAcquireNextImageSemaphore(acquireNextImageSemaphore);
+ }
}
+
+ // Levels/layers being read from are also registered to avoid RAW and WAR hazards.
+ setSubresourcesWrittenSinceBarrier(levelStart, levelCount, layerStart, layerCount);
}
void ImageHelper::recordReadBarrier(Context *context,
@@ -7091,7 +7227,9 @@ angle::Result ImageHelper::CopyImageSubData(const gl::Context *context,
CommandBufferAccess access;
if (srcImage == dstImage)
{
- access.onImageSelfCopy(dstLevelGL, 1, region.dstSubresource.baseArrayLayer,
+ access.onImageSelfCopy(srcLevelGL, 1, region.srcSubresource.baseArrayLayer,
+ region.srcSubresource.layerCount, dstLevelGL, 1,
+ region.dstSubresource.baseArrayLayer,
region.dstSubresource.layerCount, aspectFlags, srcImage);
}
else
@@ -8797,15 +8935,6 @@ angle::Result ImageHelper::flushStagedUpdatesImpl(ContextVk *contextVk,
const gl::TexLevelMask &skipLevelsAllFaces)
{
RendererVk *renderer = contextVk->getRenderer();
- // For each level, upload layers that don't conflict in parallel. The layer is hashed to
- // `layer % 64` and used to track whether that subresource is currently in transfer. If so, a
- // barrier is inserted. If mLayerCount > 64, there will be a few unnecessary barriers.
- //
- // Note: when a barrier is necessary when uploading updates to a level, we could instead move to
- // the next level and continue uploads in parallel. Once all levels need a barrier, a single
- // barrier can be issued and we could continue with the rest of the updates from the first
- // level.
- constexpr uint32_t kMaxParallelSubresourceUpload = 64;
const angle::FormatID &actualformat = getActualFormatID();
const angle::FormatID &intendedFormat = getIntendedFormatID();
@@ -8843,9 +8972,6 @@ angle::Result ImageHelper::flushStagedUpdatesImpl(ContextVk *contextVk,
std::vector<SubresourceUpdate> updatesToKeep;
ASSERT(levelUpdates != nullptr);
- // Hash map of uploads in progress. See comment on kMaxParallelSubresourceUpload.
- uint64_t subresourceUploadsInProgress = 0;
-
for (SubresourceUpdate &update : *levelUpdates)
{
ASSERT(IsClearOfAllChannels(update.updateSource) ||
@@ -8920,35 +9046,34 @@ angle::Result ImageHelper::flushStagedUpdatesImpl(ContextVk *contextVk,
}
}
- // In case of multiple layer updates within the same level, a barrier might be needed if
- // there are multiple updates in the same parts of the image.
- if (updateLayerCount >= kMaxParallelSubresourceUpload)
+ // When a barrier is necessary while uploading updates to a level, we could instead move
+ // to the next level and continue uploads in parallel. Once all levels need a barrier,
+ // a single barrier can be issued and we could continue with the rest of the updates
+ // from the first level. In case of multiple layer updates within the same level, a
+ // barrier might be needed if there are multiple updates to the same parts of the image.
+ ImageLayout barrierLayout =
+ transCoding ? ImageLayout::TransferDstAndComputeWrite : ImageLayout::TransferDst;
+ if (updateLayerCount >= kMaxParallelLayerWrites)
{
// If there are more subresources than bits we can track, always insert a barrier.
- recordWriteBarrier(contextVk, aspectFlags,
- transCoding ? ImageLayout::TransferDstAndComputeWrite
- : ImageLayout::TransferDst,
- commandBuffer);
- subresourceUploadsInProgress = std::numeric_limits<uint64_t>::max();
+ recordWriteBarrier(contextVk, aspectFlags, barrierLayout, updateMipLevelGL, 1,
+ updateBaseLayer, updateLayerCount, commandBuffer);
+ mSubresourcesWrittenSinceBarrier[updateMipLevelGL.get()].set();
}
else
{
- const uint64_t subresourceHashRange = angle::BitMask<uint64_t>(updateLayerCount);
- const uint32_t subresourceHashOffset =
- updateBaseLayer % kMaxParallelSubresourceUpload;
- const uint64_t subresourceHash =
- ANGLE_ROTL64(subresourceHashRange, subresourceHashOffset);
+ ImageLayerWriteMask subresourceHash =
+ GetImageLayerWriteMask(updateBaseLayer, updateLayerCount);
- if ((subresourceUploadsInProgress & subresourceHash) != 0)
+ if (areLevelSubresourcesWrittenWithinMaskRange(updateMipLevelGL.get(),
+ subresourceHash))
{
// If there's overlap in subresource upload, issue a barrier.
- recordWriteBarrier(contextVk, aspectFlags,
- transCoding ? ImageLayout::TransferDstAndComputeWrite
- : ImageLayout::TransferDst,
- commandBuffer);
- subresourceUploadsInProgress = 0;
+ recordWriteBarrier(contextVk, aspectFlags, barrierLayout, updateMipLevelGL, 1,
+ updateBaseLayer, updateLayerCount, commandBuffer);
+ mSubresourcesWrittenSinceBarrier[updateMipLevelGL.get()].reset();
}
- subresourceUploadsInProgress |= subresourceHash;
+ mSubresourcesWrittenSinceBarrier[updateMipLevelGL.get()] |= subresourceHash;
}
// Add the necessary commands to the outside command buffer.
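Concretely, the per-level mask makes the following sequence possible within one flush (illustrative layer ranges, all on the same mip level):

    // upload layers 0-3  -> mask 0b00001111 recorded, no barrier
    // upload layers 4-7  -> mask 0b11110000, disjoint, still no barrier
    // upload layers 2-5  -> overlaps the recorded mask, so a barrier is
    //                       inserted and the level's mask starts over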
@@ -11477,6 +11602,20 @@ void CommandBufferAccess::onImageWrite(gl::LevelIndex levelStart,
levelCount, layerStart, layerCount);
}
+void CommandBufferAccess::onImageReadSubresources(gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
+ VkImageAspectFlags aspectFlags,
+ ImageLayout imageLayout,
+ ImageHelper *image)
+{
+ ASSERT(!image->isReleasedToExternal());
+ ASSERT(image->getImageSerial().valid());
+ mReadImageSubresources.emplace_back(CommandBufferImageAccess{image, aspectFlags, imageLayout},
+ levelStart, levelCount, layerStart, layerCount);
+}
+
void CommandBufferAccess::onBufferExternalAcquireRelease(BufferHelper *buffer)
{
mExternalAcquireReleaseBuffers.emplace_back(CommandBufferBufferExternalAcquireRelease{buffer});
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.h b/src/libANGLE/renderer/vulkan/vk_helpers.h
index 073c1b2e62..19f51af647 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.h
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.h
@@ -51,6 +51,14 @@ constexpr VkPipelineStageFlags kSwapchainAcquireImageWaitStageFlags =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | // First use is a draw command.
VK_PIPELINE_STAGE_TRANSFER_BIT; // First use is a clear without scissor.
+// For each level, write layers that don't conflict in parallel. The layer is hashed to
+// `layer % kMaxParallelLayerWrites` and used to track whether that subresource is currently
+// being written. If so, a barrier is inserted; otherwise, the barrier is avoided. If the updated
+// layer count is greater than kMaxParallelLayerWrites, there will be a few unnecessary
+// barriers.
+constexpr uint32_t kMaxParallelLayerWrites = 64;
+using ImageLayerWriteMask = std::bitset<kMaxParallelLayerWrites>;
+
using StagingBufferOffsetArray = std::array<VkDeviceSize, 2>;
// A dynamic buffer is conceptually an infinitely long buffer. Each time you write to the buffer,
@@ -2317,8 +2325,21 @@ class ImageHelper final : public Resource, public angle::Subject
void recordWriteBarrier(Context *context,
VkImageAspectFlags aspectMask,
ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
OutsideRenderPassCommandBufferHelper *commands);
+ void recordReadSubresourceBarrier(Context *context,
+ VkImageAspectFlags aspectMask,
+ ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
+ OutsideRenderPassCommandBufferHelper *commands);
+
void recordWriteBarrierOneOff(Context *context,
ImageLayout newLayout,
PrimaryCommandBuffer *commandBuffer,
@@ -2330,6 +2351,16 @@ class ImageHelper final : public Resource, public angle::Subject
// This function can be used to prevent issuing redundant layout transition commands.
bool isReadBarrierNecessary(ImageLayout newLayout) const;
+ bool isReadSubresourceBarrierNecessary(ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount) const;
+ bool isWriteBarrierNecessary(ImageLayout newLayout,
+ gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount) const;
void recordReadBarrier(Context *context,
VkImageAspectFlags aspectMask,
@@ -2648,6 +2679,18 @@ class ImageHelper final : public Resource, public angle::Subject
CommandBufferT *commandBuffer,
VkSemaphore *acquireNextImageSemaphoreOut);
+ void setSubresourcesWrittenSinceBarrier(gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount);
+
+ void resetSubresourcesWrittenSinceBarrier();
+ bool areLevelSubresourcesWrittenWithinMaskRange(uint32_t level,
+ const ImageLayerWriteMask &layerMask) const
+ {
+ return (mSubresourcesWrittenSinceBarrier[level] & layerMask) != 0;
+ }
+
// If the image has emulated channels, we clear them once so as not to leave garbage on those
// channels.
VkColorComponentFlags getEmulatedChannelsMask() const;
@@ -2906,6 +2949,11 @@ class ImageHelper final : public Resource, public angle::Subject
// Only used for swapChain images. This is set when an image is acquired and is waited on
// by the next submission (which uses this image), at which point it is released.
Semaphore mAcquireNextImageSemaphore;
+
+ // Used to track subresource writes per level/layer. This can help parallelize writes to
+ // different levels or layers of the image, such as data uploads.
+ // See comment on kMaxParallelLayerWrites.
+ gl::TexLevelArray<ImageLayerWriteMask> mSubresourcesWrittenSinceBarrier;
};
ANGLE_INLINE bool RenderPassCommandBufferHelper::usesImage(const ImageHelper &image) const
@@ -3372,7 +3420,7 @@ struct CommandBufferImageAccess
VkImageAspectFlags aspectFlags;
ImageLayout imageLayout;
};
-struct CommandBufferImageWrite
+struct CommandBufferImageSubresourceAccess
{
CommandBufferImageAccess access;
gl::LevelIndex levelStart;
@@ -3431,13 +3479,19 @@ class CommandBufferAccess : angle::NonCopyable
onImageWrite(levelStart, levelCount, layerStart, layerCount, aspectFlags,
ImageLayout::TransferDst, image);
}
- void onImageSelfCopy(gl::LevelIndex writeLevelStart,
+ void onImageSelfCopy(gl::LevelIndex readLevelStart,
+ uint32_t readLevelCount,
+ uint32_t readLayerStart,
+ uint32_t readLayerCount,
+ gl::LevelIndex writeLevelStart,
uint32_t writeLevelCount,
uint32_t writeLayerStart,
uint32_t writeLayerCount,
VkImageAspectFlags aspectFlags,
ImageHelper *image)
{
+ onImageReadSubresources(readLevelStart, readLevelCount, readLayerStart, readLayerCount,
+ aspectFlags, ImageLayout::TransferSrcDst, image);
onImageWrite(writeLevelStart, writeLevelCount, writeLayerStart, writeLayerCount,
aspectFlags, ImageLayout::TransferSrcDst, image);
}
@@ -3445,6 +3499,16 @@ class CommandBufferAccess : angle::NonCopyable
{
onImageRead(aspectFlags, ImageLayout::ComputeShaderReadOnly, image);
}
+ void onImageComputeMipmapGenerationRead(gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
+ VkImageAspectFlags aspectFlags,
+ ImageHelper *image)
+ {
+ onImageReadSubresources(levelStart, levelCount, layerStart, layerCount, aspectFlags,
+ ImageLayout::ComputeShaderWrite, image);
+ }
void onImageComputeShaderWrite(gl::LevelIndex levelStart,
uint32_t levelCount,
uint32_t layerStart,
@@ -3471,10 +3535,12 @@ class CommandBufferAccess : angle::NonCopyable
// The limits reflect the current maximum concurrent usage of each resource type. ASSERTs will
// fire if this limit is exceeded in the future.
- using ReadBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>;
- using WriteBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>;
- using ReadImages = angle::FixedVector<CommandBufferImageAccess, 2>;
- using WriteImages = angle::FixedVector<CommandBufferImageWrite, 1>;
+ using ReadBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>;
+ using WriteBuffers = angle::FixedVector<CommandBufferBufferAccess, 2>;
+ using ReadImages = angle::FixedVector<CommandBufferImageAccess, 2>;
+ using WriteImages = angle::FixedVector<CommandBufferImageSubresourceAccess, 1>;
+ using ReadImageSubresources = angle::FixedVector<CommandBufferImageSubresourceAccess, 1>;
+
using ExternalAcquireReleaseBuffers =
angle::FixedVector<CommandBufferBufferExternalAcquireRelease, 1>;
using AccessResources = angle::FixedVector<CommandBufferResourceAccess, 1>;
@@ -3483,6 +3549,7 @@ class CommandBufferAccess : angle::NonCopyable
const WriteBuffers &getWriteBuffers() const { return mWriteBuffers; }
const ReadImages &getReadImages() const { return mReadImages; }
const WriteImages &getWriteImages() const { return mWriteImages; }
+ const ReadImageSubresources &getReadImageSubresources() const { return mReadImageSubresources; }
const ExternalAcquireReleaseBuffers &getExternalAcquireReleaseBuffers() const
{
return mExternalAcquireReleaseBuffers;
@@ -3503,12 +3570,22 @@ class CommandBufferAccess : angle::NonCopyable
VkImageAspectFlags aspectFlags,
ImageLayout imageLayout,
ImageHelper *image);
+
+ void onImageReadSubresources(gl::LevelIndex levelStart,
+ uint32_t levelCount,
+ uint32_t layerStart,
+ uint32_t layerCount,
+ VkImageAspectFlags aspectFlags,
+ ImageLayout imageLayout,
+ ImageHelper *image);
+
void onResourceAccess(Resource *resource);
ReadBuffers mReadBuffers;
WriteBuffers mWriteBuffers;
ReadImages mReadImages;
WriteImages mWriteImages;
+ ReadImageSubresources mReadImageSubresources;
ExternalAcquireReleaseBuffers mExternalAcquireReleaseBuffers;
AccessResources mAccessResources;
};
diff --git a/src/libANGLE/validationCL.cpp b/src/libANGLE/validationCL.cpp
index 9d12a3f0b4..e4675cb18d 100644
--- a/src/libANGLE/validationCL.cpp
+++ b/src/libANGLE/validationCL.cpp
@@ -1163,6 +1163,37 @@ cl_int ValidateBuildProgram(cl_program program,
return CL_INVALID_OPERATION;
}
+ // CL_INVALID_BINARY if program was created with clCreateProgramWithBinary and a device
+ // does not have a valid program binary loaded
+ std::vector<size_t> binSizes(prog.getDevices().size());
+ std::vector<std::vector<unsigned char>> bins(prog.getDevices().size());
+ if (IsError(prog.getInfo(ProgramInfo::BinarySizes, binSizes.size() * sizeof(size_t),
+ binSizes.data(), nullptr)))
+ {
+ return CL_INVALID_PROGRAM;
+ }
+ std::vector<unsigned char *> binPtrs(prog.getDevices().size());
+ for (size_t i = 0; i < prog.getDevices().size(); ++i)
+ {
+ bins.at(i).resize(binSizes[i]);
+ binPtrs[i] = bins[i].data();
+ }
+ if (IsError(prog.getInfo(ProgramInfo::Binaries, sizeof(unsigned char *) * binPtrs.size(),
+ binPtrs.data(), nullptr)))
+ {
+ return CL_INVALID_VALUE;
+ }
+ for (size_t i = 0; i < prog.getDevices().size(); ++i)
+ {
+ cl_program_binary_type binType;
+ if (IsError(prog.getBuildInfo(prog.getDevices()[i]->getNative(),
+ ProgramBuildInfo::BinaryType, sizeof(cl_program_binary_type),
+ &binType, nullptr)))
+ {
+ return CL_INVALID_VALUE;
+ }
+ if ((binType != CL_PROGRAM_BINARY_TYPE_NONE) && bins[i].empty())
+ {
+ return CL_INVALID_BINARY;
+ }
+ }
+
return CL_SUCCESS;
}
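The validation mirrors the standard two-step query an application performs: CL_PROGRAM_BINARY_SIZES first, then CL_PROGRAM_BINARIES with an array of per-device destination pointers. A hedged client-side sketch for a two-device program (error handling omitted):

    size_t sizes[2] = {};
    clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(sizes), sizes, nullptr);

    std::vector<unsigned char> bin0(sizes[0]), bin1(sizes[1]);
    unsigned char *binaries[2] = {bin0.data(), bin1.data()};
    clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(binaries), binaries, nullptr);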
@@ -1201,6 +1232,18 @@ cl_int ValidateGetProgramInfo(cl_program program,
break;
}
+ // CL_INVALID_VALUE if size in bytes specified by param_value_size is < size of return type
+ // as described in the Program Object Queries table and param_value is not NULL.
+ if (param_value != nullptr)
+ {
+ size_t valueSizeRet = 0;
+ if (IsError(prog.getInfo(param_name, 0, nullptr, &valueSizeRet)) ||
+ param_value_size < valueSizeRet)
+ {
+ return CL_INVALID_VALUE;
+ }
+ }
+
return CL_SUCCESS;
}
@@ -1241,6 +1284,18 @@ cl_int ValidateGetProgramBuildInfo(cl_program program,
break;
}
+ // CL_INVALID_VALUE if size in bytes specified by param_value_size is < size of return type
+ // as described in the Program Object Queries table and param_value is not NULL.
+ if (param_value != nullptr)
+ {
+ size_t valueSizeRet = 0;
+ if (IsError(prog.getBuildInfo(device, param_name, 0, nullptr, &valueSizeRet)) ||
+ param_value_size < valueSizeRet)
+ {
+ return CL_INVALID_VALUE;
+ }
+ }
+
return CL_SUCCESS;
}