diff options
Diffstat (limited to 'layers/gpu_validation.cpp')
-rw-r--r-- | layers/gpu_validation.cpp | 413 |
1 file changed, 293 insertions, 120 deletions
diff --git a/layers/gpu_validation.cpp b/layers/gpu_validation.cpp index effb01137..38737a956 100644 --- a/layers/gpu_validation.cpp +++ b/layers/gpu_validation.cpp @@ -38,6 +38,10 @@ // This is the number of bindings in the debug descriptor set. static const uint32_t kNumBindingsInSet = 2; +static const VkShaderStageFlags kShaderStageAllRayTracing = + VK_SHADER_STAGE_ANY_HIT_BIT_NV | VK_SHADER_STAGE_CALLABLE_BIT_NV | VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV | + VK_SHADER_STAGE_INTERSECTION_BIT_NV | VK_SHADER_STAGE_MISS_BIT_NV | VK_SHADER_STAGE_RAYGEN_BIT_NV; + // Implementation for Descriptor Set Manager class GpuDescriptorSetManager::GpuDescriptorSetManager(CoreChecks *dev_data) { dev_data_ = dev_data; } @@ -191,7 +195,8 @@ VkResult CoreChecks::GpuInitializeVma() { VmaAllocatorCreateInfo allocatorInfo = {}; allocatorInfo.device = device; ValidationObject *device_object = GetLayerDataPtr(get_dispatch_key(allocatorInfo.device), layer_data_map); - ValidationObject *validation_data = GetValidationObject(device_object->object_dispatch, LayerObjectTypeCoreValidation); + ValidationObject *validation_data = + ValidationObject::GetValidationObject(device_object->object_dispatch, LayerObjectTypeCoreValidation); CoreChecks *core_checks = static_cast<CoreChecks *>(validation_data); allocatorInfo.physicalDevice = core_checks->physical_device; @@ -225,17 +230,29 @@ void CoreChecks::ReportSetupProblem(VkDebugReportObjectTypeEXT object_type, uint } // Turn on necessary device features. 
-void CoreChecks::GpuPreCallRecordCreateDevice(VkPhysicalDevice gpu, std::unique_ptr<safe_VkDeviceCreateInfo> &create_info, +void CoreChecks::GpuPreCallRecordCreateDevice(VkPhysicalDevice gpu, safe_VkDeviceCreateInfo *modified_create_info, VkPhysicalDeviceFeatures *supported_features) { if (supported_features->fragmentStoresAndAtomics || supported_features->vertexPipelineStoresAndAtomics) { - VkPhysicalDeviceFeatures new_features = {}; - if (create_info->pEnabledFeatures) { - new_features = *create_info->pEnabledFeatures; + VkPhysicalDeviceFeatures *features = nullptr; + if (modified_create_info->pEnabledFeatures) { + // If pEnabledFeatures, VkPhysicalDeviceFeatures2 in pNext chain is not allowed + features = const_cast<VkPhysicalDeviceFeatures *>(modified_create_info->pEnabledFeatures); + } else { + VkPhysicalDeviceFeatures2 *features2 = nullptr; + features2 = + const_cast<VkPhysicalDeviceFeatures2 *>(lvl_find_in_chain<VkPhysicalDeviceFeatures2>(modified_create_info->pNext)); + if (features2) features = &features2->features; + } + if (features) { + features->fragmentStoresAndAtomics = supported_features->fragmentStoresAndAtomics; + features->vertexPipelineStoresAndAtomics = supported_features->vertexPipelineStoresAndAtomics; + } else { + VkPhysicalDeviceFeatures new_features = {}; + new_features.fragmentStoresAndAtomics = supported_features->fragmentStoresAndAtomics; + new_features.vertexPipelineStoresAndAtomics = supported_features->vertexPipelineStoresAndAtomics; + delete modified_create_info->pEnabledFeatures; + modified_create_info->pEnabledFeatures = new VkPhysicalDeviceFeatures(new_features); } - new_features.fragmentStoresAndAtomics = supported_features->fragmentStoresAndAtomics; - new_features.vertexPipelineStoresAndAtomics = supported_features->vertexPipelineStoresAndAtomics; - delete create_info->pEnabledFeatures; - create_info->pEnabledFeatures = new VkPhysicalDeviceFeatures(new_features); } } @@ -289,14 +306,14 @@ void 
CoreChecks::GpuPostCallRecordCreateDevice(const CHECK_ENABLED *enables, con 0, // output VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, - VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT, + VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT | kShaderStageAllRayTracing, NULL, }, { 1, // input VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, - VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT, + VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT | kShaderStageAllRayTracing, NULL, }, }; @@ -332,15 +349,17 @@ void CoreChecks::GpuPostCallRecordCreateDevice(const CHECK_ENABLED *enables, con // Clean up device-related resources void CoreChecks::GpuPreCallRecordDestroyDevice() { - if (gpu_validation_state->barrier_command_buffer) { - DispatchFreeCommandBuffers(device, gpu_validation_state->barrier_command_pool, 1, - &gpu_validation_state->barrier_command_buffer); - gpu_validation_state->barrier_command_buffer = VK_NULL_HANDLE; - } - if (gpu_validation_state->barrier_command_pool) { - DispatchDestroyCommandPool(device, gpu_validation_state->barrier_command_pool, NULL); - gpu_validation_state->barrier_command_pool = VK_NULL_HANDLE; + for (auto &queue_barrier_command_info_kv : gpu_validation_state->queue_barrier_command_infos) { + GpuQueueBarrierCommandInfo &queue_barrier_command_info = queue_barrier_command_info_kv.second; + + DispatchFreeCommandBuffers(device, queue_barrier_command_info.barrier_command_pool, 1, + &queue_barrier_command_info.barrier_command_buffer); + queue_barrier_command_info.barrier_command_buffer = VK_NULL_HANDLE; + + DispatchDestroyCommandPool(device, queue_barrier_command_info.barrier_command_pool, NULL); + queue_barrier_command_info.barrier_command_pool = VK_NULL_HANDLE; } + gpu_validation_state->queue_barrier_command_infos.clear(); if (gpu_validation_state->debug_desc_layout) { DispatchDestroyDescriptorSetLayout(device, gpu_validation_state->debug_desc_layout, NULL); gpu_validation_state->debug_desc_layout = VK_NULL_HANDLE; @@ -434,44 
+453,89 @@ std::vector<safe_VkGraphicsPipelineCreateInfo> CoreChecks::GpuPreCallRecordCreat VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines, std::vector<std::unique_ptr<PIPELINE_STATE>> &pipe_state) { std::vector<safe_VkGraphicsPipelineCreateInfo> new_pipeline_create_infos; - - GpuPreCallRecordPipelineCreations(count, pCreateInfos, nullptr, pAllocator, pPipelines, pipe_state, &new_pipeline_create_infos, - nullptr, VK_PIPELINE_BIND_POINT_GRAPHICS); + GpuPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, pipe_state, &new_pipeline_create_infos, + VK_PIPELINE_BIND_POINT_GRAPHICS); return new_pipeline_create_infos; } std::vector<safe_VkComputePipelineCreateInfo> CoreChecks::GpuPreCallRecordCreateComputePipelines( VkPipelineCache pipelineCache, uint32_t count, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines, std::vector<std::unique_ptr<PIPELINE_STATE>> &pipe_state) { std::vector<safe_VkComputePipelineCreateInfo> new_pipeline_create_infos; - GpuPreCallRecordPipelineCreations(count, nullptr, pCreateInfos, pAllocator, pPipelines, pipe_state, nullptr, - &new_pipeline_create_infos, VK_PIPELINE_BIND_POINT_COMPUTE); + GpuPreCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, pipe_state, &new_pipeline_create_infos, + VK_PIPELINE_BIND_POINT_COMPUTE); return new_pipeline_create_infos; } +std::vector<safe_VkRayTracingPipelineCreateInfoNV> CoreChecks::GpuPreCallRecordCreateRayTracingPipelinesNV( + VkPipelineCache pipelineCache, uint32_t count, const VkRayTracingPipelineCreateInfoNV *pCreateInfos, + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines, std::vector<std::unique_ptr<PIPELINE_STATE>> &pipe_state) { + std::vector<safe_VkRayTracingPipelineCreateInfoNV> new_pipeline_create_infos; + GpuPreCallRecordPipelineCreations(count, pCreateInfos, 
pAllocator, pPipelines, pipe_state, &new_pipeline_create_infos, + VK_PIPELINE_BIND_POINT_RAY_TRACING_NV); + return new_pipeline_create_infos; +} +template <typename CreateInfo> +struct CreatePipelineTraits {}; +template <> +struct CreatePipelineTraits<VkGraphicsPipelineCreateInfo> { + using SafeType = safe_VkGraphicsPipelineCreateInfo; + static const SafeType &GetPipelineCI(const PIPELINE_STATE *pipeline_state) { return pipeline_state->graphicsPipelineCI; } + static uint32_t GetStageCount(const VkGraphicsPipelineCreateInfo &createInfo) { return createInfo.stageCount; } + static VkShaderModule GetShaderModule(const VkGraphicsPipelineCreateInfo &createInfo, uint32_t stage) { + return createInfo.pStages[stage].module; + } + static void SetShaderModule(SafeType *createInfo, VkShaderModule shader_module, uint32_t stage) { + createInfo->pStages[stage].module = shader_module; + } +}; + +template <> +struct CreatePipelineTraits<VkComputePipelineCreateInfo> { + using SafeType = safe_VkComputePipelineCreateInfo; + static const SafeType &GetPipelineCI(const PIPELINE_STATE *pipeline_state) { return pipeline_state->computePipelineCI; } + static uint32_t GetStageCount(const VkComputePipelineCreateInfo &createInfo) { return 1; } + static VkShaderModule GetShaderModule(const VkComputePipelineCreateInfo &createInfo, uint32_t stage) { + return createInfo.stage.module; + } + static void SetShaderModule(SafeType *createInfo, VkShaderModule shader_module, uint32_t stage) { + assert(stage == 0); + createInfo->stage.module = shader_module; + } +}; +template <> +struct CreatePipelineTraits<VkRayTracingPipelineCreateInfoNV> { + using SafeType = safe_VkRayTracingPipelineCreateInfoNV; + static const SafeType &GetPipelineCI(const PIPELINE_STATE *pipeline_state) { return pipeline_state->raytracingPipelineCI; } + static uint32_t GetStageCount(const VkRayTracingPipelineCreateInfoNV &createInfo) { return createInfo.stageCount; } + static VkShaderModule GetShaderModule(const 
VkRayTracingPipelineCreateInfoNV &createInfo, uint32_t stage) { + return createInfo.pStages[stage].module; + } + static void SetShaderModule(SafeType *createInfo, VkShaderModule shader_module, uint32_t stage) { + createInfo->pStages[stage].module = shader_module; + } +}; // Examine the pipelines to see if they use the debug descriptor set binding index. // If any do, create new non-instrumented shader modules and use them to replace the instrumented // shaders in the pipeline. Return the (possibly) modified create infos to the caller. -void CoreChecks::GpuPreCallRecordPipelineCreations( - uint32_t count, const VkGraphicsPipelineCreateInfo *pGraphicsCreateInfos, - const VkComputePipelineCreateInfo *pComputeCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines, - std::vector<std::unique_ptr<PIPELINE_STATE>> &pipe_state, - std::vector<safe_VkGraphicsPipelineCreateInfo> *new_graphics_pipeline_create_infos, - std::vector<safe_VkComputePipelineCreateInfo> *new_compute_pipeline_create_infos, const VkPipelineBindPoint bind_point) { - if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE) { +template <typename CreateInfo, typename SafeCreateInfo> +void CoreChecks::GpuPreCallRecordPipelineCreations(uint32_t count, const CreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines, + std::vector<std::unique_ptr<PIPELINE_STATE>> &pipe_state, + std::vector<SafeCreateInfo> *new_pipeline_create_infos, + const VkPipelineBindPoint bind_point) { + using Accessor = CreatePipelineTraits<CreateInfo>; + if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE && + bind_point != VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { return; } - bool graphics_pipeline = (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); // Walk through all the pipelines, make a copy of each and flag each pipeline that contains a shader that uses the debug // descriptor set index. 
for (uint32_t pipeline = 0; pipeline < count; ++pipeline) { - auto stageCount = graphics_pipeline ? pGraphicsCreateInfos[pipeline].stageCount : 1; - bool replace_shaders = false; - if (graphics_pipeline) - new_graphics_pipeline_create_infos->push_back(pipe_state[pipeline]->graphicsPipelineCI); - else - new_compute_pipeline_create_infos->push_back(pipe_state[pipeline]->computePipelineCI); + uint32_t stageCount = Accessor::GetStageCount(pCreateInfos[pipeline]); + new_pipeline_create_infos->push_back(Accessor::GetPipelineCI(pipe_state[pipeline].get())); + bool replace_shaders = false; if (pipe_state[pipeline]->active_slots.find(gpu_validation_state->desc_set_bind_index) != pipe_state[pipeline]->active_slots.end()) { replace_shaders = true; @@ -484,11 +548,8 @@ void CoreChecks::GpuPreCallRecordPipelineCreations( if (replace_shaders) { for (uint32_t stage = 0; stage < stageCount; ++stage) { - const SHADER_MODULE_STATE *shader; - if (graphics_pipeline) - shader = GetShaderModuleState(pGraphicsCreateInfos[pipeline].pStages[stage].module); - else - shader = GetShaderModuleState(pComputeCreateInfos[pipeline].stage.module); + const SHADER_MODULE_STATE *shader = GetShaderModuleState(Accessor::GetShaderModule(pCreateInfos[pipeline], stage)); + VkShaderModuleCreateInfo create_info = {}; VkShaderModule shader_module; create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; @@ -496,14 +557,10 @@ void CoreChecks::GpuPreCallRecordPipelineCreations( create_info.codeSize = shader->words.size() * sizeof(uint32_t); VkResult result = DispatchCreateShaderModule(device, &create_info, pAllocator, &shader_module); if (result == VK_SUCCESS) { - if (graphics_pipeline) - new_graphics_pipeline_create_infos[pipeline].data()->pStages[stage].module = shader_module; - else - new_compute_pipeline_create_infos[pipeline].data()->stage.module = shader_module; + Accessor::SetShaderModule(new_pipeline_create_infos[pipeline].data(), shader_module, stage); } else { - 
ReportSetupProblem(VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, - (graphics_pipeline) ? HandleToUint64(pGraphicsCreateInfos[pipeline].pStages[stage].module) - : HandleToUint64(pComputeCreateInfos[pipeline].stage.module), + uint64_t moduleHandle = HandleToUint64(Accessor::GetShaderModule(pCreateInfos[pipeline], stage)); + ReportSetupProblem(VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, moduleHandle, "Unable to replace instrumented shader with non-instrumented one. " "Device could become unstable."); } @@ -514,37 +571,66 @@ void CoreChecks::GpuPreCallRecordPipelineCreations( void CoreChecks::GpuPostCallRecordCreateGraphicsPipelines(const uint32_t count, const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { - GpuPostCallRecordPipelineCreations(count, pCreateInfos, nullptr, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_GRAPHICS); + GpuPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_GRAPHICS); } void CoreChecks::GpuPostCallRecordCreateComputePipelines(const uint32_t count, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { - GpuPostCallRecordPipelineCreations(count, nullptr, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_GRAPHICS); + GpuPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_COMPUTE); +} +void CoreChecks::GpuPostCallRecordCreateRayTracingPipelinesNV(const uint32_t count, + const VkRayTracingPipelineCreateInfoNV *pCreateInfos, + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { + GpuPostCallRecordPipelineCreations(count, pCreateInfos, pAllocator, pPipelines, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV); } + // For every pipeline: // - For every shader in a pipeline: // - If the shader had to be replaced in PreCallRecord (because the pipeline is using the debug desc set index): // - Destroy it since it has 
been bound into the pipeline by now. This is our only chance to delete it. // - Track the shader in the shader_map // - Save the shader binary if it contains debug code -void CoreChecks::GpuPostCallRecordPipelineCreations(const uint32_t count, const VkGraphicsPipelineCreateInfo *pGraphicsCreateInfos, - const VkComputePipelineCreateInfo *pComputeCreateInfos, +template <typename CreateInfo> +void CoreChecks::GpuPostCallRecordPipelineCreations(const uint32_t count, const CreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines, const VkPipelineBindPoint bind_point) { - if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE) { + using Accessor = CreatePipelineTraits<CreateInfo>; + if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE && + bind_point != VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { return; } for (uint32_t pipeline = 0; pipeline < count; ++pipeline) { - auto pipeline_state = GetPipelineState(pPipelines[pipeline]); + auto pipeline_state = ValidationStateTracker::GetPipelineState(pPipelines[pipeline]); if (nullptr == pipeline_state) continue; - for (uint32_t stage = 0; stage < pipeline_state->graphicsPipelineCI.stageCount; ++stage) { + + uint32_t stageCount = 0; + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + stageCount = pipeline_state->graphicsPipelineCI.stageCount; + } else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + stageCount = 1; + } else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { + stageCount = pipeline_state->raytracingPipelineCI.stageCount; + } else { + assert(false); + } + + for (uint32_t stage = 0; stage < stageCount; ++stage) { if (pipeline_state->active_slots.find(gpu_validation_state->desc_set_bind_index) != pipeline_state->active_slots.end()) { - if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) - DispatchDestroyShaderModule(device, pGraphicsCreateInfos->pStages[stage].module, pAllocator); - 
else - DispatchDestroyShaderModule(device, pComputeCreateInfos->stage.module, pAllocator); + DispatchDestroyShaderModule(device, Accessor::GetShaderModule(pCreateInfos[pipeline], stage), pAllocator); + } + + const SHADER_MODULE_STATE *shader_state = nullptr; + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + shader_state = GetShaderModuleState(pipeline_state->graphicsPipelineCI.pStages[stage].module); + } else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + assert(stage == 0); + shader_state = GetShaderModuleState(pipeline_state->computePipelineCI.stage.module); + } else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { + shader_state = GetShaderModuleState(pipeline_state->raytracingPipelineCI.pStages[stage].module); + } else { + assert(false); } - auto shader_state = GetShaderModuleState(pipeline_state->graphicsPipelineCI.pStages[stage].module); + std::vector<unsigned int> code; // Save the shader binary if debug info is present. // The core_validation ShaderModule tracker saves the binary too, but discards it when the ShaderModule @@ -561,8 +647,18 @@ void CoreChecks::GpuPostCallRecordPipelineCreations(const uint32_t count, const gpu_validation_state->shader_map[shader_state->gpu_validation_shader_id].pipeline = pipeline_state->pipeline; // Be careful to use the originally bound (instrumented) shader here, even if PreCallRecord had to back it // out with a non-instrumented shader. The non-instrumented shader (found in pCreateInfo) was destroyed above. 
- gpu_validation_state->shader_map[shader_state->gpu_validation_shader_id].shader_module = - pipeline_state->graphicsPipelineCI.pStages[stage].module; + VkShaderModule shader_module = VK_NULL_HANDLE; + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + shader_module = pipeline_state->graphicsPipelineCI.pStages[stage].module; + } else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + assert(stage == 0); + shader_module = pipeline_state->computePipelineCI.stage.module; + } else if (bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { + shader_module = pipeline_state->raytracingPipelineCI.pStages[stage].module; + } else { + assert(false); + } + gpu_validation_state->shader_map[shader_state->gpu_validation_shader_id].shader_module = shader_module; gpu_validation_state->shader_map[shader_state->gpu_validation_shader_id].pgm = std::move(code); } } @@ -629,28 +725,52 @@ static void GenerateStageMessage(const uint32_t *debug_record, std::string &msg) using namespace spvtools; std::ostringstream strm; switch (debug_record[kInstCommonOutStageIdx]) { - case 0: { + case spv::ExecutionModelVertex: { strm << "Stage = Vertex. Vertex Index = " << debug_record[kInstVertOutVertexIndex] << " Instance Index = " << debug_record[kInstVertOutInstanceIndex] << ". "; } break; - case 1: { + case spv::ExecutionModelTessellationControl: { strm << "Stage = Tessellation Control. Invocation ID = " << debug_record[kInstTessOutInvocationId] << ". "; } break; - case 2: { + case spv::ExecutionModelTessellationEvaluation: { strm << "Stage = Tessellation Eval. Invocation ID = " << debug_record[kInstTessOutInvocationId] << ". "; } break; - case 3: { + case spv::ExecutionModelGeometry: { strm << "Stage = Geometry. Primitive ID = " << debug_record[kInstGeomOutPrimitiveId] << " Invocation ID = " << debug_record[kInstGeomOutInvocationId] << ". "; } break; - case 4: { + case spv::ExecutionModelFragment: { strm << "Stage = Fragment. 
Fragment coord (x,y) = (" << *reinterpret_cast<const float *>(&debug_record[kInstFragOutFragCoordX]) << ", " << *reinterpret_cast<const float *>(&debug_record[kInstFragOutFragCoordY]) << "). "; } break; - case 5: { + case spv::ExecutionModelGLCompute: { strm << "Stage = Compute. Global invocation ID = " << debug_record[kInstCompOutGlobalInvocationId] << ". "; } break; + case spv::ExecutionModelRayGenerationNV: { + strm << "Stage = Ray Generation. Global Launch ID (x,y,z) = (" << debug_record[kInstRayTracingOutLaunchIdX] << ", " + << debug_record[kInstRayTracingOutLaunchIdY] << ", " << debug_record[kInstRayTracingOutLaunchIdZ] << "). "; + } break; + case spv::ExecutionModelIntersectionNV: { + strm << "Stage = Intersection. Global Launch ID (x,y,z) = (" << debug_record[kInstRayTracingOutLaunchIdX] << ", " + << debug_record[kInstRayTracingOutLaunchIdY] << ", " << debug_record[kInstRayTracingOutLaunchIdZ] << "). "; + } break; + case spv::ExecutionModelAnyHitNV: { + strm << "Stage = Any Hit. Global Launch ID (x,y,z) = (" << debug_record[kInstRayTracingOutLaunchIdX] << ", " + << debug_record[kInstRayTracingOutLaunchIdY] << ", " << debug_record[kInstRayTracingOutLaunchIdZ] << "). "; + } break; + case spv::ExecutionModelClosestHitNV: { + strm << "Stage = Closest Hit. Global Launch ID (x,y,z) = (" << debug_record[kInstRayTracingOutLaunchIdX] << ", " + << debug_record[kInstRayTracingOutLaunchIdY] << ", " << debug_record[kInstRayTracingOutLaunchIdZ] << "). "; + } break; + case spv::ExecutionModelMissNV: { + strm << "Stage = Miss. Global Launch ID (x,y,z) = (" << debug_record[kInstRayTracingOutLaunchIdX] << ", " + << debug_record[kInstRayTracingOutLaunchIdY] << ", " << debug_record[kInstRayTracingOutLaunchIdZ] << "). "; + } break; + case spv::ExecutionModelCallableNV: { + strm << "Stage = Callable. 
Global Launch ID (x,y,z) = (" << debug_record[kInstRayTracingOutLaunchIdX] << ", " + << debug_record[kInstRayTracingOutLaunchIdY] << ", " << debug_record[kInstRayTracingOutLaunchIdZ] << "). "; + } break; default: { strm << "Internal Error (unexpected stage = " << debug_record[kInstCommonOutStageIdx] << "). "; assert(false); @@ -693,7 +813,8 @@ static std::string LookupDebugUtilsName(const debug_report_data *report_data, co // Generate message from the common portion of the debug report record. static void GenerateCommonMessage(const debug_report_data *report_data, const CMD_BUFFER_STATE *cb_node, const uint32_t *debug_record, const VkShaderModule shader_module_handle, - const VkPipeline pipeline_handle, const uint32_t draw_index, std::string &msg) { + const VkPipeline pipeline_handle, const VkPipelineBindPoint pipeline_bind_point, + const uint32_t operation_index, std::string &msg) { using namespace spvtools; std::ostringstream strm; if (shader_module_handle == VK_NULL_HANDLE) { @@ -704,8 +825,18 @@ static void GenerateCommonMessage(const debug_report_data *report_data, const CM } else { strm << std::hex << std::showbase << "Command buffer " << LookupDebugUtilsName(report_data, HandleToUint64(cb_node->commandBuffer)) << "(" - << HandleToUint64(cb_node->commandBuffer) << "). " - << "Draw Index " << draw_index << ". " + << HandleToUint64(cb_node->commandBuffer) << "). "; + if (pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + strm << "Draw "; + } else if (pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + strm << "Compute "; + } else if (pipeline_bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { + strm << "Ray Trace "; + } else { + assert(false); + strm << "Unknown Pipeline Operation "; + } + strm << "Index " << operation_index << ". " << "Pipeline " << LookupDebugUtilsName(report_data, HandleToUint64(pipeline_handle)) << "(" << HandleToUint64(pipeline_handle) << "). 
" << "Shader Module " << LookupDebugUtilsName(report_data, HandleToUint64(shader_module_handle)) << "(" @@ -932,8 +1063,8 @@ static void GenerateSourceMessages(const std::vector<unsigned int> &pgm, const u // sure it is available when the pipeline is submitted. (The ShaderModule tracking object also // keeps a copy, but it can be destroyed after the pipeline is created and before it is submitted.) // -void CoreChecks::AnalyzeAndReportError(CMD_BUFFER_STATE *cb_node, VkQueue queue, uint32_t draw_index, - uint32_t *const debug_output_buffer) { +void CoreChecks::AnalyzeAndReportError(CMD_BUFFER_STATE *cb_node, VkQueue queue, VkPipelineBindPoint pipeline_bind_point, + uint32_t operation_index, uint32_t *const debug_output_buffer) { using namespace spvtools; const uint32_t total_words = debug_output_buffer[0]; // A zero here means that the shader instrumentation didn't write anything. @@ -971,7 +1102,8 @@ void CoreChecks::AnalyzeAndReportError(CMD_BUFFER_STATE *cb_node, VkQueue queue, } GenerateValidationMessage(debug_record, validation_message, vuid_msg); GenerateStageMessage(debug_record, stage_message); - GenerateCommonMessage(report_data, cb_node, debug_record, shader_module_handle, pipeline_handle, draw_index, common_message); + GenerateCommonMessage(report_data, cb_node, debug_record, shader_module_handle, pipeline_handle, pipeline_bind_point, + operation_index, common_message); GenerateSourceMessages(pgm, debug_record, filename_message, source_message); log_msg(report_data, VK_DEBUG_REPORT_ERROR_BIT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT, HandleToUint64(queue), vuid_msg.c_str(), "%s %s %s %s%s", validation_message.c_str(), common_message.c_str(), stage_message.c_str(), @@ -985,19 +1117,41 @@ void CoreChecks::AnalyzeAndReportError(CMD_BUFFER_STATE *cb_node, VkQueue queue, // For the given command buffer, map its debug data buffers and read their contents for analysis. 
void CoreChecks::ProcessInstrumentationBuffer(VkQueue queue, CMD_BUFFER_STATE *cb_node) { auto gpu_buffer_list = gpu_validation_state->GetGpuBufferInfo(cb_node->commandBuffer); - if (cb_node && cb_node->hasDrawCmd && gpu_buffer_list.size() > 0) { + if (cb_node && (cb_node->hasDrawCmd || cb_node->hasTraceRaysCmd || cb_node->hasDispatchCmd) && gpu_buffer_list.size() > 0) { VkResult result; char *pData; uint32_t draw_index = 0; + uint32_t compute_index = 0; + uint32_t ray_trace_index = 0; for (auto &buffer_info : gpu_buffer_list) { result = vmaMapMemory(gpu_validation_state->vmaAllocator, buffer_info.output_mem_block.allocation, (void **)&pData); // Analyze debug output buffer if (result == VK_SUCCESS) { - AnalyzeAndReportError(cb_node, queue, draw_index, (uint32_t *)pData); + uint32_t operation_index = 0; + if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + operation_index = draw_index; + } else if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + operation_index = compute_index; + } else if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { + operation_index = ray_trace_index; + } else { + assert(false); + } + + AnalyzeAndReportError(cb_node, queue, buffer_info.pipeline_bind_point, operation_index, (uint32_t *)pData); vmaUnmapMemory(gpu_validation_state->vmaAllocator, buffer_info.output_mem_block.allocation); } - draw_index++; + + if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + draw_index++; + } else if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) { + compute_index++; + } else if (buffer_info.pipeline_bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { + ray_trace_index++; + } else { + assert(false); + } } } } @@ -1023,74 +1177,71 @@ void CoreChecks::UpdateInstrumentationBuffer(CMD_BUFFER_STATE *cb_node) { // Submit a memory barrier on graphics queues. // Lazy-create and record the needed command buffer. 
void CoreChecks::SubmitBarrier(VkQueue queue) { - uint32_t queue_family_index = 0; + auto queue_barrier_command_info_it = + gpu_validation_state->queue_barrier_command_infos.emplace(queue, GpuQueueBarrierCommandInfo{}); + if (queue_barrier_command_info_it.second) { + GpuQueueBarrierCommandInfo &quere_barrier_command_info = queue_barrier_command_info_it.first->second; - auto it = queueMap.find(queue); - if (it != queueMap.end()) { - queue_family_index = it->second.queueFamilyIndex; - } + uint32_t queue_family_index = 0; - // Pay attention only to queues that support graphics. - // This ensures that the command buffer pool is created so that it can be used on a graphics queue. - VkQueueFlags queue_flags = GetPhysicalDeviceState()->queue_family_properties[queue_family_index].queueFlags; - if (!(queue_flags & VK_QUEUE_GRAPHICS_BIT)) { - return; - } + auto queue_state_it = queueMap.find(queue); + if (queue_state_it != queueMap.end()) { + queue_family_index = queue_state_it->second.queueFamilyIndex; + } + + VkResult result = VK_SUCCESS; - // Lazy-allocate and record the command buffer. 
- if (gpu_validation_state->barrier_command_buffer == VK_NULL_HANDLE) { - VkResult result; VkCommandPoolCreateInfo pool_create_info = {}; pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; pool_create_info.queueFamilyIndex = queue_family_index; - result = DispatchCreateCommandPool(device, &pool_create_info, nullptr, &gpu_validation_state->barrier_command_pool); + result = DispatchCreateCommandPool(device, &pool_create_info, nullptr, &quere_barrier_command_info.barrier_command_pool); if (result != VK_SUCCESS) { ReportSetupProblem(VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, HandleToUint64(device), "Unable to create command pool for barrier CB."); - gpu_validation_state->barrier_command_pool = VK_NULL_HANDLE; + quere_barrier_command_info.barrier_command_pool = VK_NULL_HANDLE; return; } - VkCommandBufferAllocateInfo command_buffer_alloc_info = {}; - command_buffer_alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - command_buffer_alloc_info.commandPool = gpu_validation_state->barrier_command_pool; - command_buffer_alloc_info.commandBufferCount = 1; - command_buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - result = DispatchAllocateCommandBuffers(device, &command_buffer_alloc_info, &gpu_validation_state->barrier_command_buffer); + VkCommandBufferAllocateInfo buffer_alloc_info = {}; + buffer_alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + buffer_alloc_info.commandPool = quere_barrier_command_info.barrier_command_pool; + buffer_alloc_info.commandBufferCount = 1; + buffer_alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + result = DispatchAllocateCommandBuffers(device, &buffer_alloc_info, &quere_barrier_command_info.barrier_command_buffer); if (result != VK_SUCCESS) { ReportSetupProblem(VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, HandleToUint64(device), "Unable to create barrier command buffer."); - DispatchDestroyCommandPool(device, gpu_validation_state->barrier_command_pool, nullptr); - 
gpu_validation_state->barrier_command_pool = VK_NULL_HANDLE; - gpu_validation_state->barrier_command_buffer = VK_NULL_HANDLE; + DispatchDestroyCommandPool(device, quere_barrier_command_info.barrier_command_pool, nullptr); + quere_barrier_command_info.barrier_command_pool = VK_NULL_HANDLE; + quere_barrier_command_info.barrier_command_buffer = VK_NULL_HANDLE; return; } // Hook up command buffer dispatch - gpu_validation_state->vkSetDeviceLoaderData(device, gpu_validation_state->barrier_command_buffer); + gpu_validation_state->vkSetDeviceLoaderData(device, quere_barrier_command_info.barrier_command_buffer); // Record a global memory barrier to force availability of device memory operations to the host domain. VkCommandBufferBeginInfo command_buffer_begin_info = {}; command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - result = DispatchBeginCommandBuffer(gpu_validation_state->barrier_command_buffer, &command_buffer_begin_info); - + result = DispatchBeginCommandBuffer(quere_barrier_command_info.barrier_command_buffer, &command_buffer_begin_info); if (result == VK_SUCCESS) { VkMemoryBarrier memory_barrier = {}; memory_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; memory_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; - DispatchCmdPipelineBarrier(gpu_validation_state->barrier_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + DispatchCmdPipelineBarrier(quere_barrier_command_info.barrier_command_buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 1, &memory_barrier, 0, nullptr, 0, nullptr); - DispatchEndCommandBuffer(gpu_validation_state->barrier_command_buffer); + DispatchEndCommandBuffer(quere_barrier_command_info.barrier_command_buffer); } } - if (gpu_validation_state->barrier_command_buffer) { + GpuQueueBarrierCommandInfo &quere_barrier_command_info = queue_barrier_command_info_it.first->second; + if (quere_barrier_command_info.barrier_command_buffer != 
VK_NULL_HANDLE) { VkSubmitInfo submit_info = {}; submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &gpu_validation_state->barrier_command_buffer; + submit_info.pCommandBuffers = &quere_barrier_command_info.barrier_command_buffer; DispatchQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE); } } @@ -1131,8 +1282,8 @@ void CoreChecks::GpuPostCallQueueSubmit(VkQueue queue, uint32_t submitCount, con } void CoreChecks::GpuAllocateValidationResources(const VkCommandBuffer cmd_buffer, const VkPipelineBindPoint bind_point) { - // Does GPUAV support VK_PIPELINE_BIND_POINT_RAY_TRACING_NV? - if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE) { + if (bind_point != VK_PIPELINE_BIND_POINT_GRAPHICS && bind_point != VK_PIPELINE_BIND_POINT_COMPUTE && + bind_point != VK_PIPELINE_BIND_POINT_RAY_TRACING_NV) { return; } VkResult result; @@ -1190,19 +1341,28 @@ void CoreChecks::GpuAllocateValidationResources(const VkCommandBuffer cmd_buffer VkWriteDescriptorSet desc_writes[2] = {}; uint32_t desc_count = 1; auto const &state = cb_node->lastBound[bind_point]; - uint32_t number_of_sets = (uint32_t)state.boundDescriptorSets.size(); + uint32_t number_of_sets = (uint32_t)state.per_set.size(); // Figure out how much memory we need for the input block based on how many sets and bindings there are // and how big each of the bindings is if (number_of_sets > 0 && device_extensions.vk_ext_descriptor_indexing) { uint32_t descriptor_count = 0; // Number of descriptors, including all array elements uint32_t binding_count = 0; // Number of bindings based on the max binding number used - for (auto desc : state.boundDescriptorSets) { + for (auto s : state.per_set) { + auto desc = s.bound_descriptor_set; auto bindings = desc->GetLayout()->GetSortedBindingSet(); if (bindings.size() > 0) { binding_count += desc->GetLayout()->GetMaxBinding() + 1; for (auto binding : bindings) { - if (binding == 
desc->GetLayout()->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) { + // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline uniform + // blocks + if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) { + descriptor_count++; + log_msg(report_data, VK_DEBUG_REPORT_WARNING_BIT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT, + VK_NULL_HANDLE, "UNASSIGNED-GPU-Assisted Validation Warning", + "VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT descriptors will not be validated by GPU assisted " + "validation"); + } else if (binding == desc->GetLayout()->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) { descriptor_count += desc->GetVariableDescriptorCount(); } else { descriptor_count += desc->GetDescriptorCountFromBinding(binding); @@ -1247,7 +1407,8 @@ void CoreChecks::GpuAllocateValidationResources(const VkCommandBuffer cmd_buffer // Index of the start of the sets_to_bindings array pData[0] = number_of_sets + binding_count + 1; - for (auto desc : state.boundDescriptorSets) { + for (auto s : state.per_set) { + auto desc = s.bound_descriptor_set; auto layout = desc->GetLayout(); auto bindings = layout->GetSortedBindingSet(); if (bindings.size() > 0) { @@ -1257,7 +1418,11 @@ void CoreChecks::GpuAllocateValidationResources(const VkCommandBuffer cmd_buffer *sets_to_bindings++ = bindCounter + number_of_sets + binding_count; for (auto binding : bindings) { // For each binding, fill in its size in the sizes array - if (binding == layout->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) { + // Shader instrumentation is tracking inline uniform blocks as scalers. 
Don't try to validate inline uniform + // blocks + if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) { + sizes[binding] = 1; + } else if (binding == layout->GetMaxBinding() && desc->IsVariableDescriptorCount(binding)) { sizes[binding] = desc->GetVariableDescriptorCount(); } else { sizes[binding] = desc->GetDescriptorCountFromBinding(binding); @@ -1265,6 +1430,13 @@ void CoreChecks::GpuAllocateValidationResources(const VkCommandBuffer cmd_buffer // Fill in the starting index for this binding in the written array in the bindings_to_written array bindings_to_written[binding] = written_index; + // Shader instrumentation is tracking inline uniform blocks as scalers. Don't try to validate inline uniform + // blocks + if (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT == desc->GetLayout()->GetTypeFromBinding(binding)) { + pData[written_index++] = 1; + continue; + } + auto index_range = desc->GetGlobalIndexRangeFromBinding(binding, true); // For each array element in the binding, update the written array with whether it has been written for (uint32_t i = index_range.start; i < index_range.end; ++i) { @@ -1319,11 +1491,12 @@ void CoreChecks::GpuAllocateValidationResources(const VkCommandBuffer cmd_buffer if (iter != cb_node->lastBound.end()) { auto pipeline_state = iter->second.pipeline_state; if (pipeline_state && (pipeline_state->pipeline_layout.set_layouts.size() <= gpu_validation_state->desc_set_bind_index)) { - DispatchCmdBindDescriptorSets(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_state->pipeline_layout.layout, + DispatchCmdBindDescriptorSets(cmd_buffer, bind_point, pipeline_state->pipeline_layout.layout, gpu_validation_state->desc_set_bind_index, 1, desc_sets.data(), 0, nullptr); } // Record buffer and memory info in CB state tracking - gpu_validation_state->GetGpuBufferInfo(cmd_buffer).emplace_back(output_block, input_block, desc_sets[0], desc_pool); + gpu_validation_state->GetGpuBufferInfo(cmd_buffer) + 
.emplace_back(output_block, input_block, desc_sets[0], desc_pool, bind_point); } else { ReportSetupProblem(VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, HandleToUint64(device), "Unable to find pipeline state"); vmaDestroyBuffer(gpu_validation_state->vmaAllocator, input_block.buffer, input_block.allocation); |