From d9d35f5a601d422d33eda718a58369ec87546087 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Tue, 7 Jan 2025 13:33:20 -0800 Subject: [PATCH 1/7] Conform split request size to handle arbitrary device allocation sizes which may be larger than allocation request (even if it's aligned to an alignment boundary). --- src/runtime/internal/memory_resources.h | 2 +- src/runtime/internal/region_allocator.h | 54 ++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index 0be6041519a1..e543d0183ffc 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -151,7 +151,7 @@ ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment) ALWAYS_INLINE size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) { size_t adjusted_size = aligned_size(offset, size, alignment); adjusted_size = (alignment > adjusted_size) ? 
alignment : adjusted_size; - if (nearest_multiple > 0) { + if ((nearest_multiple > 0) && ((adjusted_size % nearest_multiple) != 0)) { size_t rounded_size = (((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple); return rounded_size; } else { diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h index 6f39991ff79c..e6832fabaac8 100644 --- a/src/runtime/internal/region_allocator.h +++ b/src/runtime/internal/region_allocator.h @@ -74,7 +74,7 @@ class RegionAllocator { BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region); // Returns true if the given region can be split to accomodate the given size - bool can_split(const BlockRegion *region, const MemoryRequest &request) const; + bool can_split(void *user_context, const BlockRegion *region, const MemoryRequest &request) const; // Splits the given block region into a smaller region to accomodate the given size, followed by empty space for the remaining BlockRegion *split_block_region(void *user_context, BlockRegion *region, const MemoryRequest &request); @@ -195,7 +195,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest & return nullptr; } - if (can_split(block_region, region_request)) { + if (can_split(user_context, block_region, region_request)) { #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") " << "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)"; @@ -443,8 +443,29 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe return block_region; } -bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const { - return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)); +bool RegionAllocator::can_split(void *user_context, const BlockRegion 
*block_region, const MemoryRequest &split_request) const { + + // See if we can actually split the block region and create empty space big enough + if (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)) { + + // We can only split if there's still room left after conforming the allocation request since the + // conform method may actually grow the requested size to accommodate alignment constraints + MemoryRequest test_request = split_request; + test_request.size = block_region->memory.size - test_request.size; + test_request.offset = block_region->memory.offset + test_request.size; + int error_code = conform(user_context, &test_request); + if (error_code) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Failed to conform test request for splitting block region!\n"; +#endif + return false; + } + + if ((block_region->memory.size - test_request.size) > 0) { + return true; + } + } + return false; } BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) { @@ -470,8 +491,9 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting " - << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) " - << "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)"; + << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) into ...\n\t" + << "existing region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size - split_request.size) << " bytes)\n\t" + << "empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)\n"; #endif 
BlockRegion *next_region = block_region->next_ptr; BlockRegion *empty_region = create_block_region(user_context, split_request); @@ -484,6 +506,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion empty_region->prev_ptr = block_region; block_region->next_ptr = empty_region; block_region->memory.size -= empty_region->memory.size; + +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Split block region into ...\n\t" + << "existing region (ptr=" << (void*)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t" + << "empty region (ptr=" << (void*)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n"; +#endif return empty_region; } @@ -605,8 +633,22 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r #endif halide_abort_if_false(user_context, allocators.region.allocate != nullptr); halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available); + int error_code = 0; MemoryRegion *memory_region = &(block_region->memory); + if (memory_region->size <= 0) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << " skipping zero size region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; +#endif + return error_code; + } + if (memory_region->handle == nullptr) { error_code = allocators.region.allocate(user_context, memory_region); memory_region->is_owner = true; From 
f4bd1da2d59f711093c7901591c4c06a4f852509 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Tue, 7 Jan 2025 13:35:36 -0800 Subject: [PATCH 2/7] Refactor command buffer submission and descriptor set creation. Add support for push descriptor set extension. --- src/runtime/vulkan.cpp | 69 +++-- src/runtime/vulkan_extensions.h | 10 +- src/runtime/vulkan_functions.h | 1 + src/runtime/vulkan_internal.h | 42 +++ src/runtime/vulkan_memory.h | 22 +- src/runtime/vulkan_resources.h | 465 +++++++++++++++++++++++--------- 6 files changed, 461 insertions(+), 148 deletions(-) diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 776088448891..2d46f3eb401f 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -1193,13 +1193,6 @@ WEAK int halide_vulkan_run(void *user_context, } } } - - // 2b. Create the pipeline layout - error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout)); - if (error_code != halide_error_code_success) { - error(user_context) << "Vulkan: Failed to create pipeline layout!\n"; - return error_code; - } } VulkanDispatchData dispatch_data = {}; @@ -1213,14 +1206,7 @@ WEAK int halide_vulkan_run(void *user_context, VulkanShaderBinding *entry_point_binding = (shader_module->shader_bindings + entry_point_index); - // 2c. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size) - error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline)); - if (error_code != halide_error_code_success) { - error(user_context) << "Vulkan: Failed to setup compute pipeline!\n"; - return error_code; - } - - // 2d. Create a descriptor set + // 2c. 
Create a descriptor set if (entry_point_binding->descriptor_set == VK_NULL_HANDLE) { // Construct a descriptor pool @@ -1244,7 +1230,7 @@ WEAK int halide_vulkan_run(void *user_context, } } - // 3a. Create a buffer for the scalar parameters + // 2d. Create a buffer for the scalar parameters if ((entry_point_binding->args_region == nullptr) && entry_point_binding->uniform_buffer_count) { size_t scalar_buffer_size = vk_estimate_scalar_uniform_buffer_size(user_context, arg_sizes, args, arg_is_buffer); if (scalar_buffer_size > 0) { @@ -1256,7 +1242,7 @@ WEAK int halide_vulkan_run(void *user_context, } } - // 3b. Update uniform buffer with scalar parameters + // 2e. Update uniform buffer with scalar parameters VkBuffer *args_buffer = nullptr; if ((entry_point_binding->args_region != nullptr) && entry_point_binding->uniform_buffer_count) { error_code = vk_update_scalar_uniform_buffer(user_context, ctx.allocator, entry_point_binding->args_region, arg_sizes, args, arg_is_buffer); @@ -1272,13 +1258,29 @@ WEAK int halide_vulkan_run(void *user_context, } } - // 3c. Update buffer bindings for descriptor set + // 2f. Update buffer bindings for descriptor set error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set); if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to update descriptor set!\n"; return error_code; } + // 2b. Create the pipeline layout + if (shader_module->pipeline_layout == VK_NULL_HANDLE) { + error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout)); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create pipeline layout!\n"; + return error_code; + } + } + + // 3. 
Setup the compute pipeline (eg override any specializations for shared mem or workgroup size) + error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline)); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to setup compute pipeline!\n"; + return error_code; + } + // 4. Create a command buffer and pool ScopedVulkanCommandBufferAndPool cmds(user_context, ctx.allocator, ctx.queue_family_index); if (cmds.error_code != halide_error_code_success) { @@ -1287,6 +1289,31 @@ WEAK int halide_vulkan_run(void *user_context, } // 5. Fill the command buffer + error_code = vk_start_command_buffer_for_dispatch_call(user_context, cmds.command_buffer); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to start command buffer for dispatch call!\n"; + return error_code; + } + error_code = vk_bind_pipeline_to_command_buffer(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n"; + return error_code; + } + + if (vkCmdPushDescriptorSetKHR != nullptr) { + error_code = vk_push_descriptor_set(user_context, ctx.allocator, cmds.command_buffer, entry_point_binding->compute_pipeline, shader_module->pipeline_layout, entry_point_binding->descriptor_set, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to update descriptor set!\n"; + return error_code; + } + } else { + error_code = vk_bind_descriptor_set(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index); + if 
(error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n"; + return error_code; + } + } + error_code = vk_fill_command_buffer_with_dispatch_call(user_context, ctx.device, cmds.command_buffer, entry_point_binding->compute_pipeline, @@ -1299,6 +1326,12 @@ WEAK int halide_vulkan_run(void *user_context, return error_code; } + error_code = vk_end_command_buffer_for_dispatch_call(user_context, cmds.command_buffer); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to end command buffer for dispatch call!\n"; + return error_code; + } + // 6. Submit the command buffer to our command queue error_code = vk_submit_command_buffer(user_context, ctx.queue, cmds.command_buffer); if (error_code != halide_error_code_success) { diff --git a/src/runtime/vulkan_extensions.h b/src/runtime/vulkan_extensions.h index c2e4ea42ff1e..68fa3662c39e 100644 --- a/src/runtime/vulkan_extensions.h +++ b/src/runtime/vulkan_extensions.h @@ -203,10 +203,18 @@ uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_ uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table) { const char *optional_ext_table[] = { "VK_KHR_portability_subset", //< necessary for running under Molten (aka Vulkan on Mac) + VK_KHR_MAINTENANCE_1_EXTENSION_NAME, + VK_KHR_MAINTENANCE_2_EXTENSION_NAME, + VK_KHR_MAINTENANCE_3_EXTENSION_NAME, + VK_KHR_MAINTENANCE_4_EXTENSION_NAME, VK_KHR_MAINTENANCE_5_EXTENSION_NAME, + VK_KHR_MAINTENANCE_6_EXTENSION_NAME, + VK_KHR_MAINTENANCE_7_EXTENSION_NAME, VK_KHR_16BIT_STORAGE_EXTENSION_NAME, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, - VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME}; + VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, + VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME}; + const uint32_t optional_ext_count = sizeof(optional_ext_table) / sizeof(optional_ext_table[0]); ext_table.fill(user_context, (const char 
**)optional_ext_table, optional_ext_count); return optional_ext_count; diff --git a/src/runtime/vulkan_functions.h b/src/runtime/vulkan_functions.h index 90938896f4cc..1339e0818a03 100644 --- a/src/runtime/vulkan_functions.h +++ b/src/runtime/vulkan_functions.h @@ -204,6 +204,7 @@ VULKAN_FN(vkCmdCopyBuffer2) // VULKAN_FN(vkCmdCopyImageToBuffer2) // VULKAN_FN(vkCmdEndRendering) VULKAN_FN(vkCmdPipelineBarrier2) +VULKAN_FN(vkCmdPushDescriptorSetKHR) VULKAN_FN(vkCmdResetEvent2) // VULKAN_FN(vkCmdResolveImage2) // VULKAN_FN(vkCmdSetCullMode) diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h index aeef545385cc..a619789db2d5 100644 --- a/src/runtime/vulkan_internal.h +++ b/src/runtime/vulkan_internal.h @@ -106,6 +106,11 @@ int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocat struct ScopedVulkanCommandBufferAndPool; +int vk_start_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer); +int vk_end_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer); +int vk_bind_pipeline_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline); +int vk_bind_descriptor_set_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline); + int vk_fill_command_buffer_with_dispatch_call(void *user_context, VkDevice device, VkCommandBuffer command_buffer, @@ -175,6 +180,24 @@ int vk_create_descriptor_set(void *user_context, VkDescriptorPool descriptor_pool, VkDescriptorSet *descriptor_set); +int vk_get_descriptor_buffer_info(void *user_context, + VulkanMemoryAllocator *allocator, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + BlockStorage *descriptor_buffer_info_result); + +int vk_get_write_descriptor_set_info(void *user_context, + VulkanMemoryAllocator 
*allocator, + BlockStorage *descriptor_buffer_info, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + BlockStorage *write_descriptor_set_result); + int vk_update_descriptor_set(void *user_context, VulkanMemoryAllocator *allocator, VkBuffer *scalar_args_buffer, @@ -185,6 +208,25 @@ int vk_update_descriptor_set(void *user_context, int8_t arg_is_buffer[], VkDescriptorSet descriptor_set); +int vk_bind_descriptor_set(void *user_context, + VkCommandBuffer command_buffer, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index); + +int vk_push_descriptor_set(void *user_context, + VulkanMemoryAllocator *allocator, + VkCommandBuffer command_buffer, + VkPipeline compute_pipeline, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]); + // -- Pipeline Layout int vk_create_pipeline_layout(void *user_context, VulkanMemoryAllocator *allocator, diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index 9b32de9a15c0..b65f076e8aad 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -556,7 +556,7 @@ int VulkanMemoryAllocator::lookup_requirements(void *user_context, size_t size, #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Looking up requirements (" << "user_context=" << user_context << " " - << "size=" << (uint32_t)block->size << ", " + << "size=" << (uint32_t)size << ", " << "usage_flags=" << usage_flags << ") ... 
\n"; #endif VkBufferCreateInfo create_info = { @@ -998,7 +998,7 @@ int VulkanMemoryAllocator::conform(void *user_context, MemoryRequest *request) { #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements (" - << "requested_size=" << (uint32_t)region->size << ", " + << "requested_size=" << (uint32_t)request->size << ", " << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", " << "required_size=" << (uint32_t)memory_requirements.size << ")\n"; #endif @@ -1051,7 +1051,7 @@ int VulkanMemoryAllocator::conform_region_request(void *instance_ptr, MemoryRequ #if defined(HL_VK_DEBUG_MEM) debug(nullptr) << "VulkanMemoryAllocator: Conforming region request (" << "user_context=" << user_context << " " - << "request=" << (void *)(region) << ") ... \n"; + << "request=" << (void *)(request) << ") ... \n"; #endif if ((instance->device == nullptr) || (instance->physical_device == nullptr)) { @@ -1125,6 +1125,9 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer); if (result != VK_SUCCESS) { + debug(user_context) << "VulkanRegionAllocator: Failed to create buffer!\n\t" + << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n"; + // Allocation failed ... collect unused regions and try again ... 
instance->collect(user_context); result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer); @@ -1165,12 +1168,9 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n"; return halide_error_code_device_malloc_failed; } + region->size = create_info.size; } -#ifdef DEBUG_RUNTIME - debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; -#endif - RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, region); if (region_allocator == nullptr) { error(user_context) << "VulkanBlockAllocator: Unable to allocate region! Invalid region allocator!\n"; @@ -1189,6 +1189,10 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg return halide_error_code_internal_error; } +#ifdef DEBUG_RUNTIME + debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; +#endif + // Finally, bind buffer to the device memory result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->offset); if (result != VK_SUCCESS) { @@ -1197,6 +1201,10 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg return halide_error_code_generic_error; } +#ifdef DEBUG_RUNTIME + debug(nullptr) << "vkBindBufferMemory: Bound memory to device buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; +#endif + region->handle = (void *)buffer; region->is_owner = true; instance->region_byte_count += region->size; diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index 1e696110b5c9..bc18cf707b06 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -202,24 +202,11 @@ struct ScopedVulkanCommandBufferAndPool { } }; -int vk_fill_command_buffer_with_dispatch_call(void *user_context, - VkDevice device, - 
VkCommandBuffer command_buffer, - VkPipeline compute_pipeline, - VkPipelineLayout pipeline_layout, - VkDescriptorSet descriptor_set, - uint32_t descriptor_set_index, - int blocksX, int blocksY, int blocksZ) { - +int vk_start_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_fill_command_buffer_with_dispatch_call (user_context: " << user_context << ", " - << "device: " << (void *)device << ", " - << "command_buffer: " << (void *)command_buffer << ", " - << "pipeline_layout: " << (void *)pipeline_layout << ", " - << "descriptor_set: " << (void *)descriptor_set << ", " - << "descriptor_set_index: " << descriptor_set_index << ", " - << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n"; + << " vk_start_command_buffer_for_dispatch_call (user_context: " << user_context << ", " + << "command_buffer: " << (void *)command_buffer << ")\n"; #endif VkCommandBufferBeginInfo command_buffer_begin_info = { @@ -234,18 +221,59 @@ int vk_fill_command_buffer_with_dispatch_call(void *user_context, error(user_context) << "vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n"; return halide_error_code_generic_error; } + return halide_error_code_success; +} - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - descriptor_set_index, 1, &descriptor_set, 0, nullptr); - vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ); +int vk_end_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_end_command_buffer_for_dispatch_call (user_context: " << user_context << ", " + << "command_buffer: " << (void *)command_buffer << ")\n"; +#endif - result = vkEndCommandBuffer(command_buffer); + VkResult result = vkEndCommandBuffer(command_buffer); if (result != VK_SUCCESS) { 
error(user_context) << "vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n"; return halide_error_code_generic_error; } + return halide_error_code_success; +} +int vk_bind_pipeline_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_bind_pipeline_to_command_buffer (user_context: " << user_context << ", " + << "command_buffer: " << (void *)command_buffer << ", " + << "compute_pipeline: " << (void *)compute_pipeline << ")\n"; +#endif + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); + return halide_error_code_success; +} + +int vk_fill_command_buffer_with_dispatch_call(void *user_context, + VkDevice device, + VkCommandBuffer command_buffer, + VkPipeline compute_pipeline, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index, + int blocksX, int blocksY, int blocksZ) { + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_fill_command_buffer_with_dispatch_call (user_context: " << user_context << ", " + << "device: " << (void *)device << ", " + << "command_buffer: " << (void *)command_buffer << ", " + << "pipeline_layout: " << (void *)pipeline_layout << ", " + << "descriptor_set: " << (void *)descriptor_set << ", " + << "descriptor_set_index: " << descriptor_set_index << ", " + << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n"; +#endif + +// vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); +// vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, +// descriptor_set_index, 1, &descriptor_set, 0, nullptr); + vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ); return halide_error_code_success; } @@ -353,16 +381,14 @@ int vk_create_descriptor_pool(void *user_context, pool_sizes.append(user_context, &storage_buffer_size); } - VkDescriptorPoolCreateInfo descriptor_pool_info = { 
- VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, // struct type - nullptr, // point to struct extending this - 0, // flags - 1, // this pool will only be used for creating one descriptor set! - (uint32_t)pool_sizes.size(), // pool size count - (const VkDescriptorPoolSize *)pool_sizes.data() // ptr to descriptr pool sizes - }; + VkDescriptorPoolCreateInfo pool_create_info{}; + pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_create_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + pool_create_info.maxSets = 1; + pool_create_info.poolSizeCount = (uint32_t)pool_sizes.size(); + pool_create_info.pPoolSizes = (const VkDescriptorPoolSize *)pool_sizes.data(); - VkResult result = vkCreateDescriptorPool(allocator->current_device(), &descriptor_pool_info, allocator->callbacks(), descriptor_pool); + VkResult result = vkCreateDescriptorPool(allocator->current_device(), &pool_create_info, allocator->callbacks(), descriptor_pool); if (result != VK_SUCCESS) { error(user_context) << "Vulkan: Failed to create descriptor pool! 
vkCreateDescriptorPool returned " << vk_get_error_name(result) << "\n"; return halide_error_code_generic_error; @@ -415,52 +441,45 @@ int vk_create_descriptor_set_layout(void *user_context, // add all uniform buffers first for (uint32_t n = 0; n < uniform_buffer_count; ++n) { - VkDescriptorSetLayoutBinding uniform_buffer_layout = { - (uint32_t)layout_bindings.size(), // binding index - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, // descriptor type - 1, // descriptor count - VK_SHADER_STAGE_COMPUTE_BIT, // stage flags - nullptr // immutable samplers - }; - + // Params will be passed as UNIFORM_BUFFERs + VkDescriptorSetLayoutBinding uniform_buffer_layout_binding{}; + uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniform_buffer_layout_binding.descriptorCount = 1; + uniform_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME debug(user_context) << " [" << (uint32_t)layout_bindings.size() << "] : UNIFORM_BUFFER\n"; #endif - - layout_bindings.append(user_context, &uniform_buffer_layout); + layout_bindings.append(user_context, &uniform_buffer_layout_binding); } // Add all other storage buffers for (uint32_t n = 0; n < storage_buffer_count; ++n) { - - // halide buffers will be passed as STORAGE_BUFFERS - VkDescriptorSetLayoutBinding storage_buffer_layout = { - (uint32_t)layout_bindings.size(), // binding index - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptor type - 1, // descriptor count - VK_SHADER_STAGE_COMPUTE_BIT, // stage flags - nullptr // immutable samplers - }; + // halide buffers will be passed as STORAGE_BUFFERs + VkDescriptorSetLayoutBinding storage_buffer_layout_binding{}; + storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + storage_buffer_layout_binding.descriptorCount = 1; + 
storage_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME debug(user_context) << " [" << (uint32_t)layout_bindings.size() << "] : STORAGE_BUFFER\n"; #endif - - layout_bindings.append(user_context, &storage_buffer_layout); + layout_bindings.append(user_context, &storage_buffer_layout_binding); } - // Create the LayoutInfo struct - VkDescriptorSetLayoutCreateInfo layout_info = { - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // structure type - nullptr, // pointer to a struct extending this info - 0, // flags - (uint32_t)layout_bindings.size(), // binding count - (VkDescriptorSetLayoutBinding *)layout_bindings.data() // pointer to layout bindings array - }; + VkDescriptorSetLayoutCreateInfo layout_create_info{}; + layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layout_create_info.pNext = nullptr; + if (vkCmdPushDescriptorSetKHR != nullptr) { + layout_create_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR; + } + layout_create_info.bindingCount = (uint32_t)layout_bindings.size(); + layout_create_info.pBindings = (VkDescriptorSetLayoutBinding *)layout_bindings.data(); // Create the descriptor set layout - VkResult result = vkCreateDescriptorSetLayout(allocator->current_device(), &layout_info, allocator->callbacks(), layout); + VkResult result = vkCreateDescriptorSetLayout(allocator->current_device(), &layout_create_info, allocator->callbacks(), layout); if (result != VK_SUCCESS) { error(user_context) << "vkCreateDescriptorSetLayout returned " << vk_get_error_name(result) << "\n"; return halide_error_code_generic_error; @@ -524,18 +543,19 @@ int vk_create_descriptor_set(void *user_context, return halide_error_code_success; } -int vk_update_descriptor_set(void *user_context, - VulkanMemoryAllocator *allocator, - VkBuffer *scalar_args_buffer, - size_t uniform_buffer_count, - size_t storage_buffer_count, - size_t arg_sizes[], - void *args[], - int8_t arg_is_buffer[], - 
VkDescriptorSet descriptor_set) { +int vk_get_descriptor_buffer_info(void *user_context, + VulkanMemoryAllocator *allocator, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + BlockStorage *descriptor_buffer_info_result) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_update_descriptor_set (user_context: " << user_context << ", " + << " vk_get_descriptor_buffer_info (user_context: " << user_context << ", " << "allocator: " << (void *)allocator << ", " << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", " << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", " @@ -543,7 +563,17 @@ int vk_update_descriptor_set(void *user_context, << "descriptor_set: " << (void *)descriptor_set << ")\n"; #endif if (allocator == nullptr) { - error(user_context) << "Vulkan: Failed to create descriptor set ... invalid allocator pointer!\n"; + error(user_context) << "Vulkan: Failed to get descriptor buffer info. Invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + if (descriptor_buffer_info_result == nullptr) { + error(user_context) << "Vulkan: Failed to get descriptor buffer info. Invalid result pointer!\n"; + return halide_error_code_generic_error; + } + + if (descriptor_buffer_info_result->current_config().entry_size != sizeof(VkDescriptorBufferInfo)) { + error(user_context) << "Vulkan: Failed to get descriptor buffer info. 
Invalid descriptor buffer info result!\n"; return halide_error_code_generic_error; } @@ -552,41 +582,20 @@ int vk_update_descriptor_set(void *user_context, dbi_config.entry_size = sizeof(VkDescriptorBufferInfo); BlockStorage descriptor_buffer_info(user_context, dbi_config); - BlockStorage::Config wds_config; - wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count; - wds_config.entry_size = sizeof(VkWriteDescriptorSet); - BlockStorage write_descriptor_set(user_context, wds_config); - // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER - VkDescriptorBufferInfo *scalar_args_entry = nullptr; if (scalar_args_buffer != nullptr) { - VkDescriptorBufferInfo scalar_args_descriptor_buffer_info = { - *scalar_args_buffer, // the buffer - 0, // offset - VK_WHOLE_SIZE // range - }; - descriptor_buffer_info.append(user_context, &scalar_args_descriptor_buffer_info); - scalar_args_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back(); + VkDescriptorBufferInfo scalar_args_buffer_info{}; + scalar_args_buffer_info.buffer = *scalar_args_buffer; + scalar_args_buffer_info.offset = 0; + scalar_args_buffer_info.range = VK_WHOLE_SIZE; + descriptor_buffer_info_result->append(user_context, &scalar_args_buffer_info); #ifdef DEBUG_RUNTIME - debug(user_context) << " [" << (uint32_t)write_descriptor_set.size() << "] UNIFORM_BUFFER : " + debug(user_context) << " [" << (uint32_t)descriptor_buffer_info.size() << "] UNIFORM_BUFFER : " << "buffer=" << (void *)scalar_args_buffer << " " << "offset=" << (uint32_t)(0) << " " << "size=VK_WHOLE_SIZE\n"; #endif - VkWriteDescriptorSet uniform_buffer_write_descriptor_set = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // struct type - nullptr, // pointer to struct extending this - descriptor_set, // descriptor set to update - 0, // binding slot - 0, // array elem - 1, // num to update - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, // descriptor type - nullptr, // for images - scalar_args_entry, // info for 
buffer - nullptr // for texel buffers - }; - write_descriptor_set.append(user_context, &uniform_buffer_write_descriptor_set); } // Add all the other device buffers as STORAGE BUFFERs @@ -607,40 +616,252 @@ int vk_update_descriptor_set(void *user_context, VkDeviceSize range_offset = device_region->range.head_offset; VkDeviceSize range_size = device_region->size - device_region->range.head_offset - device_region->range.tail_offset; halide_abort_if_false(user_context, (device_region->size - device_region->range.head_offset - device_region->range.tail_offset) > 0); - VkDescriptorBufferInfo device_buffer_info = { - *device_buffer, // the buffer - range_offset, // range offset - range_size // range size - }; - descriptor_buffer_info.append(user_context, &device_buffer_info); - VkDescriptorBufferInfo *device_buffer_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back(); + + VkDescriptorBufferInfo device_buffer_info{}; + device_buffer_info.buffer = *device_buffer; + device_buffer_info.offset = range_offset; + device_buffer_info.range = range_size; + descriptor_buffer_info_result->append(user_context, &device_buffer_info); #ifdef DEBUG_RUNTIME - debug(user_context) << " [" << (uint32_t)write_descriptor_set.size() << "] STORAGE_BUFFER : " + debug(user_context) << " [" << (uint32_t)descriptor_buffer_info.size() << "] STORAGE_BUFFER : " << "region=" << (void *)device_region << " " << "buffer=" << (void *)device_buffer << " " << "offset=" << (uint32_t)(range_offset) << " " << "size=" << (uint32_t)(range_size) << "\n"; #endif - - VkWriteDescriptorSet storage_buffer_write_descriptor_set = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // struct type - nullptr, // pointer to struct extending this - descriptor_set, // descriptor set to update - (uint32_t)write_descriptor_set.size(), // binding slot - 0, // array elem - 1, // num to update - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptor type - nullptr, // for images - device_buffer_entry, // info for buffer - nullptr // for 
texel buffers - }; - write_descriptor_set.append(user_context, &storage_buffer_write_descriptor_set); } } + return halide_error_code_success; +} + +int vk_get_write_descriptor_set_info(void *user_context, + VulkanMemoryAllocator *allocator, + BlockStorage *descriptor_buffer_info, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + BlockStorage *write_descriptor_set_result) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_get_write_descriptor_set_info (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "descriptor_buffer_info: " << (void *)descriptor_buffer_info << ", " + << "descriptor_set: " << (void *)descriptor_set << ", " + << "scalar_args_buffer: " << (void *)scalar_args_buffer << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to fill write descriptor set ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + if (write_descriptor_set_result == nullptr) { + error(user_context) << "Vulkan: Failed to fill write descriptor set ... invalid result pointer!\n"; + return halide_error_code_generic_error; + } + + if (write_descriptor_set_result->current_config().entry_size != sizeof(VkWriteDescriptorSet)) { + error(user_context) << "Vulkan: Failed to fill write descriptor set. 
Invalid write descriptor set result!\n"; + return halide_error_code_generic_error; + } + + // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER + size_t index = 0; + if (scalar_args_buffer != nullptr && descriptor_buffer_info->size()) { + VkWriteDescriptorSet uniform_buffer_write_entry{}; + uniform_buffer_write_entry.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + uniform_buffer_write_entry.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniform_buffer_write_entry.dstSet = descriptor_set; + uniform_buffer_write_entry.dstBinding = 0; + uniform_buffer_write_entry.dstArrayElement = 0; + uniform_buffer_write_entry.descriptorCount = 1; + uniform_buffer_write_entry.pBufferInfo = (VkDescriptorBufferInfo *)(*descriptor_buffer_info)[index++]; + write_descriptor_set_result->append(user_context, &uniform_buffer_write_entry); + } + + // Add all the other device buffers as STORAGE BUFFERs + while (index < descriptor_buffer_info->size()) { + VkWriteDescriptorSet storage_buffer_write_entry{}; + storage_buffer_write_entry.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + storage_buffer_write_entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + storage_buffer_write_entry.dstSet = descriptor_set; + storage_buffer_write_entry.dstBinding = (uint32_t)write_descriptor_set_result->size(); + storage_buffer_write_entry.dstArrayElement = 0; + storage_buffer_write_entry.descriptorCount = 1; + storage_buffer_write_entry.pBufferInfo = (VkDescriptorBufferInfo *)(*descriptor_buffer_info)[index++]; + write_descriptor_set_result->append(user_context, &storage_buffer_write_entry); + } + + return halide_error_code_success; +} + +int vk_update_descriptor_set(void *user_context, + VulkanMemoryAllocator *allocator, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + VkDescriptorSet descriptor_set) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << 
" vk_update_descriptor_set (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", " + << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", " + << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ", " + << "descriptor_set: " << (void *)descriptor_set << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to update descriptor set ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + BlockStorage::Config dbi_config; + dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count; + dbi_config.entry_size = sizeof(VkDescriptorBufferInfo); + BlockStorage descriptor_buffer_info(user_context, dbi_config); + + int error_code = vk_get_descriptor_buffer_info( + user_context, + allocator, + descriptor_set, + scalar_args_buffer, + uniform_buffer_count, + storage_buffer_count, + arg_sizes, + args, + arg_is_buffer, + &descriptor_buffer_info + ); + + if (error_code) { + error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; + return halide_error_code_generic_error; + } + + BlockStorage::Config wds_config; + wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count; + wds_config.entry_size = sizeof(VkWriteDescriptorSet); + BlockStorage write_descriptor_set(user_context, wds_config); + + error_code = vk_get_write_descriptor_set_info( + user_context, + allocator, + &descriptor_buffer_info, + descriptor_set, + scalar_args_buffer, + &write_descriptor_set + ); + + if (error_code) { + error(user_context) << "Vulkan: Failed to update descriptor set ... 
write descriptor set creation failed!\n"; + return halide_error_code_generic_error; + } + // issue the update call to populate the descriptor set - vkUpdateDescriptorSets(allocator->current_device(), (uint32_t)write_descriptor_set.size(), (const VkWriteDescriptorSet *)write_descriptor_set.data(), 0, nullptr); + uint32_t update_size = (uint32_t)write_descriptor_set.size(); + const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + vkUpdateDescriptorSets(allocator->current_device(), update_size, update_data, 0, nullptr); + return halide_error_code_success; +} + +int vk_bind_descriptor_set(void *user_context, + VkCommandBuffer command_buffer, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_bind_descriptor_set_to_compute_pipeline (user_context: " << user_context << ", " + << "command_buffer: " << (void *)command_buffer << ", " + << "pipeline_layout: " << (void *)pipeline_layout << ", " + << "descriptor_set: " << (void *)descriptor_set << ", " + << "descriptor_set_index: " << descriptor_set_index << ")\n"; +#endif + vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + descriptor_set_index, 1, &descriptor_set, 0, nullptr); + return halide_error_code_success; +} + +int vk_push_descriptor_set(void *user_context, + VulkanMemoryAllocator *allocator, + VkCommandBuffer command_buffer, + VkPipeline compute_pipeline, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_push_descriptor_set (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", " + << 
"uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", " + << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ", " + << "descriptor_set: " << (void *)descriptor_set << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to create descriptor set ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to update descriptor set ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + BlockStorage::Config dbi_config; + dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count; + dbi_config.entry_size = sizeof(VkDescriptorBufferInfo); + BlockStorage descriptor_buffer_info(user_context, dbi_config); + + int error_code = vk_get_descriptor_buffer_info( + user_context, + allocator, + descriptor_set, + scalar_args_buffer, + uniform_buffer_count, + storage_buffer_count, + arg_sizes, + args, + arg_is_buffer, + &descriptor_buffer_info + ); + + if (error_code) { + error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; + return halide_error_code_generic_error; + } + + BlockStorage::Config wds_config; + wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count; + wds_config.entry_size = sizeof(VkWriteDescriptorSet); + BlockStorage write_descriptor_set(user_context, wds_config); + + error_code = vk_get_write_descriptor_set_info( + user_context, + allocator, + &descriptor_buffer_info, + descriptor_set, + scalar_args_buffer, + &write_descriptor_set + ); + + if (error_code) { + error(user_context) << "Vulkan: Failed to update descriptor set ... 
write descriptor set creation failed!\n"; + return halide_error_code_generic_error; + } + + // issue the update call to populate the descriptor set + uint32_t update_size = (uint32_t)write_descriptor_set.size(); + const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + vkCmdPushDescriptorSetKHR(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, update_size, update_data); return halide_error_code_success; } @@ -1665,7 +1886,7 @@ void vk_destroy_compilation_cache_entry(VulkanCompilationCacheEntry *cache_entry debug(user_context) << " vk_destroy_compilation_cache_entry (cache_entry: " << cache_entry << ")\n"; - if (cache_entry == nullptr) { + if ((cache_entry == nullptr) || (cache_entry->compiled_modules == nullptr)) { return; } From 96808550a30aec1abde3250715c922856ccbf337 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Tue, 7 Jan 2025 14:34:51 -0800 Subject: [PATCH 3/7] Don't allocate a Descriptor Set if Push Descriptor Set is supported. --- src/runtime/vulkan.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 2d46f3eb401f..6d6477297499 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -1206,9 +1206,8 @@ WEAK int halide_vulkan_run(void *user_context, VulkanShaderBinding *entry_point_binding = (shader_module->shader_bindings + entry_point_index); - // 2c. Create a descriptor set - if (entry_point_binding->descriptor_set == VK_NULL_HANDLE) { - + // 2c. If Push Descriptor Set isn't supported, then allocate a descriptor set + if ((vkCmdPushDescriptorSetKHR == nullptr) && (entry_point_binding->descriptor_set == VK_NULL_HANDLE)) { // Construct a descriptor pool // // NOTE: while this could be re-used across multiple pipelines, we only know the storage requirements of this kernel's @@ -1258,11 +1257,13 @@ WEAK int halide_vulkan_run(void *user_context, } } - // 2f. 
Update buffer bindings for descriptor set - error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set); - if (error_code != halide_error_code_success) { - error(user_context) << "Vulkan: Failed to update descriptor set!\n"; - return error_code; + // 2f. If Push Descriptor Set isn't supported, then update the buffer bindings for the allocated descriptor set + if (vkCmdPushDescriptorSetKHR == nullptr) { + error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to update descriptor set!\n"; + return error_code; + } } // 2b. Create the pipeline layout From 95cc293aa0dc9c38c95b4cdcf38ad9682dbe1a96 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Tue, 7 Jan 2025 15:39:56 -0800 Subject: [PATCH 4/7] Only run tests with Vulkan if types are supported. 
--- src/Target.cpp | 15 +++++++++++++-- test/autoschedulers/mullapudi2016/histogram.cpp | 8 +++++++- test/correctness/bool_predicate_cast.cpp | 5 +++++ test/correctness/boundary_conditions.cpp | 4 ++++ test/correctness/convolution.cpp | 10 ++++++++-- .../convolution_multiple_kernels.cpp | 5 +++++ test/correctness/dilate3x3.cpp | 4 ++++ test/correctness/gpu_arg_types.cpp | 7 ++++++- test/correctness/gpu_dynamic_shared.cpp | 8 +++++++- test/correctness/gpu_reuse_shared_memory.cpp | 11 +++++++++++ test/correctness/gpu_transpose.cpp | 8 +++++++- test/correctness/interleave_rgb.cpp | 13 +++++++------ test/correctness/interleave_x.cpp | 5 +++++ test/correctness/logical.cpp | 12 ++++++------ test/correctness/median3x3.cpp | 8 +++++++- test/correctness/mul_div_mod.cpp | 15 +++++++++++++++ test/correctness/multiple_outputs.cpp | 4 +++- test/correctness/widening_reduction.cpp | 17 +++++++++++++---- 18 files changed, 133 insertions(+), 26 deletions(-) diff --git a/src/Target.cpp b/src/Target.cpp index bdab34b75d28..3efe57901558 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -1304,17 +1304,28 @@ int Target::get_arm_v8_lower_bound() const { } bool Target::supports_type(const Type &t) const { + if(has_feature(Vulkan)) { + if (t.is_float() && t.bits() == 64) { + return has_feature(Target::VulkanFloat64); + } else if (t.is_float() && t.bits() == 16) { + return has_feature(Target::VulkanFloat16); + } else if (t.is_int_or_uint() && t.bits() == 64) { + return has_feature(Target::VulkanInt64); + } else if (t.is_int_or_uint() && t.bits() == 16) { + return has_feature(Target::VulkanInt16); + } else if (t.is_int_or_uint() && t.bits() == 8) { + return has_feature(Target::VulkanInt8); + } + } if (t.bits() == 64) { if (t.is_float()) { return (!has_feature(Metal) && !has_feature(D3D12Compute) && (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) && - (!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) && !has_feature(WebGPU)); } else { return (!has_feature(Metal) 
&& !has_feature(D3D12Compute) && - (!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) && !has_feature(WebGPU)); } } diff --git a/test/autoschedulers/mullapudi2016/histogram.cpp b/test/autoschedulers/mullapudi2016/histogram.cpp index 3ef2ded28f0a..bcee5695c12c 100644 --- a/test/autoschedulers/mullapudi2016/histogram.cpp +++ b/test/autoschedulers/mullapudi2016/histogram.cpp @@ -120,11 +120,17 @@ double run_test(bool auto_schedule) { } int main(int argc, char **argv) { - if (get_jit_target_from_environment().arch == Target::WebAssembly) { + Halide::Target target = get_jit_target_from_environment(); + if (target.arch == Target::WebAssembly) { printf("[SKIP] Autoschedulers do not support WebAssembly.\n"); return 0; } + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); + return 0; + } + if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0]); return 1; diff --git a/test/correctness/bool_predicate_cast.cpp b/test/correctness/bool_predicate_cast.cpp index 1043f329b76c..ff958eb52a3b 100644 --- a/test/correctness/bool_predicate_cast.cpp +++ b/test/correctness/bool_predicate_cast.cpp @@ -8,6 +8,11 @@ int main(int argc, char **argv) { // Test explicit casting of a predicate to an integer as part of a reduction // NOTE: triggers a convert_to_bool in Vulkan for a SelectOp Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int8 support!\n"); + return 0; + } + Var x("x"), y("y"); Func input("input"); diff --git a/test/correctness/boundary_conditions.cpp b/test/correctness/boundary_conditions.cpp index 61422d130d01..02ef9ded6f2b 100644 --- a/test/correctness/boundary_conditions.cpp +++ b/test/correctness/boundary_conditions.cpp @@ -392,6 +392,10 @@ int main(int argc, char **argv) { // The wasm jit is very slow, so shorten this test here. vector_width_max = 8; } + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); + return 0; + } std::vector tasks; for (int vector_width = 1; vector_width <= vector_width_max; vector_width *= 2) { diff --git a/test/correctness/convolution.cpp b/test/correctness/convolution.cpp index 60689c33ec87..47040454f775 100644 --- a/test/correctness/convolution.cpp +++ b/test/correctness/convolution.cpp @@ -4,6 +4,14 @@ using namespace Halide; int main(int argc, char **argv) { + Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan)) { + if(!target.has_feature(Target::VulkanInt16)) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int16 support!\n"); + return 0; + } + } + // int W = 64*3, H = 64*3; const int W = 128, H = 48; @@ -64,8 +72,6 @@ int main(int argc, char **argv) { Func blur2("blur2"); blur2(x, y) = sum(tent(r.x, r.y) * input(x + r.x - 1, y + r.y - 1)); - Target target = get_jit_target_from_environment(); - if (target.has_gpu_feature()) { Var xi("xi"), yi("yi"); diff --git a/test/correctness/convolution_multiple_kernels.cpp b/test/correctness/convolution_multiple_kernels.cpp index 8a27787d36e9..831c6980ab05 100644 --- a/test/correctness/convolution_multiple_kernels.cpp +++ b/test/correctness/convolution_multiple_kernels.cpp @@ -39,6 +39,11 @@ int main(int argc, char **argv) { sum(cast(box2(r.x, r.y)) * input(x + r.x, y + r.y)); Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) { + printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n"); + return 0; + } + if (target.has_gpu_feature()) { Var xi("xi"), yi("yi"); blur.gpu_tile(x, y, xi, yi, 16, 16); diff --git a/test/correctness/dilate3x3.cpp b/test/correctness/dilate3x3.cpp index b41c8040192a..e747c82a285b 100644 --- a/test/correctness/dilate3x3.cpp +++ b/test/correctness/dilate3x3.cpp @@ -27,6 +27,10 @@ int main(int argc, char **argv) { // Schedule. Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int8 support!\n"); + return 0; + } if (target.has_gpu_feature()) { Var xi("xi"), yi("yi"); dilate3x3.gpu_tile(x, y, xi, yi, 16, 16); diff --git a/test/correctness/gpu_arg_types.cpp b/test/correctness/gpu_arg_types.cpp index fe06f6849f45..214a2fdd5a39 100644 --- a/test/correctness/gpu_arg_types.cpp +++ b/test/correctness/gpu_arg_types.cpp @@ -3,10 +3,15 @@ using namespace Halide; int main(int argc, char *argv[]) { - if (!get_jit_target_from_environment().has_gpu_feature()) { + Halide::Target target = get_jit_target_from_environment(); + if (!target.has_gpu_feature()) { printf("[SKIP] No GPU target enabled.\n"); return 0; } + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) { + printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n"); + return 0; + } Func f, g; Var x, y, tx, ty; diff --git a/test/correctness/gpu_dynamic_shared.cpp b/test/correctness/gpu_dynamic_shared.cpp index f98636ea8905..3ea7f2b085e7 100644 --- a/test/correctness/gpu_dynamic_shared.cpp +++ b/test/correctness/gpu_dynamic_shared.cpp @@ -11,14 +11,20 @@ int main(int argc, char **argv) { } if (t.has_feature(Target::Vulkan)) { + if (!t.has_feature(Target::VulkanV13)) { + printf("[SKIP] Skipping test for Vulkan ... 
missing 1.3 feature in target!\n"); + return 0; + } + const auto *interface = get_device_interface_for_device_api(DeviceAPI::Vulkan); assert(interface->compute_capability != nullptr); int major, minor; int err = interface->compute_capability(nullptr, &major, &minor); if (err != 0 || (major == 1 && minor < 3)) { - printf("[SKIP] Vulkan %d.%d is less than required 1.3.\n", major, minor); + printf("[SKIP] Vulkan runtime support %d.%d is less than required 1.3.\n", major, minor); return 0; } + if ((t.os == Target::IOS) || (t.os == Target::OSX)) { printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK doesn't support dynamic LocalSizeId yet)!\n"); return 0; diff --git a/test/correctness/gpu_reuse_shared_memory.cpp b/test/correctness/gpu_reuse_shared_memory.cpp index 37e932d78273..96ee2ff76e3b 100644 --- a/test/correctness/gpu_reuse_shared_memory.cpp +++ b/test/correctness/gpu_reuse_shared_memory.cpp @@ -172,6 +172,17 @@ int main(int argc, char **argv) { return 0; } + if (t.has_feature(Target::Vulkan)){ + if (!t.has_feature(Target::VulkanInt8)) { + printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); + return 0; + } + if (!t.has_feature(Target::VulkanInt16)) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int16 support!\n"); + return 0; + } + } + for (auto memory_type : {MemoryType::GPUShared, MemoryType::Heap}) { printf("Running multi thread type test\n"); if (multi_thread_type_test(memory_type) != 0) { diff --git a/test/correctness/gpu_transpose.cpp b/test/correctness/gpu_transpose.cpp index 781deb004fef..503b099862f1 100644 --- a/test/correctness/gpu_transpose.cpp +++ b/test/correctness/gpu_transpose.cpp @@ -4,11 +4,17 @@ using namespace Halide; int main(int argc, char **argv) { - if (!get_jit_target_from_environment().has_gpu_feature()) { + Target t = get_jit_target_from_environment(); + if (!t.has_gpu_feature()) { printf("[SKIP] No GPU target enabled.\n"); return 0; } + if (t.has_feature(Target::Vulkan) && (!t.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); + return 0; + } + ImageParam in(UInt(8), 2); Var x, y; diff --git a/test/correctness/interleave_rgb.cpp b/test/correctness/interleave_rgb.cpp index 3a679a239035..d4f08d7b44cc 100644 --- a/test/correctness/interleave_rgb.cpp +++ b/test/correctness/interleave_rgb.cpp @@ -103,14 +103,15 @@ bool test_deinterleave(int x_stride) { } int main(int argc, char **argv) { + Target target = get_jit_target_from_environment(); for (int x_stride : {3, 4}) { - if (!test_interleave(x_stride)) return 1; - if (!test_interleave(x_stride)) return 1; - if (!test_interleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; - if (!test_deinterleave(x_stride)) return 1; - if (!test_deinterleave(x_stride)) return 1; - if (!test_deinterleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; + if 
(target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; } printf("Success!\n"); return 0; diff --git a/test/correctness/interleave_x.cpp b/test/correctness/interleave_x.cpp index 1120390cac1e..4e5361123727 100644 --- a/test/correctness/interleave_x.cpp +++ b/test/correctness/interleave_x.cpp @@ -11,6 +11,11 @@ int main(int argc, char **argv) { interleaved(x, y) = select(x % 2 == 0, cast(3), cast(7)); Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) { + printf("[SKIP] Skipping test for Vulkan ... missing support for Int16!\n"); + return 0; + } + if (target.has_gpu_feature()) { Var tx("tx"), ty("ty"); interleaved.gpu_tile(x, y, tx, ty, 16, 16); diff --git a/test/correctness/logical.cpp b/test/correctness/logical.cpp index 1bd134bc37f4..9f0d18289211 100644 --- a/test/correctness/logical.cpp +++ b/test/correctness/logical.cpp @@ -13,6 +13,12 @@ Expr u16(Expr a) { int main(int argc, char **argv) { + Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int8 support!\n"); + return 0; + } + Buffer input(128, 64); for (int y = 0; y < input.height(); y++) { @@ -28,7 +34,6 @@ int main(int argc, char **argv) { ((input(x, y) > 40) && (!(input(x, y) > 50))), u8(255), u8(0)); - Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { f.gpu_tile(x, y, xi, yi, 16, 16); f.vectorize(xi, 4); @@ -62,7 +67,6 @@ int main(int argc, char **argv) { ((input(x, y) > 40) && (!common_cond)), u8(255), u8(0)); - Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { f.gpu_tile(x, y, xi, yi, 16, 16); f.vectorize(xi, 4); @@ -93,8 +97,6 @@ int main(int argc, char **argv) { Func f("f"); f(x, y) = select(x < 10 || x > 20 || y < 10 || y > 20, 0, input(x, y)); - Target target = get_jit_target_from_environment(); - if (target.has_gpu_feature()) { f.gpu_tile(x, y, xi, yi, 16, 16); f.vectorize(xi, 4); @@ -124,7 +126,6 @@ int main(int argc, char **argv) { Expr ten = 10; f(x, y) = select(input(x, y) > ten, u8(255), u8(0)); - Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { f.gpu_tile(x, y, xi, yi, 16, 16); f.vectorize(xi, 4); @@ -177,7 +178,6 @@ int main(int argc, char **argv) { cpu.compute_root(); gpu.compute_root(); - Target target = get_jit_target_from_environment(); if (target.has_feature(Target::OpenCL) && n == 16 && w == 32) { // Workaround for https://github.com/halide/Halide/issues/2477 printf("Skipping uint%d -> uint%d for OpenCL\n", n, w); diff --git a/test/correctness/median3x3.cpp b/test/correctness/median3x3.cpp index 9129ea0b6418..7175cd657962 100644 --- a/test/correctness/median3x3.cpp +++ b/test/correctness/median3x3.cpp @@ -13,6 +13,13 @@ Expr mid3(Expr a, Expr b, Expr c) { } int main(int arch, char **argv) { + + Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int8 support!\n"); + return 0; + } + const int W = 256, H = 256; Buffer in(W, H); // Set up the input. @@ -43,7 +50,6 @@ int main(int arch, char **argv) { median3x3(x, y) = mid3(min_max(x, y), max_min(x, y), mid_mid(x, y)); // Schedule. - Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { Var xi("xi"), yi("yi"); median3x3.gpu_tile(x, y, xi, yi, 16, 16); diff --git a/test/correctness/mul_div_mod.cpp b/test/correctness/mul_div_mod.cpp index 8eca8141bba2..91a27766468f 100644 --- a/test/correctness/mul_div_mod.cpp +++ b/test/correctness/mul_div_mod.cpp @@ -540,6 +540,21 @@ void add_test_div_mod(int vector_width, ScheduleVariant scheduling, Target targe int main(int argc, char **argv) { Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan)){ + if (!target.has_feature(Target::VulkanInt8)) { + printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); + return 0; + } + if (!target.has_feature(Target::VulkanInt16)) { + printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n"); + return 0; + } + if (!target.has_feature(Target::VulkanInt64)) { + printf("[SKIP] Skipping test for Vulkan ... missing Int64 support!\n"); + return 0; + } + } + ScheduleVariant scheduling = CPU; if (target.has_gpu_feature()) { scheduling = TiledGPU; diff --git a/test/correctness/multiple_outputs.cpp b/test/correctness/multiple_outputs.cpp index d630cfdf082a..432c0cf4bd6d 100644 --- a/test/correctness/multiple_outputs.cpp +++ b/test/correctness/multiple_outputs.cpp @@ -4,7 +4,8 @@ using namespace Halide; int main(int argc, char **argv) { - const bool use_gpu = get_jit_target_from_environment().has_gpu_feature(); + Target target = get_jit_target_from_environment(); + const bool use_gpu = target.has_gpu_feature(); // An internal Func that produces multiple values. 
{ @@ -93,6 +94,7 @@ int main(int argc, char **argv) { } // Now multiple output Funcs via inferred Realization + if(target.supports_type(halide_type_of()) && target.supports_type(halide_type_of())) { Func f, g; Var x, xi; diff --git a/test/correctness/widening_reduction.cpp b/test/correctness/widening_reduction.cpp index 43b79486e03a..9cdafdbebcba 100644 --- a/test/correctness/widening_reduction.cpp +++ b/test/correctness/widening_reduction.cpp @@ -9,11 +9,20 @@ using namespace Halide::Internal; int main(int arch, char **argv) { Halide::Target target = get_jit_target_from_environment(); - if (target.has_feature(Target::Vulkan) && ((target.os == Target::IOS) || target.os == Target::OSX)) { - printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n"); - return 0; + if (target.has_feature(Target::Vulkan)) { + if(!target.has_feature(Target::VulkanInt8)) { + printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); + return 0; + } + if(!target.has_feature(Target::VulkanInt16)) { + printf("[SKIP] Skipping test for Vulkan ... 
missing Int16 support!\n"); + return 0; + } + if((target.os == Target::IOS) || (target.os == Target::OSX)) { + printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n"); + return 0; + } } - const int W = 256, H = 256; Buffer in(W, H); From 505ae8a555c5f9400f8e3839d31e074bd8acc6af Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Tue, 7 Jan 2025 15:44:10 -0800 Subject: [PATCH 5/7] Clang format pass --- src/Target.cpp | 2 +- src/runtime/internal/region_allocator.h | 14 +++--- src/runtime/vulkan.cpp | 4 +- src/runtime/vulkan_internal.h | 22 ++++----- src/runtime/vulkan_resources.h | 48 +++++++++----------- test/correctness/convolution.cpp | 3 +- test/correctness/gpu_reuse_shared_memory.cpp | 2 +- test/correctness/interleave_rgb.cpp | 10 ++-- test/correctness/mul_div_mod.cpp | 2 +- test/correctness/multiple_outputs.cpp | 3 +- test/correctness/widening_reduction.cpp | 6 +-- 11 files changed, 55 insertions(+), 61 deletions(-) diff --git a/src/Target.cpp b/src/Target.cpp index 3efe57901558..7744b0878bd8 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -1304,7 +1304,7 @@ int Target::get_arm_v8_lower_bound() const { } bool Target::supports_type(const Type &t) const { - if(has_feature(Vulkan)) { + if (has_feature(Vulkan)) { if (t.is_float() && t.bits() == 64) { return has_feature(Target::VulkanFloat64); } else if (t.is_float() && t.bits() == 16) { diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h index e6832fabaac8..e52eb71171de 100644 --- a/src/runtime/internal/region_allocator.h +++ b/src/runtime/internal/region_allocator.h @@ -443,12 +443,12 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe return block_region; } -bool RegionAllocator::can_split(void* user_context, const BlockRegion *block_region, const MemoryRequest &split_request) const { - +bool RegionAllocator::can_split(void *user_context, const BlockRegion *block_region, const 
MemoryRequest &split_request) const { + // See if we can actually split the block region and create empty space big enough if (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)) { - // We can only split if there's still room left after conforming the allocation request since the + // We can only split if there's still room left after conforming the allocation request since the // conform method may actually grow the requested size to accomodate alignment constraints MemoryRequest test_request = split_request; test_request.size = block_region->memory.size - test_request.size; @@ -461,7 +461,7 @@ bool RegionAllocator::can_split(void* user_context, const BlockRegion *block_reg return false; } - if((block_region->memory.size - test_request.size) > 0){ + if ((block_region->memory.size - test_request.size) > 0) { return true; } } @@ -509,9 +509,9 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion #ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Split block region into ...\n\t" - << "existing region (ptr=" << (void*)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t" - << "empty region (ptr=" << (void*)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n"; -#endif + << "existing region (ptr=" << (void *)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t" + << "empty region (ptr=" << (void *)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << 
empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n"; +#endif return empty_region; } diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 6d6477297499..3dcd9eced407 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -1299,7 +1299,7 @@ WEAK int halide_vulkan_run(void *user_context, if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n"; return error_code; - } + } if (vkCmdPushDescriptorSetKHR != nullptr) { error_code = vk_push_descriptor_set(user_context, ctx.allocator, cmds.command_buffer, entry_point_binding->compute_pipeline, shader_module->pipeline_layout, entry_point_binding->descriptor_set, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer); @@ -1312,7 +1312,7 @@ WEAK int halide_vulkan_run(void *user_context, if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n"; return error_code; - } + } } error_code = vk_fill_command_buffer_with_dispatch_call(user_context, diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h index a619789db2d5..2086542a92b6 100644 --- a/src/runtime/vulkan_internal.h +++ b/src/runtime/vulkan_internal.h @@ -181,15 +181,15 @@ int vk_create_descriptor_set(void *user_context, VkDescriptorSet *descriptor_set); int vk_get_descriptor_buffer_info(void *user_context, - VulkanMemoryAllocator *allocator, - VkDescriptorSet descriptor_set, - VkBuffer *scalar_args_buffer, - size_t uniform_buffer_count, - size_t storage_buffer_count, - size_t arg_sizes[], - void *args[], - int8_t arg_is_buffer[], - BlockStorage *descriptor_buffer_info_result); + VulkanMemoryAllocator *allocator, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + 
size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + BlockStorage *descriptor_buffer_info_result); int vk_get_write_descriptor_set_info(void *user_context, VulkanMemoryAllocator *allocator, @@ -213,7 +213,7 @@ int vk_bind_descriptor_set(void *user_context, VkPipelineLayout pipeline_layout, VkDescriptorSet descriptor_set, uint32_t descriptor_set_index); - + int vk_push_descriptor_set(void *user_context, VulkanMemoryAllocator *allocator, VkCommandBuffer command_buffer, @@ -225,7 +225,7 @@ int vk_push_descriptor_set(void *user_context, size_t storage_buffer_count, size_t arg_sizes[], void *args[], - int8_t arg_is_buffer[]); + int8_t arg_is_buffer[]); // -- Pipeline Layout int vk_create_pipeline_layout(void *user_context, diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index bc18cf707b06..889bddfb8575 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -270,9 +270,9 @@ int vk_fill_command_buffer_with_dispatch_call(void *user_context, << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n"; #endif -// vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); -// vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, -// descriptor_set_index, 1, &descriptor_set, 0, nullptr); + // vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); + // vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, + // descriptor_set_index, 1, &descriptor_set, 0, nullptr); vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ); return halide_error_code_success; } @@ -443,8 +443,8 @@ int vk_create_descriptor_set_layout(void *user_context, for (uint32_t n = 0; n < uniform_buffer_count; ++n) { // Params will be passed as UNIFORM_BUFFERs VkDescriptorSetLayoutBinding uniform_buffer_layout_binding{}; - 
uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index - uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; uniform_buffer_layout_binding.descriptorCount = 1; uniform_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME @@ -458,8 +458,8 @@ int vk_create_descriptor_set_layout(void *user_context, for (uint32_t n = 0; n < storage_buffer_count; ++n) { // halide buffers will be passed as STORAGE_BUFFERs VkDescriptorSetLayoutBinding storage_buffer_layout_binding{}; - storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index - storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; storage_buffer_layout_binding.descriptorCount = 1; storage_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME @@ -544,15 +544,15 @@ int vk_create_descriptor_set(void *user_context, } int vk_get_descriptor_buffer_info(void *user_context, - VulkanMemoryAllocator *allocator, - VkDescriptorSet descriptor_set, - VkBuffer *scalar_args_buffer, - size_t uniform_buffer_count, - size_t storage_buffer_count, - size_t arg_sizes[], - void *args[], - int8_t arg_is_buffer[], - BlockStorage *descriptor_buffer_info_result) { + VulkanMemoryAllocator *allocator, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + BlockStorage *descriptor_buffer_info_result) { #ifdef DEBUG_RUNTIME debug(user_context) << " vk_get_descriptor_buffer_info 
(user_context: " << user_context << ", " @@ -733,8 +733,7 @@ int vk_update_descriptor_set(void *user_context, arg_sizes, args, arg_is_buffer, - &descriptor_buffer_info - ); + &descriptor_buffer_info); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; @@ -752,8 +751,7 @@ int vk_update_descriptor_set(void *user_context, &descriptor_buffer_info, descriptor_set, scalar_args_buffer, - &write_descriptor_set - ); + &write_descriptor_set); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... write descriptor set creation failed!\n"; @@ -762,7 +760,7 @@ int vk_update_descriptor_set(void *user_context, // issue the update call to populate the descriptor set uint32_t update_size = (uint32_t)write_descriptor_set.size(); - const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); vkUpdateDescriptorSets(allocator->current_device(), update_size, update_data, 0, nullptr); return halide_error_code_success; } @@ -831,8 +829,7 @@ int vk_push_descriptor_set(void *user_context, arg_sizes, args, arg_is_buffer, - &descriptor_buffer_info - ); + &descriptor_buffer_info); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; @@ -850,8 +847,7 @@ int vk_push_descriptor_set(void *user_context, &descriptor_buffer_info, descriptor_set, scalar_args_buffer, - &write_descriptor_set - ); + &write_descriptor_set); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... 
write descriptor set creation failed!\n"; @@ -860,7 +856,7 @@ int vk_push_descriptor_set(void *user_context, // issue the update call to populate the descriptor set uint32_t update_size = (uint32_t)write_descriptor_set.size(); - const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); vkCmdPushDescriptorSetKHR(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, update_size, update_data); return halide_error_code_success; } diff --git a/test/correctness/convolution.cpp b/test/correctness/convolution.cpp index 47040454f775..72ad5af5ee33 100644 --- a/test/correctness/convolution.cpp +++ b/test/correctness/convolution.cpp @@ -6,13 +6,12 @@ using namespace Halide; int main(int argc, char **argv) { Target target = get_jit_target_from_environment(); if (target.has_feature(Target::Vulkan)) { - if(!target.has_feature(Target::VulkanInt16)) { + if (!target.has_feature(Target::VulkanInt16)) { printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n"); return 0; } } - // int W = 64*3, H = 64*3; const int W = 128, H = 48; diff --git a/test/correctness/gpu_reuse_shared_memory.cpp b/test/correctness/gpu_reuse_shared_memory.cpp index 96ee2ff76e3b..1a8977f75a76 100644 --- a/test/correctness/gpu_reuse_shared_memory.cpp +++ b/test/correctness/gpu_reuse_shared_memory.cpp @@ -172,7 +172,7 @@ int main(int argc, char **argv) { return 0; } - if (t.has_feature(Target::Vulkan)){ + if (t.has_feature(Target::Vulkan)) { if (!t.has_feature(Target::VulkanInt8)) { printf("[SKIP] Skipping test for Vulkan ... 
missing Int8 support!\n"); return 0; diff --git a/test/correctness/interleave_rgb.cpp b/test/correctness/interleave_rgb.cpp index d4f08d7b44cc..d7d045ed4125 100644 --- a/test/correctness/interleave_rgb.cpp +++ b/test/correctness/interleave_rgb.cpp @@ -106,12 +106,12 @@ int main(int argc, char **argv) { Target target = get_jit_target_from_environment(); for (int x_stride : {3, 4}) { if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; - if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; - if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_interleave(x_stride)) return 1; - if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; - if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; - if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; + if (target.supports_type(halide_type_of()) && !test_deinterleave(x_stride)) return 1; } printf("Success!\n"); return 0; diff --git a/test/correctness/mul_div_mod.cpp b/test/correctness/mul_div_mod.cpp index 91a27766468f..7368ef462e0b 100644 --- a/test/correctness/mul_div_mod.cpp +++ b/test/correctness/mul_div_mod.cpp @@ -540,7 +540,7 @@ void add_test_div_mod(int vector_width, ScheduleVariant scheduling, Target targe int main(int argc, char **argv) { Target target = get_jit_target_from_environment(); - if (target.has_feature(Target::Vulkan)){ + if (target.has_feature(Target::Vulkan)) { if (!target.has_feature(Target::VulkanInt8)) { printf("[SKIP] Skipping test for Vulkan ... 
missing Int8 support!\n"); return 0; diff --git a/test/correctness/multiple_outputs.cpp b/test/correctness/multiple_outputs.cpp index 432c0cf4bd6d..d42204bbd250 100644 --- a/test/correctness/multiple_outputs.cpp +++ b/test/correctness/multiple_outputs.cpp @@ -94,8 +94,7 @@ int main(int argc, char **argv) { } // Now multiple output Funcs via inferred Realization - if(target.supports_type(halide_type_of()) && target.supports_type(halide_type_of())) - { + if (target.supports_type(halide_type_of()) && target.supports_type(halide_type_of())) { Func f, g; Var x, xi; f(x) = cast(100 * x); diff --git a/test/correctness/widening_reduction.cpp b/test/correctness/widening_reduction.cpp index 9cdafdbebcba..ad720afaa7bd 100644 --- a/test/correctness/widening_reduction.cpp +++ b/test/correctness/widening_reduction.cpp @@ -10,15 +10,15 @@ int main(int arch, char **argv) { Halide::Target target = get_jit_target_from_environment(); if (target.has_feature(Target::Vulkan)) { - if(!target.has_feature(Target::VulkanInt8)) { + if (!target.has_feature(Target::VulkanInt8)) { printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n"); return 0; } - if(!target.has_feature(Target::VulkanInt16)) { + if (!target.has_feature(Target::VulkanInt16)) { printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n"); return 0; } - if((target.os == Target::IOS) || (target.os == Target::OSX)) { + if ((target.os == Target::IOS) || (target.os == Target::OSX)) { printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n"); return 0; } From f57ea54f95a996b937e9840c6f6a99c03731e12b Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Wed, 8 Jan 2025 13:07:50 -0800 Subject: [PATCH 6/7] Clang tidy fixes. 
--- src/runtime/vulkan_resources.h | 55 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index 889bddfb8575..06176cdc1c9e 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -270,9 +270,9 @@ int vk_fill_command_buffer_with_dispatch_call(void *user_context, << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n"; #endif - // vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); - // vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, - // descriptor_set_index, 1, &descriptor_set, 0, nullptr); +// vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); +// vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, +// descriptor_set_index, 1, &descriptor_set, 0, nullptr); vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ); return halide_error_code_success; } @@ -443,8 +443,8 @@ int vk_create_descriptor_set_layout(void *user_context, for (uint32_t n = 0; n < uniform_buffer_count; ++n) { // Params will be passed as UNIFORM_BUFFERs VkDescriptorSetLayoutBinding uniform_buffer_layout_binding{}; - uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index - uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; uniform_buffer_layout_binding.descriptorCount = 1; uniform_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME @@ -458,8 +458,8 @@ int vk_create_descriptor_set_layout(void *user_context, for (uint32_t n = 0; n < storage_buffer_count; ++n) { // halide buffers will be passed as STORAGE_BUFFERs 
VkDescriptorSetLayoutBinding storage_buffer_layout_binding{}; - storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index - storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; storage_buffer_layout_binding.descriptorCount = 1; storage_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME @@ -544,15 +544,15 @@ int vk_create_descriptor_set(void *user_context, } int vk_get_descriptor_buffer_info(void *user_context, - VulkanMemoryAllocator *allocator, - VkDescriptorSet descriptor_set, - VkBuffer *scalar_args_buffer, - size_t uniform_buffer_count, - size_t storage_buffer_count, - size_t arg_sizes[], - void *args[], - int8_t arg_is_buffer[], - BlockStorage *descriptor_buffer_info_result) { + VulkanMemoryAllocator *allocator, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + BlockStorage *descriptor_buffer_info_result) { #ifdef DEBUG_RUNTIME debug(user_context) << " vk_get_descriptor_buffer_info (user_context: " << user_context << ", " @@ -667,7 +667,7 @@ int vk_get_write_descriptor_set_info(void *user_context, // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER size_t index = 0; - if (scalar_args_buffer != nullptr && descriptor_buffer_info->size()) { + if ((scalar_args_buffer != nullptr) && (!descriptor_buffer_info->empty())) { VkWriteDescriptorSet uniform_buffer_write_entry{}; uniform_buffer_write_entry.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; uniform_buffer_write_entry.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; @@ -733,7 +733,8 @@ int vk_update_descriptor_set(void *user_context, arg_sizes, args, arg_is_buffer, - 
&descriptor_buffer_info); + &descriptor_buffer_info + ); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; @@ -751,7 +752,8 @@ int vk_update_descriptor_set(void *user_context, &descriptor_buffer_info, descriptor_set, scalar_args_buffer, - &write_descriptor_set); + &write_descriptor_set + ); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... write descriptor set creation failed!\n"; @@ -760,7 +762,7 @@ int vk_update_descriptor_set(void *user_context, // issue the update call to populate the descriptor set uint32_t update_size = (uint32_t)write_descriptor_set.size(); - const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); vkUpdateDescriptorSets(allocator->current_device(), update_size, update_data, 0, nullptr); return halide_error_code_success; } @@ -809,11 +811,6 @@ int vk_push_descriptor_set(void *user_context, return halide_error_code_generic_error; } - if (allocator == nullptr) { - error(user_context) << "Vulkan: Failed to update descriptor set ... invalid allocator pointer!\n"; - return halide_error_code_generic_error; - } - BlockStorage::Config dbi_config; dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count; dbi_config.entry_size = sizeof(VkDescriptorBufferInfo); @@ -829,7 +826,8 @@ int vk_push_descriptor_set(void *user_context, arg_sizes, args, arg_is_buffer, - &descriptor_buffer_info); + &descriptor_buffer_info + ); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... 
get descriptor buffer info failed!\n"; @@ -847,7 +845,8 @@ int vk_push_descriptor_set(void *user_context, &descriptor_buffer_info, descriptor_set, scalar_args_buffer, - &write_descriptor_set); + &write_descriptor_set + ); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... write descriptor set creation failed!\n"; @@ -856,7 +855,7 @@ int vk_push_descriptor_set(void *user_context, // issue the update call to populate the descriptor set uint32_t update_size = (uint32_t)write_descriptor_set.size(); - const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); vkCmdPushDescriptorSetKHR(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, update_size, update_data); return halide_error_code_success; } From fbb5c4e9eec484900d92ee66862a5add9248b4ae Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Wed, 8 Jan 2025 13:52:13 -0800 Subject: [PATCH 7/7] Cleanup vulkan resource command function names. --- src/runtime/vulkan.cpp | 22 ++++----- src/runtime/vulkan_internal.h | 36 +++++++------- src/runtime/vulkan_resources.h | 87 ++++++++++++++++------------------ 3 files changed, 69 insertions(+), 76 deletions(-) diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index 3dcd9eced407..9e65fd004644 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -1290,12 +1290,12 @@ WEAK int halide_vulkan_run(void *user_context, } // 5. 
Fill the command buffer - error_code = vk_start_command_buffer_for_dispatch_call(user_context, cmds.command_buffer); + error_code = vk_begin_command_buffer(user_context, cmds.command_buffer); if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to start command buffer for dispatch call!\n"; return error_code; } - error_code = vk_bind_pipeline_to_command_buffer(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline); + error_code = vk_bind_pipeline(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline); if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n"; return error_code; @@ -1308,26 +1308,26 @@ WEAK int halide_vulkan_run(void *user_context, return error_code; } } else { - error_code = vk_bind_descriptor_set(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index); + error_code = vk_bind_descriptor_sets(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index); if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n"; return error_code; } } - error_code = vk_fill_command_buffer_with_dispatch_call(user_context, - ctx.device, cmds.command_buffer, - entry_point_binding->compute_pipeline, - shader_module->pipeline_layout, - entry_point_binding->descriptor_set, - entry_point_index, - blocksX, blocksY, blocksZ); + error_code = vk_dispatch_kernel(user_context, + ctx.device, cmds.command_buffer, + entry_point_binding->compute_pipeline, + shader_module->pipeline_layout, + entry_point_binding->descriptor_set, + entry_point_index, + blocksX, blocksY, blocksZ); if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to fill command buffer with dispatch call!\n"; 
return error_code; } - error_code = vk_end_command_buffer_for_dispatch_call(user_context, cmds.command_buffer); + error_code = vk_end_command_buffer(user_context, cmds.command_buffer); if (error_code != halide_error_code_success) { error(user_context) << "Vulkan: Failed to end command buffer for dispatch call!\n"; return error_code; diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h index 2086542a92b6..821db25dc98e 100644 --- a/src/runtime/vulkan_internal.h +++ b/src/runtime/vulkan_internal.h @@ -106,19 +106,19 @@ int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocat struct ScopedVulkanCommandBufferAndPool; -int vk_start_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer); -int vk_end_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer); -int vk_bind_pipeline_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline); -int vk_bind_descriptor_set_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline); - -int vk_fill_command_buffer_with_dispatch_call(void *user_context, - VkDevice device, - VkCommandBuffer command_buffer, - VkPipeline compute_pipeline, - VkPipelineLayout pipeline_layout, - VkDescriptorSet descriptor_set, - uint32_t descriptor_set_index, - int blocksX, int blocksY, int blocksZ); +int vk_begin_command_buffer(void *user_context, VkCommandBuffer command_buffer); +int vk_end_command_buffer(void *user_context, VkCommandBuffer command_buffer); +int vk_bind_pipeline(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline); +int vk_bind_descriptor_sets_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline); + +int vk_dispatch_kernel(void *user_context, + VkDevice device, + VkCommandBuffer command_buffer, + VkPipeline compute_pipeline, + VkPipelineLayout pipeline_layout, + VkDescriptorSet 
descriptor_set, + uint32_t descriptor_set_index, + int blocksX, int blocksY, int blocksZ); int vk_submit_command_buffer(void *user_context, VkQueue queue, VkCommandBuffer command_buffer); @@ -208,11 +208,11 @@ int vk_update_descriptor_set(void *user_context, int8_t arg_is_buffer[], VkDescriptorSet descriptor_set); -int vk_bind_descriptor_set(void *user_context, - VkCommandBuffer command_buffer, - VkPipelineLayout pipeline_layout, - VkDescriptorSet descriptor_set, - uint32_t descriptor_set_index); +int vk_bind_descriptor_sets(void *user_context, + VkCommandBuffer command_buffer, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index); int vk_push_descriptor_set(void *user_context, VulkanMemoryAllocator *allocator, diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index 06176cdc1c9e..711c532f8987 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -202,10 +202,10 @@ struct ScopedVulkanCommandBufferAndPool { } }; -int vk_start_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer) { +int vk_begin_command_buffer(void *user_context, VkCommandBuffer command_buffer) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_start_command_buffer_for_dispatch_call (user_context: " << user_context << ", " + << " vk_begin_command_buffer (user_context: " << user_context << ", " << "command_buffer: " << (void *)command_buffer << ")\n"; #endif @@ -224,10 +224,10 @@ int vk_start_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffe return halide_error_code_success; } -int vk_end_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer command_buffer) { +int vk_end_command_buffer(void *user_context, VkCommandBuffer command_buffer) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_end_command_buffer_for_dispatch_call (user_context: " << user_context << ", " + << " vk_end_command_buffer (user_context: " << user_context << ", " 
<< "command_buffer: " << (void *)command_buffer << ")\n"; #endif @@ -239,10 +239,10 @@ int vk_end_command_buffer_for_dispatch_call(void *user_context, VkCommandBuffer return halide_error_code_success; } -int vk_bind_pipeline_to_command_buffer(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline) { +int vk_bind_pipeline(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_bind_pipeline_to_command_buffer (user_context: " << user_context << ", " + << " vk_bind_pipeline (user_context: " << user_context << ", " << "command_buffer: " << (void *)command_buffer << ", " << "compute_pipeline: " << (void *)compute_pipeline << ")\n"; #endif @@ -250,18 +250,18 @@ int vk_bind_pipeline_to_command_buffer(void *user_context, VkCommandBuffer comma return halide_error_code_success; } -int vk_fill_command_buffer_with_dispatch_call(void *user_context, - VkDevice device, - VkCommandBuffer command_buffer, - VkPipeline compute_pipeline, - VkPipelineLayout pipeline_layout, - VkDescriptorSet descriptor_set, - uint32_t descriptor_set_index, - int blocksX, int blocksY, int blocksZ) { +int vk_dispatch_kernel(void *user_context, + VkDevice device, + VkCommandBuffer command_buffer, + VkPipeline compute_pipeline, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index, + int blocksX, int blocksY, int blocksZ) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_fill_command_buffer_with_dispatch_call (user_context: " << user_context << ", " + << " vk_dispatch_kernel (user_context: " << user_context << ", " << "device: " << (void *)device << ", " << "command_buffer: " << (void *)command_buffer << ", " << "pipeline_layout: " << (void *)pipeline_layout << ", " @@ -270,9 +270,6 @@ int vk_fill_command_buffer_with_dispatch_call(void *user_context, << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n"; #endif -// 
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline); -// vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, -// descriptor_set_index, 1, &descriptor_set, 0, nullptr); vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ); return halide_error_code_success; } @@ -443,8 +440,8 @@ int vk_create_descriptor_set_layout(void *user_context, for (uint32_t n = 0; n < uniform_buffer_count; ++n) { // Params will be passed as UNIFORM_BUFFERs VkDescriptorSetLayoutBinding uniform_buffer_layout_binding{}; - uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index - uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; uniform_buffer_layout_binding.descriptorCount = 1; uniform_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME @@ -458,8 +455,8 @@ int vk_create_descriptor_set_layout(void *user_context, for (uint32_t n = 0; n < storage_buffer_count; ++n) { // halide buffers will be passed as STORAGE_BUFFERs VkDescriptorSetLayoutBinding storage_buffer_layout_binding{}; - storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index - storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size(), // binding index + storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; storage_buffer_layout_binding.descriptorCount = 1; storage_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; #ifdef DEBUG_RUNTIME @@ -544,15 +541,15 @@ int vk_create_descriptor_set(void *user_context, } int vk_get_descriptor_buffer_info(void *user_context, - VulkanMemoryAllocator *allocator, - VkDescriptorSet 
descriptor_set, - VkBuffer *scalar_args_buffer, - size_t uniform_buffer_count, - size_t storage_buffer_count, - size_t arg_sizes[], - void *args[], - int8_t arg_is_buffer[], - BlockStorage *descriptor_buffer_info_result) { + VulkanMemoryAllocator *allocator, + VkDescriptorSet descriptor_set, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + BlockStorage *descriptor_buffer_info_result) { #ifdef DEBUG_RUNTIME debug(user_context) << " vk_get_descriptor_buffer_info (user_context: " << user_context << ", " @@ -733,8 +730,7 @@ int vk_update_descriptor_set(void *user_context, arg_sizes, args, arg_is_buffer, - &descriptor_buffer_info - ); + &descriptor_buffer_info); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; @@ -752,8 +748,7 @@ int vk_update_descriptor_set(void *user_context, &descriptor_buffer_info, descriptor_set, scalar_args_buffer, - &write_descriptor_set - ); + &write_descriptor_set); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... 
write descriptor set creation failed!\n"; @@ -762,19 +757,19 @@ int vk_update_descriptor_set(void *user_context, // issue the update call to populate the descriptor set uint32_t update_size = (uint32_t)write_descriptor_set.size(); - const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); vkUpdateDescriptorSets(allocator->current_device(), update_size, update_data, 0, nullptr); return halide_error_code_success; } -int vk_bind_descriptor_set(void *user_context, - VkCommandBuffer command_buffer, - VkPipelineLayout pipeline_layout, - VkDescriptorSet descriptor_set, - uint32_t descriptor_set_index) { +int vk_bind_descriptor_sets(void *user_context, + VkCommandBuffer command_buffer, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index) { #ifdef DEBUG_RUNTIME debug(user_context) - << " vk_bind_descriptor_set_to_compute_pipeline (user_context: " << user_context << ", " + << " vk_bind_descriptor_sets_to_compute_pipeline (user_context: " << user_context << ", " << "command_buffer: " << (void *)command_buffer << ", " << "pipeline_layout: " << (void *)pipeline_layout << ", " << "descriptor_set: " << (void *)descriptor_set << ", " @@ -826,8 +821,7 @@ int vk_push_descriptor_set(void *user_context, arg_sizes, args, arg_is_buffer, - &descriptor_buffer_info - ); + &descriptor_buffer_info); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n"; @@ -845,8 +839,7 @@ int vk_push_descriptor_set(void *user_context, &descriptor_buffer_info, descriptor_set, scalar_args_buffer, - &write_descriptor_set - ); + &write_descriptor_set); if (error_code) { error(user_context) << "Vulkan: Failed to update descriptor set ... 
write descriptor set creation failed!\n"; @@ -855,7 +848,7 @@ int vk_push_descriptor_set(void *user_context, // issue the update call to populate the descriptor set uint32_t update_size = (uint32_t)write_descriptor_set.size(); - const VkWriteDescriptorSet * update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); + const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data(); vkCmdPushDescriptorSetKHR(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, update_size, update_data); return halide_error_code_success; }