Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[vulkan] Added support for RPI 5 #8548

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1304,17 +1304,28 @@ int Target::get_arm_v8_lower_bound() const {
}

bool Target::supports_type(const Type &t) const {
if (has_feature(Vulkan)) {
if (t.is_float() && t.bits() == 64) {
return has_feature(Target::VulkanFloat64);
} else if (t.is_float() && t.bits() == 16) {
return has_feature(Target::VulkanFloat16);
} else if (t.is_int_or_uint() && t.bits() == 64) {
return has_feature(Target::VulkanInt64);
} else if (t.is_int_or_uint() && t.bits() == 16) {
return has_feature(Target::VulkanInt16);
} else if (t.is_int_or_uint() && t.bits() == 8) {
return has_feature(Target::VulkanInt8);
}
}
if (t.bits() == 64) {
if (t.is_float()) {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) &&
!has_feature(WebGPU));
} else {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) &&
!has_feature(WebGPU));
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/internal/memory_resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment)
ALWAYS_INLINE size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) {
size_t adjusted_size = aligned_size(offset, size, alignment);
adjusted_size = (alignment > adjusted_size) ? alignment : adjusted_size;
if (nearest_multiple > 0) {
if ((nearest_multiple > 0) && ((adjusted_size % nearest_multiple) != 0)) {
size_t rounded_size = (((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple);
return rounded_size;
} else {
Expand Down
54 changes: 48 additions & 6 deletions src/runtime/internal/region_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class RegionAllocator {
BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region);

// Returns true if the given region can be split to accomodate the given size
bool can_split(const BlockRegion *region, const MemoryRequest &request) const;
bool can_split(void *use_context, const BlockRegion *region, const MemoryRequest &request) const;

// Splits the given block region into a smaller region to accomodate the given size, followed by empty space for the remaining
BlockRegion *split_block_region(void *user_context, BlockRegion *region, const MemoryRequest &request);
Expand Down Expand Up @@ -195,7 +195,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
return nullptr;
}

if (can_split(block_region, region_request)) {
if (can_split(user_context, block_region, region_request)) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") "
<< "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)";
Expand Down Expand Up @@ -443,8 +443,29 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe
return block_region;
}

bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const {
return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0));
bool RegionAllocator::can_split(void *user_context, const BlockRegion *block_region, const MemoryRequest &split_request) const {

// See if we can actually split the block region and create empty space big enough
if (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)) {

// We can only split if there's still room left after conforming the allocation request since the
// conform method may actually grow the requested size to accomodate alignment constraints
MemoryRequest test_request = split_request;
test_request.size = block_region->memory.size - test_request.size;
test_request.offset = block_region->memory.offset + test_request.size;
int error_code = conform(user_context, &test_request);
if (error_code) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(nullptr) << "RegionAllocator: Failed to conform test request for splitting block region!\n";
#endif
return false;
}

if ((block_region->memory.size - test_request.size) > 0) {
return true;
}
}
return false;
}

BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) {
Expand All @@ -470,8 +491,9 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion

#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Splitting "
<< "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) "
<< "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)";
<< "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) into ...\n\t"
<< "existing region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size - split_request.size) << " bytes)\n\t"
<< "empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)\n";
#endif
BlockRegion *next_region = block_region->next_ptr;
BlockRegion *empty_region = create_block_region(user_context, split_request);
Expand All @@ -484,6 +506,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion
empty_region->prev_ptr = block_region;
block_region->next_ptr = empty_region;
block_region->memory.size -= empty_region->memory.size;

#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Split block region into ...\n\t"
<< "existing region (ptr=" << (void *)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t"
<< "empty region (ptr=" << (void *)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n";
#endif
return empty_region;
}

Expand Down Expand Up @@ -605,8 +633,22 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r
#endif
halide_abort_if_false(user_context, allocators.region.allocate != nullptr);
halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available);

int error_code = 0;
MemoryRegion *memory_region = &(block_region->memory);
if (memory_region->size <= 0) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << " skipping zero size region ("
<< "block_ptr=" << (void *)block_region->block_ptr << " "
<< "block_region=" << (void *)block_region << " "
<< "memory_offset=" << (uint32_t)(block_region->memory.offset) << " "
<< "memory_size=" << (uint32_t)(block_region->memory.size) << " "
<< "block_reserved=" << (uint32_t)block->reserved << " "
<< ")\n";
#endif
return error_code;
}

if (memory_region->handle == nullptr) {
error_code = allocators.region.allocate(user_context, memory_region);
memory_region->is_owner = true;
Expand Down
92 changes: 63 additions & 29 deletions src/runtime/vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1193,13 +1193,6 @@ WEAK int halide_vulkan_run(void *user_context,
}
}
}

// 2b. Create the pipeline layout
error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
return error_code;
}
}

VulkanDispatchData dispatch_data = {};
Expand All @@ -1213,16 +1206,8 @@ WEAK int halide_vulkan_run(void *user_context,

VulkanShaderBinding *entry_point_binding = (shader_module->shader_bindings + entry_point_index);

// 2c. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
return error_code;
}

// 2d. Create a descriptor set
if (entry_point_binding->descriptor_set == VK_NULL_HANDLE) {

// 2c. If Push Descriptor Set isn't supported, then allocate a descriptor set
if ((vkCmdPushDescriptorSetKHR == nullptr) && (entry_point_binding->descriptor_set == VK_NULL_HANDLE)) {
// Construct a descriptor pool
//
// NOTE: while this could be re-used across multiple pipelines, we only know the storage requirements of this kernel's
Expand All @@ -1244,7 +1229,7 @@ WEAK int halide_vulkan_run(void *user_context,
}
}

// 3a. Create a buffer for the scalar parameters
// 2d. Create a buffer for the scalar parameters
if ((entry_point_binding->args_region == nullptr) && entry_point_binding->uniform_buffer_count) {
size_t scalar_buffer_size = vk_estimate_scalar_uniform_buffer_size(user_context, arg_sizes, args, arg_is_buffer);
if (scalar_buffer_size > 0) {
Expand All @@ -1256,7 +1241,7 @@ WEAK int halide_vulkan_run(void *user_context,
}
}

// 3b. Update uniform buffer with scalar parameters
// 2e. Update uniform buffer with scalar parameters
VkBuffer *args_buffer = nullptr;
if ((entry_point_binding->args_region != nullptr) && entry_point_binding->uniform_buffer_count) {
error_code = vk_update_scalar_uniform_buffer(user_context, ctx.allocator, entry_point_binding->args_region, arg_sizes, args, arg_is_buffer);
Expand All @@ -1272,10 +1257,28 @@ WEAK int halide_vulkan_run(void *user_context,
}
}

// 3c. Update buffer bindings for descriptor set
error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
// 2f. If Push Descriptor Set isn't supported, then update the buffer bindings for the allocated descriptor set
if (vkCmdPushDescriptorSetKHR == nullptr) {
error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
return error_code;
}
}

// 2b. Create the pipeline layout
if (shader_module->pipeline_layout == VK_NULL_HANDLE) {
error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
return error_code;
}
}

// 3. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
return error_code;
}

Expand All @@ -1287,18 +1290,49 @@ WEAK int halide_vulkan_run(void *user_context,
}

// 5. Fill the command buffer
error_code = vk_fill_command_buffer_with_dispatch_call(user_context,
ctx.device, cmds.command_buffer,
entry_point_binding->compute_pipeline,
shader_module->pipeline_layout,
entry_point_binding->descriptor_set,
entry_point_index,
blocksX, blocksY, blocksZ);
error_code = vk_begin_command_buffer(user_context, cmds.command_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to start command buffer for dispatch call!\n";
return error_code;
}
error_code = vk_bind_pipeline(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n";
return error_code;
}

if (vkCmdPushDescriptorSetKHR != nullptr) {
error_code = vk_push_descriptor_set(user_context, ctx.allocator, cmds.command_buffer, entry_point_binding->compute_pipeline, shader_module->pipeline_layout, entry_point_binding->descriptor_set, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
return error_code;
}
} else {
error_code = vk_bind_descriptor_sets(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n";
return error_code;
}
}

error_code = vk_dispatch_kernel(user_context,
ctx.device, cmds.command_buffer,
entry_point_binding->compute_pipeline,
shader_module->pipeline_layout,
entry_point_binding->descriptor_set,
entry_point_index,
blocksX, blocksY, blocksZ);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to fill command buffer with dispatch call!\n";
return error_code;
}

error_code = vk_end_command_buffer(user_context, cmds.command_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to end command buffer for dispatch call!\n";
return error_code;
}

// 6. Submit the command buffer to our command queue
error_code = vk_submit_command_buffer(user_context, ctx.queue, cmds.command_buffer);
if (error_code != halide_error_code_success) {
Expand Down
10 changes: 9 additions & 1 deletion src/runtime/vulkan_extensions.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,18 @@ uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_
uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table) {
const char *optional_ext_table[] = {
"VK_KHR_portability_subset", //< necessary for running under Molten (aka Vulkan on Mac)
VK_KHR_MAINTENANCE_1_EXTENSION_NAME,
VK_KHR_MAINTENANCE_2_EXTENSION_NAME,
VK_KHR_MAINTENANCE_3_EXTENSION_NAME,
VK_KHR_MAINTENANCE_4_EXTENSION_NAME,
VK_KHR_MAINTENANCE_5_EXTENSION_NAME,
VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
VK_KHR_MAINTENANCE_7_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME};
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME};

const uint32_t optional_ext_count = sizeof(optional_ext_table) / sizeof(optional_ext_table[0]);
ext_table.fill(user_context, (const char **)optional_ext_table, optional_ext_count);
return optional_ext_count;
Expand Down
1 change: 1 addition & 0 deletions src/runtime/vulkan_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ VULKAN_FN(vkCmdCopyBuffer2)
// VULKAN_FN(vkCmdCopyImageToBuffer2)
// VULKAN_FN(vkCmdEndRendering)
VULKAN_FN(vkCmdPipelineBarrier2)
VULKAN_FN(vkCmdPushDescriptorSetKHR)
VULKAN_FN(vkCmdResetEvent2)
// VULKAN_FN(vkCmdResolveImage2)
// VULKAN_FN(vkCmdSetCullMode)
Expand Down
Loading