From 345ac7becf33e52a0ee3037c799ee1722764b9ce Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 19 Jul 2024 22:07:49 +0530 Subject: [PATCH 01/50] Add 26_Autoexposure --- 23_Autoexposure/CMakeLists.txt | 12 --------- 26_Autoexposure/CMakeLists.txt | 25 +++++++++++++++++++ .../config.json.template | 0 {23_Autoexposure => 26_Autoexposure}/main.cpp | 0 .../pipeline.groovy | 0 CMakeLists.txt | 1 + 6 files changed, 26 insertions(+), 12 deletions(-) delete mode 100644 23_Autoexposure/CMakeLists.txt create mode 100644 26_Autoexposure/CMakeLists.txt rename {23_Autoexposure => 26_Autoexposure}/config.json.template (100%) rename {23_Autoexposure => 26_Autoexposure}/main.cpp (100%) rename {23_Autoexposure => 26_Autoexposure}/pipeline.groovy (100%) diff --git a/23_Autoexposure/CMakeLists.txt b/23_Autoexposure/CMakeLists.txt deleted file mode 100644 index 8604e54c4..000000000 --- a/23_Autoexposure/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -set(EXAMPLE_SOURCES - ../../src/nbl/ext/LumaMeter/CLumaMeter.cpp - ../../src/nbl/ext/ToneMapper/CToneMapper.cpp -) - -nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/26_Autoexposure/CMakeLists.txt b/26_Autoexposure/CMakeLists.txt new file mode 100644 index 000000000..0724366c9 --- /dev/null +++ b/26_Autoexposure/CMakeLists.txt @@ -0,0 +1,25 @@ + +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() \ No newline at end of file diff --git a/23_Autoexposure/config.json.template b/26_Autoexposure/config.json.template similarity index 100% rename from 23_Autoexposure/config.json.template rename to 26_Autoexposure/config.json.template diff --git a/23_Autoexposure/main.cpp b/26_Autoexposure/main.cpp similarity index 100% rename from 23_Autoexposure/main.cpp rename to 26_Autoexposure/main.cpp diff --git a/23_Autoexposure/pipeline.groovy b/26_Autoexposure/pipeline.groovy similarity index 100% rename from 23_Autoexposure/pipeline.groovy rename to 26_Autoexposure/pipeline.groovy diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bc4ffc23..1c173f573 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(23_ArithmeticUnitTest EXCLUDE_FROM_ALL) add_subdirectory(24_ColorSpaceTest EXCLUDE_FROM_ALL) 
add_subdirectory(25_FilterTest EXCLUDE_FROM_ALL) + add_subdirectory(26_Autoexposure EXCLUDE_FROM_ALL) # add_subdirectory(36_CUDAInterop EXCLUDE_FROM_ALL) add_subdirectory(38_EXRSplit EXCLUDE_FROM_ALL) From 87d4794dcc5de8264528292c4a30b5284979754a Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sat, 20 Jul 2024 00:33:17 +0530 Subject: [PATCH 02/50] Change 26_Autoexposure to SimpleWindowedApplication --- 26_Autoexposure/CMakeLists.txt | 2 +- 26_Autoexposure/main.cpp | 95 +++++++++++++++++++++++++++++++++- 2 files changed, 95 insertions(+), 2 deletions(-) diff --git a/26_Autoexposure/CMakeLists.txt b/26_Autoexposure/CMakeLists.txt index 0724366c9..34040e8c1 100644 --- a/26_Autoexposure/CMakeLists.txt +++ b/26_Autoexposure/CMakeLists.txt @@ -22,4 +22,4 @@ if(NBL_EMBED_BUILTIN_RESOURCES) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +endif() diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 83b62c88d..7b89917b5 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -1,3 +1,94 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "../common/SimpleWindowedApplication.hpp" + +#include "nbl/video/surface/CSurfaceVulkan.h" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; + +//#include "app_resources/push_constants.hlsl" + +class AutoexposureApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +{ + using device_base_t = examples::SimpleWindowedApplication; + using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using clock_t = std::chrono::steady_clock; + +public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + inline AutoexposureApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + // Will get called mid-initialization, via `filterDevices` between when the API Connection is created and Physical Device is chosen + inline core::vector getSurfaces() const override + { + // So let's create our Window and Surface then! + if (!m_surface) + { + { + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = 256; + params.height = 256; + params.x = 32; + params.y = 32; + // Don't want to have a window lingering about before we're ready so create it hidden. + // Only programmatic resize, not regular. 
+ params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "AutoexposureApp"; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + return true; + } + + // We do a very simple thing, display an image and wait `DisplayImageMs` to show it + inline void workLoopBody() override + { + } + + inline bool keepRunning() override + { + return false; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +protected: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; +}; + +NBL_MAIN_FUNC(AutoexposureApp) + +#if 0 + // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h @@ -174,4 +265,6 @@ int main() } return 0; -} \ No newline at end of file +} + +#endif \ No newline at end of file From 7a5ea7c0c217d515550f67b7a36ebdd1462870f4 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 24 Jul 2024 18:12:34 +0530 Subject: [PATCH 03/50] Build a staging buffer and upload exr image --- 26_Autoexposure/main.cpp | 219 +++++++++++++++++++++++++++++++++------ 1 file changed, 187 insertions(+), 32 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 7b89917b5..fdabdd7f9 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -5,6 +5,7 @@ #include "../common/SimpleWindowedApplication.hpp" #include "nbl/video/surface/CSurfaceVulkan.h" +#include "nbl/asset/interchange/IAssetLoader.h" using namespace nbl; using namespace core; @@ -22,6 +23,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; using clock_t = std::chrono::steady_clock; + constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; + + public: // Yay thanks to multiple inheritance we cannot forward ctors anymore inline AutoexposureApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -62,6 +66,178 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public if (!asset_base_t::onAppInitialized(std::move(system))) return false; + /* + * We'll be using a combined image sampler for this example, which lets us assign both a sampled image and a sampler to the same binding. + * In this example we provide a sampler at descriptor set creation time, via the SBinding struct below. 
This specifies that the sampler for this binding is immutable, + * as evidenced by the name of the field in the SBinding. + * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers + * field set to its default (nullptr). + */ + smart_refctd_ptr dsLayout; + { + auto defaultSampler = m_device->createSampler({ + .AnisotropicFilter = 0 + }); + + const IGPUDescriptorSetLayout::SBinding bindings[1] = { { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1, + .immutableSamplers = &defaultSampler + } + }; + dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + return logFail("Failed to Create Descriptor Layout"); + + } + + // create the descriptor set and with enough room for one image sampler + { + const uint32_t setCount = 1; + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, { &dsLayout.get(),1 }, &setCount); + if (!pool) + return logFail("Failed to Create Descriptor Pool"); + + m_descriptorSets[0] = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); + if (!m_descriptorSets[0]) + return logFail("Could not create Descriptor Set!"); + } + + auto queue = getGraphicsQueue(); + + // need resetttable commandbuffers for the upload utility + { + m_cmdPool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + // create the commandbuffers + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + // things for IUtilities + { + m_scratchSemaphore = m_device->createSemaphore(0); + if 
(!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + // we don't want to overcomplicate the example with multi-queue + m_intendedSubmit.queue = queue; + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + // fill later + m_intendedSubmit.commandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + } + + // Allocate and Leave 1/4 for image uploads, to test image copy with small memory remaining + { + uint32_t localOffset = video::StreamingTransientDataBufferMT<>::invalid_value; + uint32_t maxFreeBlock = m_utils->getDefaultUpStreamingBuffer()->max_size(); + const uint32_t allocationAlignment = 64u; + const uint32_t allocationSize = (maxFreeBlock / 4) * 3; + m_utils->getDefaultUpStreamingBuffer()->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(500u), 1u, &localOffset, &allocationSize, &allocationAlignment); + } + + // Load exr file into gpu + { + IAssetLoader::SAssetLoadParams params; + auto imageBundle = m_assetMgr->getAsset(DefaultImagePathsFile.data(), params); + auto cpuImg = IAsset::castDown(imageBundle.getContents().begin()[0]); + auto format = cpuImg->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(cpuImg), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + const auto cpuImgView = ICPUImageView::create(std::move(viewParams)); + const auto& cpuImgParams = cpuImgView->getCreationParameters(); + + // create matching size image + 
IGPUImage::SCreationParams imageParams = {}; + imageParams = cpuImgParams.image->getCreationParameters(); + imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) + }; + imageParams.format = m_physicalDevice->promoteImageFormat(request, imageParams.tiling); + } + if (imageParams.type == IGPUImage::ET_3D) + imageParams.flags |= IGPUImage::ECF_2D_ARRAY_COMPATIBLE_BIT; + m_gpuImg = m_device->createImage(std::move(imageParams)); + if (!m_gpuImg || !m_device->allocate(m_gpuImg->getMemoryReqs(), m_gpuImg.get()).isValid()) + return false; + m_gpuImg->setObjectDebugName("Autoexposure Image"); + + // we don't want to overcomplicate the example with multi-queue + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.commandBuffers = { &cmdbufInfo, 1 }; + + // there's no previous operation to wait for + const SMemoryBarrier toTransferBarrier = { + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + }; + + // upload image and write to descriptor set + queue->startCapture(); + auto ds = m_descriptorSets[0].get(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // change the layout of the image + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { { + .barrier = { + .dep = toTransferBarrier + // no ownership transfers + }, + .image = m_gpuImg.get(), + // transition the whole view + .subresourceRange = cpuImgParams.subresourceRange, + // a wiping transition + .newLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL + } }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, 
{ .imgBarriers = imgBarriers }); + // upload contents and submit right away + m_utils->updateImageViaStagingBufferAutoSubmit( + m_intendedSubmit, + cpuImgParams.image->getBuffer(), + cpuImgParams.image->getCreationParameters().format, + m_gpuImg.get(), + IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, + cpuImgParams.image->getRegions() + ); + + IGPUImageView::SCreationParams gpuImgViewParams = { + .image = m_gpuImg, + .viewType = IGPUImageView::ET_2D_ARRAY, + .format = m_gpuImg->getCreationParameters().format + }; + + m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); + } + return true; } @@ -83,6 +259,17 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public protected: smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; + smart_refctd_ptr m_gpuImg; + smart_refctd_ptr m_gpuImgView; + + // for image uploads + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; + + // Command Buffers and other resources + std::array, ISwapchain::MaxImages> m_descriptorSets; + smart_refctd_ptr m_cmdPool; + std::array, ISwapchain::MaxImages> m_cmdBufs; }; NBL_MAIN_FUNC(AutoexposureApp) @@ -136,38 +323,6 @@ int main() IAssetLoader::SAssetLoadParams lp; auto imageBundle = am->getAsset("../../media/noises/spp_benchmark_4k_512.exr", lp); - E_FORMAT inFormat; - constexpr auto outFormat = EF_R8G8B8A8_SRGB; - smart_refctd_ptr outImg; - smart_refctd_ptr imgToTonemapView,outImgView; - { - auto cpuImg = IAsset::castDown(imageBundle.getContents().begin()[0]); - IGPUImage::SCreationParams imgInfo = cpuImg->getCreationParameters(); - inFormat = imgInfo.format; - - auto gpuImages = driver->getGPUObjectsFromAssets(&cpuImg.get(),&cpuImg.get()+1); - auto gpuImage = gpuImages->operator[](0u); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.image = std::move(gpuImage); - imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; - imgViewInfo.format = inFormat; - 
imgViewInfo.subresourceRange.aspectMask = static_cast(0u); - imgViewInfo.subresourceRange.baseMipLevel = 0; - imgViewInfo.subresourceRange.levelCount = 1; - imgViewInfo.subresourceRange.baseArrayLayer = 0; - imgViewInfo.subresourceRange.layerCount = 1; - imgToTonemapView = driver->createImageView(IGPUImageView::SCreationParams(imgViewInfo)); - - imgInfo.format = outFormat; - outImg = driver->createDeviceLocalGPUImageOnDedMem(std::move(imgInfo)); - - imgViewInfo.image = outImg; - imgViewInfo.format = outFormat; - outImgView = driver->createImageView(IGPUImageView::SCreationParams(imgViewInfo)); - } - auto glslCompiler = am->getCompilerSet(); const auto inputColorSpace = std::make_tuple(inFormat,ECP_SRGB,EOTF_IDENTITY); From 5d63d041d08fcf08dd0fc061732f10fa1274e6f3 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:17:09 +0530 Subject: [PATCH 04/50] Init surface and create the swapchain --- 26_Autoexposure/main.cpp | 66 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index fdabdd7f9..43f39c917 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -25,7 +25,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; - public: // Yay thanks to multiple inheritance we cannot forward ctors anymore inline AutoexposureApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -108,6 +107,65 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto queue = getGraphicsQueue(); + // init the surface and create the swapchain + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + // Need to choose a surface 
format + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals + constexpr IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition to ATTACHMENT_OPTIMAL + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // since we're uploading the image data we're about to draw + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because we clear and don't blend + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // we can have NONE as the Destinations because the spec says so about presents + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + if (!scResources->getRenderpass()) + return logFail("Failed to create Renderpass!"); + if (!m_surface || !m_surface->init(queue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // Now create the pipeline + /* { + const asset::SPushConstantRange range = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(push_constants_t) + }; + auto 
layout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, core::smart_refctd_ptr(dsLayout)); + const IGPUShader::SSpecInfo fragSpec = { + .entryPoint = "main", + .shader = fragmentShader.get() + }; + m_pipeline = fsTriProtoPPln.createPipeline(fragSpec, layout.get(), scResources->getRenderpass()); + if (!m_pipeline) + return logFail("Could not create Graphics Pipeline!"); + }*/ + // need resetttable commandbuffers for the upload utility { m_cmdPool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); @@ -257,8 +315,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } protected: - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; smart_refctd_ptr m_gpuImg; smart_refctd_ptr m_gpuImgView; @@ -270,6 +326,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public std::array, ISwapchain::MaxImages> m_descriptorSets; smart_refctd_ptr m_cmdPool; std::array, ISwapchain::MaxImages> m_cmdBufs; + + // window + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; }; NBL_MAIN_FUNC(AutoexposureApp) From 640e6a38223306851d68b44b36b64fc4a863333e Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 25 Jul 2024 22:51:28 +0530 Subject: [PATCH 05/50] Load shaders and create the pipeline for full screen triagnle --- .../app_resources/present.frag.hlsl | 17 ++++++ 26_Autoexposure/main.cpp | 57 ++++++++++++++----- 2 files changed, 60 insertions(+), 14 deletions(-) create mode 100644 26_Autoexposure/app_resources/present.frag.hlsl diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl new file mode 100644 index 000000000..fcddeb743 --- /dev/null +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -0,0 +1,17 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl::ext::FullScreenTriangle; + +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return texture.Sample(samplerState, vxAttr.uv); +} \ No newline at end of file diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 43f39c917..cc048a3f5 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -6,6 +6,8 @@ #include "nbl/video/surface/CSurfaceVulkan.h" #include "nbl/asset/interchange/IAssetLoader.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + using namespace nbl; using namespace core; @@ -107,9 +109,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto queue = getGraphicsQueue(); - // init the surface and create the swapchain + // Gather swapchain resources + std::unique_ptr scResources; + ISwapchain::SCreationParams swapchainParams; { - ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; // Need to choose a surface format if (!swapchainParams.deduceFormat(m_physicalDevice)) return logFail("Could not choose a Surface Format for the Swapchain!"); @@ -142,21 +146,41 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }, IGPURenderpass::SCreationParams::DependenciesEnd }; - auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); if (!scResources->getRenderpass()) return logFail("Failed 
to create Renderpass!"); - if (!m_surface || !m_surface->init(queue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); } - // Now create the pipeline - /* { - const asset::SPushConstantRange range = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .offset = 0, - .size = sizeof(push_constants_t) - }; - auto layout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, core::smart_refctd_ptr(dsLayout)); + // Load the shaders and create the pipeline + { + // Load FSTri Shader + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Custom Shader + auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(relPath, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; + + // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader + auto source = IAsset::castDown(assets[0]); + if (!source) + return nullptr; + + return m_device->createShader(source.get()); + }; + auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader!"); + + auto layout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(dsLayout)); const IGPUShader::SSpecInfo fragSpec = { .entryPoint = "main", .shader = fragmentShader.get() @@ -164,7 +188,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_pipeline = fsTriProtoPPln.createPipeline(fragSpec, layout.get(), scResources->getRenderpass()); if 
(!m_pipeline) return logFail("Could not create Graphics Pipeline!"); - }*/ + } + + // Init the surface and create the swapchain + if (!m_surface || !m_surface->init(queue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); // need resetttable commandbuffers for the upload utility { @@ -326,6 +354,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public std::array, ISwapchain::MaxImages> m_descriptorSets; smart_refctd_ptr m_cmdPool; std::array, ISwapchain::MaxImages> m_cmdBufs; + smart_refctd_ptr m_pipeline; // window smart_refctd_ptr m_window; From d69a11179989b116f936a722300691273dc15e0d Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 26 Jul 2024 02:36:09 +0530 Subject: [PATCH 06/50] Set window size according to loaded image --- 26_Autoexposure/main.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index cc048a3f5..2f49bda6d 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -275,6 +275,21 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return false; m_gpuImg->setObjectDebugName("Autoexposure Image"); + // set window size + const auto imageExtent = m_gpuImg->getCreationParameters().extent; + const VkExtent2D newWindowResolution = { imageExtent.width, imageExtent.height }; + + if (newWindowResolution.width != m_window->getWidth() || newWindowResolution.height != m_window->getHeight()) + { + // Resize the window + m_winMgr->setWindowSize(m_window.get(), newWindowResolution.width, newWindowResolution.height); + // Don't want to rely on the Swapchain OUT_OF_DATE causing an implicit re-create in the `acquireNextImage` because the + // swapchain may report OUT_OF_DATE after the next VBlank after the resize, not getting the message right away. 
+ m_surface->recreateSwapchain(); + } + // Now show the window (ideally should happen just after present, but don't want to mess with acquire/recreation) + m_winMgr->show(m_window.get()); + // we don't want to overcomplicate the example with multi-queue auto queue = getGraphicsQueue(); auto cmdbuf = m_cmdBufs[0].get(); @@ -314,7 +329,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, cpuImgParams.image->getRegions() ); - IGPUImageView::SCreationParams gpuImgViewParams = { .image = m_gpuImg, .viewType = IGPUImageView::ET_2D_ARRAY, @@ -322,6 +336,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); + queue->endCapture(); } return true; @@ -334,7 +349,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public inline bool keepRunning() override { - return false; + return true; } inline bool onAppTerminated() override From 54bf38f6661b17a1fbfe6b4c30bc4b287bf45467 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 26 Jul 2024 02:39:26 +0530 Subject: [PATCH 07/50] Stop running if window is closed --- 26_Autoexposure/main.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 2f49bda6d..95fcab225 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -349,6 +349,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public inline bool keepRunning() override { + // Keep arunning as long as we have a surface to present to (usually this means, as long as the window is open) + if (m_surface->irrecoverable()) + return false; + return true; } From 461efd36efd02b48b07e14b027f6e51d431d296c Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 26 Jul 2024 16:24:19 +0530 Subject: 
[PATCH 08/50] Acquire swapchain image and present uploaded image to it --- 26_Autoexposure/main.cpp | 142 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 138 insertions(+), 4 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 95fcab225..fbbc31524 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -8,7 +8,6 @@ #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" - using namespace nbl; using namespace core; using namespace hlsl; @@ -95,6 +94,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } + // Create semaphore + m_semaphore = m_device->createSemaphore(m_submitIx); + // create the descriptor set and with enough room for one image sampler { const uint32_t setCount = 1; @@ -107,6 +109,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Could not create Descriptor Set!"); } + auto ds = m_descriptorSets[0].get(); auto queue = getGraphicsQueue(); // Gather swapchain resources @@ -256,7 +259,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public const auto cpuImgView = ICPUImageView::create(std::move(viewParams)); const auto& cpuImgParams = cpuImgView->getCreationParameters(); - // create matching size image + // create matching size image upto dimensions IGPUImage::SCreationParams imageParams = {}; imageParams = cpuImgParams.image->getCreationParameters(); imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT; @@ -304,7 +307,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // upload image and write to descriptor set queue->startCapture(); - auto ds = m_descriptorSets[0].get(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); // change the layout of the image @@ -331,11 +333,28 @@ class AutoexposureApp final : public 
examples::SimpleWindowedApplication, public ); IGPUImageView::SCreationParams gpuImgViewParams = { .image = m_gpuImg, - .viewType = IGPUImageView::ET_2D_ARRAY, + .viewType = IGPUImageView::ET_2D, .format = m_gpuImg->getCreationParameters().format }; m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); + + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + info.desc = m_gpuImgView; + + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { + { + .dstSet = ds, + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = &info + } + }; + + m_device->updateDescriptorSets(1, writeDescriptors, 0, nullptr); + queue->endCapture(); } @@ -345,6 +364,119 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // We do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { + // Acquire + auto acquire = m_surface->acquireNextImage(); + if (!acquire) + return; + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + auto ds = m_descriptorSets[0].get(); + + // there's no previous operation to wait for + const SMemoryBarrier toTransferBarrier = { + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + }; + const auto gpuImgCreationParams = m_gpuImg->getCreationParameters(); + const auto gpuImgViewCreationParams = m_gpuImgView->getCreationParameters(); + + queue->startCapture(); + // Render to the Image + { + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // need a pipeline barrier to transition layout + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { { + .barrier = { + .dep = toTransferBarrier.nextBarrier(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,ACCESS_FLAGS::SAMPLED_READ_BIT) + }, + .image = m_gpuImg.get(), + .subresourceRange = gpuImgViewCreationParams.subresourceRange, + .oldLayout = 
IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL + } }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {gpuImgCreationParams.extent.width, gpuImgCreationParams.extent.height} + }; + // set viewport + { + const asset::SViewport viewport = + { + .width = float(gpuImgCreationParams.extent.width), + .height = float(gpuImgCreationParams.extent.height) + }; + cmdbuf->setViewport({ &viewport,1 }); + } + cmdbuf->setScissor({ &currentRenderArea,1 }); + + // begin the renderpass + { + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo info = { + .framebuffer = scRes->getFramebuffer(acquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + } + cmdbuf->bindGraphicsPipeline(m_pipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_pipeline->getLayout(), 3, 1, &ds); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + cmdbuf->endRenderPass(); + + cmdbuf->end(); + } + + // submit + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[1] = { { + .semaphore = m_semaphore.get(), + .value = ++m_submitIx, + // just as we've outputted all pixels, signal + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[1] = { { + .cmdbuf = cmdbuf + } }; + // we don't need to wait for the transfer semaphore, because we submit everything to the same queue + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[1] = { { + .semaphore = acquire.semaphore, + .value = acquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } }; + const
IQueue::SSubmitInfo infos[1] = { { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } }; + // we won't signal the sema if no success + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_submitIx--; + } + } + + // Present + m_surface->present(acquire.imageIndex, rendered); + getGraphicsQueue()->endCapture(); + + { + const ISemaphore::SWaitInfo cmdbufDonePending[] = { + { + .semaphore = m_semaphore.get(), + .value = m_submitIx + } + }; + if (m_device->blockForSemaphores(cmdbufDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } } inline bool keepRunning() override @@ -374,6 +506,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public smart_refctd_ptr m_cmdPool; std::array, ISwapchain::MaxImages> m_cmdBufs; smart_refctd_ptr m_pipeline; + smart_refctd_ptr m_semaphore; + uint64_t m_submitIx = 0; // window smart_refctd_ptr m_window; From 734fea90599aefa5b431057eb7c8796854cac6ca Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 26 Jul 2024 22:03:54 +0530 Subject: [PATCH 09/50] Set window size directly and use that for swapchain rendering --- 26_Autoexposure/main.cpp | 53 +++++++++------------------------------- 1 file changed, 12 insertions(+), 41 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index fbbc31524..d34be555c 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -25,6 +25,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public using clock_t = std::chrono::steady_clock; constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; + constexpr static inline std::array Dimensions = { 1280, 720 }; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore @@ -40,8 +41,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public { 
IWindow::SCreationParams params = {}; params.callback = core::make_smart_refctd_ptr(); - params.width = 256; - params.height = 256; + params.width = Dimensions[0]; + params.height = Dimensions[1]; params.x = 32; params.y = 32; // Don't want to have a window lingering about before we're ready so create it hidden. @@ -278,19 +279,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return false; m_gpuImg->setObjectDebugName("Autoexposure Image"); - // set window size - const auto imageExtent = m_gpuImg->getCreationParameters().extent; - const VkExtent2D newWindowResolution = { imageExtent.width, imageExtent.height }; - - if (newWindowResolution.width != m_window->getWidth() || newWindowResolution.height != m_window->getHeight()) - { - // Resize the window - m_winMgr->setWindowSize(m_window.get(), newWindowResolution.width, newWindowResolution.height); - // Don't want to rely on the Swapchain OUT_OF_DATE causing an implicit re-create in the `acquireNextImage` because the - // swapchain may report OUT_OF_DATE after the next VBlank after the resize, not getting the message right away. 
- m_surface->recreateSwapchain(); - } - // Now show the window (ideally should happen just after present, but don't want to mess with acquire/recreation) + // Now show the window m_winMgr->show(m_window.get()); // we don't want to overcomplicate the example with multi-queue @@ -373,46 +362,26 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto cmdbuf = m_cmdBufs[0].get(); auto ds = m_descriptorSets[0].get(); - // there's no previous operation to wait for - const SMemoryBarrier toTransferBarrier = { - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT - }; - const auto gpuImgCreationParams = m_gpuImg->getCreationParameters(); - const auto gpuImgViewCreationParams = m_gpuImgView->getCreationParameters(); - queue->startCapture(); - // Render to the Image + // Render to the swapchain { cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // need a pipeline barrier to transition layout - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { { - .barrier = { - .dep = toTransferBarrier.nextBarrier(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,ACCESS_FLAGS::SAMPLED_READ_BIT) - }, - .image = m_gpuImg.get(), - .subresourceRange = gpuImgViewCreationParams.subresourceRange, - .oldLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL - } }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - const VkRect2D currentRenderArea = { .offset = {0,0}, - .extent = {gpuImgCreationParams.extent.width, gpuImgCreationParams.extent.height} + .extent = { m_window->getWidth(), m_window->getHeight() } }; // set viewport { const asset::SViewport viewport = { - .width = float(gpuImgCreationParams.extent.width), - .height = float(gpuImgCreationParams.extent.height) + .width = float(m_window->getWidth()), + .height = float(m_window->getHeight()) }; - cmdbuf->setViewport({ &viewport,1 }); + cmdbuf->setViewport({ 
&viewport, 1 }); + } - cmdbuf->setScissor({ &currentRenderArea,1 }); + cmdbuf->setScissor({ &currentRenderArea, 1 }); // begin the renderpass { @@ -426,6 +395,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } + cmdbuf->bindGraphicsPipeline(m_pipeline.get()); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_pipeline->getLayout(), 3, 1, &ds); ext::FullScreenTriangle::recordDrawCall(cmdbuf); @@ -467,6 +437,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_surface->present(acquire.imageIndex, rendered); getGraphicsQueue()->endCapture(); + // Wait for completion { const ISemaphore::SWaitInfo cmdbufDonePending[] = { { From 4a117243aa4cb096ee59a140617e88e7e6532f31 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Mon, 5 Aug 2024 19:02:28 +0530 Subject: [PATCH 10/50] m_computeSubgroupSize --- 26_Autoexposure/app_resources/present.frag.hlsl | 2 ++ 26_Autoexposure/main.cpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index fcddeb743..4f76de4cd 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -8,6 +8,8 @@ #include using namespace nbl::hlsl::ext::FullScreenTriangle; +#include + [[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; [[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index d34be555c..b285c930f 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -347,12 +347,15 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public queue->endCapture(); } + m_computeSubgroupSize = m_physicalDevice->getLimits().maxComputeWorkgroupSubgroups; + return true; } // We
do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { + // Acquire auto acquire = m_surface->acquireNextImage(); if (!acquire) @@ -483,6 +486,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // window smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; + + // constants + uint32_t m_computeSubgroupSize = 0; }; NBL_MAIN_FUNC(AutoexposureApp) From 7d4895a7a5287d6e3912657d168fdf385c39ec38 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 9 Aug 2024 03:10:32 +0530 Subject: [PATCH 11/50] Allocate buffer for gathered luma values --- 26_Autoexposure/app_resources/common.hlsl | 13 ++++ .../app_resources/luma_meter.comp.hlsl | 17 ++++++ .../app_resources/present.frag.hlsl | 2 - .../app_resources/tonemap.comp.hlsl | 17 ++++++ 26_Autoexposure/main.cpp | 60 +++++++++++++------ 5 files changed, 89 insertions(+), 20 deletions(-) create mode 100644 26_Autoexposure/app_resources/common.hlsl create mode 100644 26_Autoexposure/app_resources/luma_meter.comp.hlsl create mode 100644 26_Autoexposure/app_resources/tonemap.comp.hlsl diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl new file mode 100644 index 000000000..f2b21b7e4 --- /dev/null +++ b/26_Autoexposure/app_resources/common.hlsl @@ -0,0 +1,13 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _AUTOEXPOSURE_COMMON_INCLUDED_ +#define _AUTOEXPOSURE_COMMON_INCLUDED_ + +struct AutoexposurePushData +{ + uint32_t viewportSizeX, viewportSizeY; +}; + +#endif \ No newline at end of file diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl new file mode 100644 index 000000000..4a0797d6d --- /dev/null +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -0,0 +1,17 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "app_resources/common.hlsl" + +[[vk::push_constant]] AutoexposurePushData pushData; + +uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() +{ + return uint32_t3(WorkgroupSize, 1, 1); +} + +[numthreads(SubgroupSize, SubgroupSize, 1)] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) +{ +} \ No newline at end of file diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index 4f76de4cd..fcddeb743 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -8,8 +8,6 @@ #include using namespace nbl::hlsl::ext::FullScreenTriangle; -#include - [[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; [[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; diff --git a/26_Autoexposure/app_resources/tonemap.comp.hlsl b/26_Autoexposure/app_resources/tonemap.comp.hlsl new file mode 100644 index 000000000..4a0797d6d --- /dev/null +++ b/26_Autoexposure/app_resources/tonemap.comp.hlsl @@ -0,0 +1,17 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "app_resources/common.hlsl" + +[[vk::push_constant]] AutoexposurePushData pushData; + +uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() +{ + return uint32_t3(WorkgroupSize, 1, 1); +} + +[numthreads(SubgroupSize, SubgroupSize, 1)] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) +{ +} \ No newline at end of file diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index b285c930f..b31984844 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -227,6 +227,46 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; } + // Allocate and create buffer for Luma Gather + { + // Allocate memory + nbl::video::IDeviceMemoryAllocator::SAllocation allocation = {}; + smart_refctd_ptr buffer; + //smart_refctd_ptr ds; + { + auto build_buffer = [this]( + smart_refctd_ptr m_device, + nbl::video::IDeviceMemoryAllocator::SAllocation* allocation, + smart_refctd_ptr& buffer, + size_t buffer_size, + const char* label) + { + IGPUBuffer::SCreationParams params; + params.size = buffer_size; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + buffer = m_device->createBuffer(std::move(params)); + if (!buffer) + return logFail("Failed to create GPU buffer of size %d!\n", buffer_size); + + buffer->setObjectDebugName(label); + + auto reqs = buffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + *allocation = m_device->allocate(reqs, buffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + if (!allocation->isValid()) + return logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(allocation->memory.get() == buffer->getBoundMemory().memory); + }; + + auto x = m_physicalDevice->getLimits(); + + build_buffer(m_device, &allocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma 
Gather Buffer"); + } + m_lumaGatherBDA = buffer->getDeviceAddress(); + } + // Allocate and Leave 1/4 for image uploads, to test image copy with small memory remaining { uint32_t localOffset = video::StreamingTransientDataBufferMT<>::invalid_value; @@ -347,8 +387,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public queue->endCapture(); } - m_computeSubgroupSize = m_physicalDevice->getLimits().maxComputeWorkgroupSubgroups; - return true; } @@ -487,8 +525,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; - // constants - uint32_t m_computeSubgroupSize = 0; + // luma gather + uint64_t m_lumaGatherBDA; }; NBL_MAIN_FUNC(AutoexposureApp) @@ -531,20 +569,6 @@ int main() if (!device) return 1; // could not create selected driver. - QToQuitEventReceiver receiver; - device->setEventReceiver(&receiver); - - IVideoDriver* driver = device->getVideoDriver(); - - nbl::io::IFileSystem* filesystem = device->getFileSystem(); - IAssetManager* am = device->getAssetManager(); - - IAssetLoader::SAssetLoadParams lp; - auto imageBundle = am->getAsset("../../media/noises/spp_benchmark_4k_512.exr", lp); - - auto glslCompiler = am->getCompilerSet(); - const auto inputColorSpace = std::make_tuple(inFormat,ECP_SRGB,EOTF_IDENTITY); - using LumaMeterClass = ext::LumaMeter::CLumaMeter; constexpr auto MeterMode = LumaMeterClass::EMM_MEDIAN; const float minLuma = 1.f/2048.f; From 0e3e125bac2a53ab1692257db53f184b209417c4 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:59:14 +0530 Subject: [PATCH 12/50] Create gpu resources for all passes --- 26_Autoexposure/main.cpp | 132 +++++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 46 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index b31984844..23717516f 100644 --- a/26_Autoexposure/main.cpp +++ 
b/26_Autoexposure/main.cpp @@ -74,43 +74,74 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers * field set to its default (nullptr). */ - smart_refctd_ptr dsLayout; + smart_refctd_ptr lumaPresentDSLayout, tonemapperDSLayout; { - auto defaultSampler = m_device->createSampler({ - .AnisotropicFilter = 0 - }); - - const IGPUDescriptorSetLayout::SBinding bindings[1] = { { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1, - .immutableSamplers = &defaultSampler - } + auto defaultSampler = m_device->createSampler( + { + .AnisotropicFilter = 0 + } + ); + + const IGPUDescriptorSetLayout::SBinding lumaPresentBindings[1] = { + { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT | IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + } }; - dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - return logFail("Failed to Create Descriptor Layout"); + lumaPresentDSLayout = m_device->createDescriptorSetLayout(lumaPresentBindings); + if (!lumaPresentDSLayout) + return logFail("Failed to Create Descriptor Layout: lumaPresentDSLayout"); + const IGPUDescriptorSetLayout::SBinding tonemapperBindings[1] = { + { + .binding = 1, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + } + }; + tonemapperDSLayout = 
m_device->createDescriptorSetLayout(tonemapperBindings); + if (!tonemapperDSLayout) + return logFail("Failed to Create Descriptor Layout: tonemapperDSLayout"); } - // Create semaphore - m_semaphore = m_device->createSemaphore(m_submitIx); + // Create semaphores + m_lumaMeterSemaphore = m_device->createSemaphore(m_submitIx); + m_tonemapperSemaphore = m_device->createSemaphore(m_submitIx); + m_presentSemaphore = m_device->createSemaphore(m_submitIx); - // create the descriptor set and with enough room for one image sampler + // create the descriptor sets and with enough room { - const uint32_t setCount = 1; - auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, { &dsLayout.get(),1 }, &setCount); - if (!pool) - return logFail("Failed to Create Descriptor Pool"); - - m_descriptorSets[0] = pool->createDescriptorSet(core::smart_refctd_ptr(dsLayout)); - if (!m_descriptorSets[0]) - return logFail("Could not create Descriptor Set!"); + constexpr uint32_t lumaPresentSetCount = 2, tonemapperSetCount = 1; + auto lumaPresentPool = m_device->createDescriptorPoolForDSLayouts( + IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, + { &lumaPresentDSLayout.get(), 1 }, + &lumaPresentSetCount + ); + auto tonemapperPool = m_device->createDescriptorPoolForDSLayouts( + IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, + { &tonemapperDSLayout.get(), 1 }, + &tonemapperSetCount + ); + + if (!lumaPresentPool || !tonemapperPool) + return logFail("Failed to Create Descriptor Pools"); + + m_lumaPresentDS[0] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); + if (!m_lumaPresentDS[0]) + return logFail("Could not create Descriptor Set: lumaPresentDS!"); + m_tonemapperDS[0] = tonemapperPool->createDescriptorSet(core::smart_refctd_ptr(tonemapperDSLayout)); + if (!m_tonemapperDS[0]) + return logFail("Could not create Descriptor Set: tonemapperDS!"); + } - auto ds = m_descriptorSets[0].get(); auto queue = getGraphicsQueue(); // Gather 
swapchain resources @@ -184,13 +215,13 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public if (!fragmentShader) return logFail("Failed to Load and Compile Fragment Shader!"); - auto layout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(dsLayout)); + auto layout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout)); const IGPUShader::SSpecInfo fragSpec = { .entryPoint = "main", .shader = fragmentShader.get() }; - m_pipeline = fsTriProtoPPln.createPipeline(fragSpec, layout.get(), scResources->getRenderpass()); - if (!m_pipeline) + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, layout.get(), scResources->getRenderpass()); + if (!m_presentPipeline) return logFail("Could not create Graphics Pipeline!"); } @@ -374,7 +405,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { { - .dstSet = ds, + .dstSet = m_lumaPresentDS[0].get(), .binding = 0, .arrayElement = 0, .count = 1, @@ -387,6 +418,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public queue->endCapture(); } + // Allocate and create texture for tonemapping + { + } + return true; } @@ -401,7 +436,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto queue = getGraphicsQueue(); auto cmdbuf = m_cmdBufs[0].get(); - auto ds = m_descriptorSets[0].get(); + auto ds = m_lumaPresentDS[0].get(); queue->startCapture(); // Render to the swapchain @@ -437,8 +472,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - cmdbuf->bindGraphicsPipeline(m_pipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_pipeline->getLayout(), 3, 1, &ds); + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + 
cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); ext::FullScreenTriangle::recordDrawCall(cmdbuf); cmdbuf->endRenderPass(); @@ -447,7 +482,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // submit const IQueue::SSubmitInfo::SSemaphoreInfo rendered[1] = { { - .semaphore = m_semaphore.get(), + .semaphore = m_presentSemaphore.get(), .value = ++m_submitIx, // just as we've outputted all pixels, signal .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT @@ -482,7 +517,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public { const ISemaphore::SWaitInfo cmdbufDonePending[] = { { - .semaphore = m_semaphore.get(), + .semaphore = m_presentSemaphore.get(), .value = m_submitIx } }; @@ -506,27 +541,32 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } protected: - smart_refctd_ptr m_gpuImg; - smart_refctd_ptr m_gpuImgView; + uint64_t m_lumaGatherBDA; + smart_refctd_ptr m_gpuImg, m_gpuTonemapImg; + smart_refctd_ptr m_gpuImgView, m_gpuTonemapImgView; // for image uploads smart_refctd_ptr m_scratchSemaphore; SIntendedSubmitInfo m_intendedSubmit; - // Command Buffers and other resources - std::array, ISwapchain::MaxImages> m_descriptorSets; + // Pipelines + smart_refctd_ptr m_presentPipeline; + smart_refctd_ptr m_lumaMeterPipeline, m_tonemapperPipeline; + + // Descriptor Sets + std::array, ISwapchain::MaxImages> m_lumaPresentDS, m_tonemapperDS; + + // Command Buffers smart_refctd_ptr m_cmdPool; std::array, ISwapchain::MaxImages> m_cmdBufs; - smart_refctd_ptr m_pipeline; - smart_refctd_ptr m_semaphore; + + // Semaphores + smart_refctd_ptr m_lumaMeterSemaphore, m_tonemapperSemaphore, m_presentSemaphore; uint64_t m_submitIx = 0; // window smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; - - // luma gather - uint64_t m_lumaGatherBDA; }; NBL_MAIN_FUNC(AutoexposureApp) From cef80b3e5b961029344c4516cd5682ad2254cab7 Mon 
Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 9 Aug 2024 20:06:57 +0530 Subject: [PATCH 13/50] Create shaders and pipelines --- .../app_resources/luma_meter.comp.hlsl | 3 +- ...tonemap.comp.hlsl => tonemapper.comp.hlsl} | 8 +- 26_Autoexposure/main.cpp | 97 ++++++++++++++----- 3 files changed, 78 insertions(+), 30 deletions(-) rename 26_Autoexposure/app_resources/{tonemap.comp.hlsl => tonemapper.comp.hlsl} (72%) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 4a0797d6d..0902baa59 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -2,6 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" #include "app_resources/common.hlsl" [[vk::push_constant]] AutoexposurePushData pushData; @@ -11,7 +12,7 @@ uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() return uint32_t3(WorkgroupSize, 1, 1); } -[numthreads(SubgroupSize, SubgroupSize, 1)] +[numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { } \ No newline at end of file diff --git a/26_Autoexposure/app_resources/tonemap.comp.hlsl b/26_Autoexposure/app_resources/tonemapper.comp.hlsl similarity index 72% rename from 26_Autoexposure/app_resources/tonemap.comp.hlsl rename to 26_Autoexposure/app_resources/tonemapper.comp.hlsl index 4a0797d6d..15b543469 100644 --- a/26_Autoexposure/app_resources/tonemap.comp.hlsl +++ b/26_Autoexposure/app_resources/tonemapper.comp.hlsl @@ -2,16 +2,12 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "app_resources/common.hlsl" [[vk::push_constant]] AutoexposurePushData pushData; -uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() -{ - return uint32_t3(WorkgroupSize, 1, 1); -} - -[numthreads(SubgroupSize, SubgroupSize, 1)] +[numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { } \ No newline at end of file diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 23717516f..aaf2ecf80 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -8,6 +8,8 @@ #include "nbl/asset/interchange/IAssetLoader.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "app_resources/common.hlsl" + using namespace nbl; using namespace core; using namespace hlsl; @@ -186,41 +188,90 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Failed to create Renderpass!"); } - // Load the shaders and create the pipeline + // Load the shaders and create the pipelines { + auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(relPath, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; + + // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader + auto source = IAsset::castDown(assets[0]); + if (!source) + return nullptr; + const uint32_t workgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations; + const uint32_t subgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + auto overriddenSource = CHLSLCompiler::createOverridenCopy( + source.get(), + "#define WorkgroupSize %d\n#define DeviceSubgroupSize %d\n", + workgroupSize, + 
subgroupSize + ); + + return m_device->createShader(overriddenSource.get()); + }; + + auto createComputePipeline = [&](smart_refctd_ptr shader, smart_refctd_ptr pipeline) -> bool + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(AutoexposurePushData) + }; + + smart_refctd_ptr layout; + { + layout = m_device->createPipelineLayout({ &pcRange,1 }); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &pipeline)) + return logFail("Failed to create compute pipeline!\n"); + } + + return true; + }; + + // Luma Meter + auto lumaMeterShader = loadCompileAndCreateShader("app_resources/luma_meter.comp.hlsl"); + if (!lumaMeterShader) + return logFail("Failed to Load and Compile Compute Shader: lumaMeterShader!"); + auto lumaPresentLayout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout)); + if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline)) + return logFail("Could not create Luma Meter Pipeline!"); + + // Tonemapper + auto tonemapperShader = loadCompileAndCreateShader("app_resources/tonemapper.comp.hlsl"); + if (!tonemapperShader) + return logFail("Failed to Load and Compile Compute Shader: tonemapperShader!"); + auto tonemapperLayout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(tonemapperDSLayout)); + if (!createComputePipeline(tonemapperShader, m_tonemapperPipeline)) + return logFail("Could not create Luma Meter Pipeline!"); + // Load FSTri Shader ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); if (!fsTriProtoPPln) 
return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; - - return m_device->createShader(source.get()); - }; - auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); + // Load Fragment Shader + auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl");; if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader!"); + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); - auto layout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout)); const IGPUShader::SSpecInfo fragSpec = { .entryPoint = "main", .shader = fragmentShader.get() }; - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, layout.get(), scResources->getRenderpass()); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, lumaPresentLayout.get(), scResources->getRenderpass()); if (!m_presentPipeline) return logFail("Could not create Graphics Pipeline!"); } From 15e489f2bcfa1407b9e831452178abb6c97d22b8 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Mon, 12 Aug 2024 17:02:52 +0530 Subject: [PATCH 14/50] Allocate and create texture for tonemapping --- 26_Autoexposure/main.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 
aaf2ecf80..77636304b 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -471,6 +471,30 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Allocate and create texture for tonemapping { + IGPUImage::SCreationParams imageParams = {}; + imageParams = m_gpuImg->getCreationParameters(); + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) + }; + imageParams.format = m_physicalDevice->promoteImageFormat(request, imageParams.tiling); + } + if (imageParams.type == IGPUImage::ET_3D) + imageParams.flags |= IGPUImage::ECF_2D_ARRAY_COMPATIBLE_BIT; + m_gpuTonemapImg = m_device->createImage(std::move(imageParams)); + if (!m_gpuTonemapImg || !m_device->allocate(m_gpuTonemapImg->getMemoryReqs(), m_gpuTonemapImg.get()).isValid()) + return false; + m_gpuTonemapImg->setObjectDebugName("Autoexposure Tonemapper Image"); + + IGPUImageView::SCreationParams gpuTonemapImgViewParams = { + .image = m_gpuTonemapImg, + .viewType = IGPUImageView::ET_2D, + .format = m_gpuTonemapImg->getCreationParameters().format + }; + + m_gpuTonemapImgView = m_device->createImageView(std::move(gpuTonemapImgViewParams)); } return true; @@ -479,7 +503,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // We do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { - // Acquire auto acquire = m_surface->acquireNextImage(); if (!acquire) From c646c7d6f22247f74ab82877e3302250a20966b0 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Mon, 12 Aug 2024 21:09:04 +0530 Subject: [PATCH 15/50] Create separate ds for luma and present --- 26_Autoexposure/main.cpp | 58 +++++++++++++++++++++++----------------- 1 file changed, 34 
insertions(+), 24 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 77636304b..baa031dd4 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -136,7 +136,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Failed to Create Descriptor Pools"); m_lumaPresentDS[0] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); - if (!m_lumaPresentDS[0]) + m_lumaPresentDS[1] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); + if (!m_lumaPresentDS[0] || !m_lumaPresentDS[1]) return logFail("Could not create Descriptor Set: lumaPresentDS!"); m_tonemapperDS[0] = tonemapperPool->createDescriptorSet(core::smart_refctd_ptr(tonemapperDSLayout)); if (!m_tonemapperDS[0]) @@ -450,28 +451,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); - IGPUDescriptorSet::SDescriptorInfo info = {}; - info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - info.desc = m_gpuImgView; - - IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { - { - .dstSet = m_lumaPresentDS[0].get(), - .binding = 0, - .arrayElement = 0, - .count = 1, - .info = &info - } - }; - - m_device->updateDescriptorSets(1, writeDescriptors, 0, nullptr); - - queue->endCapture(); - } - - // Allocate and create texture for tonemapping - { - IGPUImage::SCreationParams imageParams = {}; + // Allocate and create texture for tonemapping + imageParams = {}; imageParams = m_gpuImg->getCreationParameters(); // promote format because RGB8 and friends don't actually exist in HW { @@ -495,6 +476,35 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; m_gpuTonemapImgView = m_device->createImageView(std::move(gpuTonemapImgViewParams)); + + IGPUDescriptorSet::SDescriptorInfo info1 = {}; + info1.info.image.imageLayout = 
IImage::LAYOUT::READ_ONLY_OPTIMAL; + info1.desc = m_gpuImgView; + + IGPUDescriptorSet::SDescriptorInfo info2 = {}; + info2.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + info2.desc = m_gpuImgView; + + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { + { + .dstSet = m_lumaPresentDS[0].get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = &info1 + }, + { + .dstSet = m_lumaPresentDS[1].get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = &info2 + } + }; + + m_device->updateDescriptorSets(2, writeDescriptors, 0, nullptr); + + queue->endCapture(); } return true; @@ -510,7 +520,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto queue = getGraphicsQueue(); auto cmdbuf = m_cmdBufs[0].get(); - auto ds = m_lumaPresentDS[0].get(); + auto ds = m_lumaPresentDS[1].get(); queue->startCapture(); // Render to the swapchain From 36d70978bd1a180245b26d8d62871c335f23ba93 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:19:11 +0530 Subject: [PATCH 16/50] Record luma meter commands --- .../app_resources/luma_meter.comp.hlsl | 3 + 26_Autoexposure/main.cpp | 59 ++++++++++++------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 0902baa59..e7d080da2 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -5,6 +5,9 @@ #include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" #include "app_resources/common.hlsl" +[[vk::combinedImageSampler]] [[vk::binding(0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0)]] SamplerState samplerState; + [[vk::push_constant]] AutoexposurePushData pushData; uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index baa031dd4..6f214c7fe 
100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -28,6 +28,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; constexpr static inline std::array Dimensions = { 1280, 720 }; + constexpr static inline std::array SampleCount = { 10000, 10000 }; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore @@ -100,7 +101,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public const IGPUDescriptorSetLayout::SBinding tonemapperBindings[1] = { { - .binding = 1, + .binding = 0, .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, @@ -217,7 +218,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return m_device->createShader(overriddenSource.get()); }; - auto createComputePipeline = [&](smart_refctd_ptr shader, smart_refctd_ptr pipeline) -> bool + auto createComputePipeline = [&](smart_refctd_ptr& shader, smart_refctd_ptr& pipeline) -> bool { const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, @@ -287,7 +288,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // create the commandbuffers if (!m_cmdPool) return logFail("Couldn't create Command Pool!"); - if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), 1 })) + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), 3 })) return logFail("Couldn't create Command Buffer!"); } @@ -301,6 +302,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_intendedSubmit.queue = queue; // wait for nothing before upload m_intendedSubmit.waitSemaphores = {}; + 
m_intendedSubmit.waitSemaphores = {}; // fill later m_intendedSubmit.commandBuffers = {}; m_intendedSubmit.scratchSemaphore = { @@ -514,19 +516,32 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public inline void workLoopBody() override { // Acquire - auto acquire = m_surface->acquireNextImage(); - if (!acquire) - return; + //auto acquire = m_surface->acquireNextImage(); + //if (!acquire) + // return; - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - auto ds = m_lumaPresentDS[1].get(); - - queue->startCapture(); - // Render to the swapchain + // Luma Meter { + auto queue = getComputeQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + auto ds = m_lumaPresentDS[0].get(); + + const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + + queue->startCapture(); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); + cmdbuf->dispatch(1 + (SampleCount[0] - 1) / SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); + cmdbuf->end(); + } + + // Render to the swapchain + /*{ + cmdbuf3->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + const VkRect2D currentRenderArea = { .offset = {0,0}, @@ -539,9 +554,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .width = float(m_window->getWidth()), .height = float(m_window->getHeight()) }; - cmdbuf->setViewport({ &viewport, 1 }); + cmdbuf3->setViewport({ &viewport, 1 }); } - cmdbuf->setScissor({ ¤tRenderArea, 1 }); + cmdbuf3->setScissor({ ¤tRenderArea, 1 }); // begin the renderpass { @@ -553,15 +568,15 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .depthStencilClearValues = nullptr, .renderArea = currentRenderArea }; - cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + 
cmdbuf3->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); - cmdbuf->endRenderPass(); + cmdbuf3->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf3->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); + ext::FullScreenTriangle::recordDrawCall(cmdbuf3); + cmdbuf3->endRenderPass(); - cmdbuf->end(); + cmdbuf3->end(); } // submit @@ -574,7 +589,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public { { const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[1] = { { - .cmdbuf = cmdbuf + .cmdbuf = cmdbuf3 } }; // we don't need to wait for the transfer semaphore, because we submit everything to the same queue const IQueue::SSubmitInfo::SSemaphoreInfo acquired[1] = { { @@ -607,7 +622,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; if (m_device->blockForSemaphores(cmdbufDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) return; - } + }*/ } inline bool keepRunning() override From 6addbf18516bd3f7cd06d446f285b08deb04c4fc Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Tue, 13 Aug 2024 12:04:40 +0200 Subject: [PATCH 17/50] fix layout issues with compute pipeline in 26_Autoexposure example + update luma DSes --- .../app_resources/luma_meter.comp.hlsl | 5 ++- .../app_resources/present.frag.hlsl | 1 + 26_Autoexposure/main.cpp | 37 ++++++++----------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index e7d080da2..ccdf42256 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -5,8 +5,9 @@ #include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" 
#include "app_resources/common.hlsl" -[[vk::combinedImageSampler]] [[vk::binding(0)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0)]] SamplerState samplerState; +// shared accross frag & compute - binding 0 set 3 +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; [[vk::push_constant]] AutoexposurePushData pushData; diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index fcddeb743..8c3be5573 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -8,6 +8,7 @@ #include using namespace nbl::hlsl::ext::FullScreenTriangle; +// shared accross frag & compute - binding 0 set 3 [[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; [[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 6f214c7fe..db3816703 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -121,12 +121,15 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // create the descriptor sets and with enough room { - constexpr uint32_t lumaPresentSetCount = 2, tonemapperSetCount = 1; - auto lumaPresentPool = m_device->createDescriptorPoolForDSLayouts( - IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, - { &lumaPresentDSLayout.get(), 1 }, - &lumaPresentSetCount - ); + constexpr uint32_t tonemapperSetCount = 1; + + core::smart_refctd_ptr lumaPresentPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { nullptr, nullptr, nullptr, lumaPresentDSLayout.get() }; + const uint32_t setCounts[] = { 0u, 0u, 0u, 1u }; // leaving you one for 3th set, but you can increase if you really want 2 separate DSs but I think you want single to be shared (then you also need to create 2 DSes as you did) + lumaPresentPool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + auto tonemapperPool = m_device->createDescriptorPoolForDSLayouts( IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, { &tonemapperDSLayout.get(), 1 }, @@ -136,9 +139,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public if (!lumaPresentPool || !tonemapperPool) return logFail("Failed to Create Descriptor Pools"); + // why do you need 2 separate DSs for combined sampler? from stage flags it looks like you want them shared between compute & fragment m_lumaPresentDS[0] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); - m_lumaPresentDS[1] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); - if (!m_lumaPresentDS[0] || !m_lumaPresentDS[1]) + if (!m_lumaPresentDS[0]) return logFail("Could not create Descriptor Set: lumaPresentDS!"); m_tonemapperDS[0] = tonemapperPool->createDescriptorSet(core::smart_refctd_ptr(tonemapperDSLayout)); if (!m_tonemapperDS[0]) @@ -228,7 +231,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public smart_refctd_ptr layout; { - layout = m_device->createPipelineLayout({ &pcRange,1 }); + layout = m_device->createPipelineLayout({ &pcRange,1 }, nullptr, nullptr, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout)); // dont forget your compute uses combinedImageSampler, cause of your cmd buffer errors is here + IGPUComputePipeline::SCreationParams params = {}; params.layout = layout.get(); params.shader.shader = shader.get(); @@ -483,10 +487,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public info1.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; info1.desc = m_gpuImgView; - IGPUDescriptorSet::SDescriptorInfo info2 = {}; - info2.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - info2.desc = m_gpuImgView; - IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] 
= { { .dstSet = m_lumaPresentDS[0].get(), @@ -494,17 +494,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .arrayElement = 0, .count = 1, .info = &info1 - }, - { - .dstSet = m_lumaPresentDS[1].get(), - .binding = 0, - .arrayElement = 0, - .count = 1, - .info = &info2 } }; - m_device->updateDescriptorSets(2, writeDescriptors, 0, nullptr); + m_device->updateDescriptorSets(1, writeDescriptors, 0, nullptr); queue->endCapture(); } @@ -533,7 +526,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_lumaMeterPipeline->getLayout(), 3, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers cmdbuf->dispatch(1 + (SampleCount[0] - 1) / SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); cmdbuf->end(); } From 8434f20746f399d04bff8490946b9d342b39fa55 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:14:17 +0530 Subject: [PATCH 18/50] Create two sets from common lumaPresentLayout correctly --- .../app_resources/luma_meter.comp.hlsl | 7 ++- .../app_resources/present.frag.hlsl | 8 +-- 26_Autoexposure/main.cpp | 51 +++++++++++-------- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index ccdf42256..9a3b5c98a 100644 --- 
a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -5,9 +5,8 @@ #include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" #include "app_resources/common.hlsl" -// shared accross frag & compute - binding 0 set 3 -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; +[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] SamplerState samplerState; [[vk::push_constant]] AutoexposurePushData pushData; @@ -19,4 +18,4 @@ uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() [numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { -} \ No newline at end of file +} diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index 8c3be5573..9a53c19eb 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -8,11 +8,11 @@ #include using namespace nbl::hlsl::ext::FullScreenTriangle; -// shared accross frag & compute - binding 0 set 3 -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; +// binding 0 set 1 +[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] SamplerState samplerState; [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { return texture.Sample(samplerState, vxAttr.uv); -} \ No newline at end of file +} diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index db3816703..23689f6fe 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -125,8 +125,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public 
core::smart_refctd_ptr lumaPresentPool; { - const video::IGPUDescriptorSetLayout* const layouts[] = { nullptr, nullptr, nullptr, lumaPresentDSLayout.get() }; - const uint32_t setCounts[] = { 0u, 0u, 0u, 1u }; // leaving you one for 3th set, but you can increase if you really want 2 separate DSs but I think you want single to be shared (then you also need to create 2 DSes as you did) + const video::IGPUDescriptorSetLayout* const layouts[] = { lumaPresentDSLayout.get(), lumaPresentDSLayout.get() }; + const uint32_t setCounts[] = { 1u, 1u }; lumaPresentPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); } @@ -139,9 +139,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public if (!lumaPresentPool || !tonemapperPool) return logFail("Failed to Create Descriptor Pools"); - // why do you need 2 separate DSs for combined sampler? from stage flags it looks like you want them shared between compute & fragment m_lumaPresentDS[0] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); - if (!m_lumaPresentDS[0]) + m_lumaPresentDS[1] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); + if (!m_lumaPresentDS[0] || !m_lumaPresentDS[1]) return logFail("Could not create Descriptor Set: lumaPresentDS!"); m_tonemapperDS[0] = tonemapperPool->createDescriptorSet(core::smart_refctd_ptr(tonemapperDSLayout)); if (!m_tonemapperDS[0]) @@ -221,20 +221,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return m_device->createShader(overriddenSource.get()); }; - auto createComputePipeline = [&](smart_refctd_ptr& shader, smart_refctd_ptr& pipeline) -> bool + auto createComputePipeline = [&](smart_refctd_ptr& shader, smart_refctd_ptr& pipeline, smart_refctd_ptr pipelineLayout) -> bool { - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - 
.size = sizeof(AutoexposurePushData) - }; - - smart_refctd_ptr layout; { - layout = m_device->createPipelineLayout({ &pcRange,1 }, nullptr, nullptr, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout)); // dont forget your compute uses combinedImageSampler, cause of your cmd buffer errors is here - IGPUComputePipeline::SCreationParams params = {}; - params.layout = layout.get(); + params.layout = pipelineLayout.get(); params.shader.shader = shader.get(); params.shader.entryPoint = "main"; params.shader.entries = nullptr; @@ -247,20 +238,26 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return true; }; + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(AutoexposurePushData) + }; + // Luma Meter auto lumaMeterShader = loadCompileAndCreateShader("app_resources/luma_meter.comp.hlsl"); if (!lumaMeterShader) return logFail("Failed to Load and Compile Compute Shader: lumaMeterShader!"); - auto lumaPresentLayout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout)); - if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline)) + auto lumaPresentLayout = m_device->createPipelineLayout({ &pcRange, 1 }, core::smart_refctd_ptr(lumaPresentDSLayout), core::smart_refctd_ptr(lumaPresentDSLayout), nullptr, nullptr); + if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline, lumaPresentLayout)) return logFail("Could not create Luma Meter Pipeline!"); // Tonemapper auto tonemapperShader = loadCompileAndCreateShader("app_resources/tonemapper.comp.hlsl"); if (!tonemapperShader) return logFail("Failed to Load and Compile Compute Shader: tonemapperShader!"); - auto tonemapperLayout = m_device->createPipelineLayout({}, nullptr, nullptr, nullptr, core::smart_refctd_ptr(tonemapperDSLayout)); - if (!createComputePipeline(tonemapperShader, m_tonemapperPipeline)) + auto tonemapperLayout = 
m_device->createPipelineLayout({ &pcRange, 1 }, core::smart_refctd_ptr(tonemapperDSLayout), nullptr, nullptr, nullptr); + if (!createComputePipeline(tonemapperShader, m_tonemapperPipeline, tonemapperLayout)) return logFail("Could not create Luma Meter Pipeline!"); // Load FSTri Shader @@ -321,7 +318,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Allocate memory nbl::video::IDeviceMemoryAllocator::SAllocation allocation = {}; smart_refctd_ptr buffer; - //smart_refctd_ptr ds; { auto build_buffer = [this]( smart_refctd_ptr m_device, @@ -487,6 +483,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public info1.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; info1.desc = m_gpuImgView; + IGPUDescriptorSet::SDescriptorInfo info2 = {}; + info2.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + info2.desc = m_gpuTonemapImgView; + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { { .dstSet = m_lumaPresentDS[0].get(), @@ -494,10 +494,17 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .arrayElement = 0, .count = 1, .info = &info1 + }, + { + .dstSet = m_lumaPresentDS[1].get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = &info2 } }; - m_device->updateDescriptorSets(1, writeDescriptors, 0, nullptr); + m_device->updateDescriptorSets(2, writeDescriptors, 0, nullptr); queue->endCapture(); } @@ -526,7 +533,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_lumaMeterPipeline->getLayout(), 3, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 
2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers cmdbuf->dispatch(1 + (SampleCount[0] - 1) / SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); cmdbuf->end(); } From bf08caa961286ca414602873717d123d26941b41 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 18:56:57 +0530 Subject: [PATCH 19/50] Create compute and graphics resources separately and finish luma meter --- 26_Autoexposure/main.cpp | 75 ++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 18 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 23689f6fe..e6c814bc0 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -149,7 +149,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } - auto queue = getGraphicsQueue(); + auto graphicsQueue = getGraphicsQueue(); + auto computeQueue = getComputeQueue(); // Gather swapchain resources std::unique_ptr scResources; @@ -280,17 +281,23 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } // Init the surface and create the swapchain - if (!m_surface || !m_surface->init(queue, std::move(scResources), swapchainParams.sharedParams)) + if (!m_surface || !m_surface->init(graphicsQueue, std::move(scResources), swapchainParams.sharedParams)) return logFail("Could not create Window & Surface or initialize the Surface!"); // need resetttable 
commandbuffers for the upload utility { - m_cmdPool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + m_graphicsCmdPool = m_device->createCommandPool(graphicsQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + m_computeCmdPool = m_device->createCommandPool(computeQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + // create the commandbuffers - if (!m_cmdPool) - return logFail("Couldn't create Command Pool!"); - if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), 3 })) - return logFail("Couldn't create Command Buffer!"); + if (!m_graphicsCmdPool || !m_computeCmdPool) + return logFail("Couldn't create Command Pools!"); + + if ( + !m_graphicsCmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_graphicsCmdBufs.data(), 1 }) || + !m_computeCmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_computeCmdBufs.data(), 2 }) + ) + return logFail("Couldn't create Command Buffers!"); } // things for IUtilities @@ -300,7 +307,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Could not create Scratch Semaphore"); m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); // we don't want to overcomplicate the example with multi-queue - m_intendedSubmit.queue = queue; + m_intendedSubmit.queue = graphicsQueue; // wait for nothing before upload m_intendedSubmit.waitSemaphores = {}; m_intendedSubmit.waitSemaphores = {}; @@ -409,7 +416,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // we don't want to overcomplicate the example with multi-queue auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); + auto cmdbuf = m_graphicsCmdBufs[0].get(); IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; m_intendedSubmit.commandBuffers = { &cmdbufInfo, 1 }; @@ 
-515,15 +522,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // We do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { - // Acquire - //auto acquire = m_surface->acquireNextImage(); - //if (!acquire) - // return; - // Luma Meter { auto queue = getComputeQueue(); - auto cmdbuf = m_cmdBufs[0].get(); + auto cmdbuf = m_computeCmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); auto ds = m_lumaPresentDS[0].get(); const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; @@ -533,11 +536,47 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers cmdbuf->dispatch(1 + (SampleCount[0] - 1) / SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); cmdbuf->end(); + + { + IQueue::SSubmitInfo submit_infos[1]; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { + { + .cmdbuf = cmdbuf + } + }; 
+ submit_infos[0].commandBuffers = cmdBufs; + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { + { + .semaphore = m_lumaMeterSemaphore.get(), + .value = m_submitIx + 1, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + } + }; + submit_infos[0].signalSemaphores = signals; + + queue->submit(submit_infos); + queue->endCapture(); + } + + const ISemaphore::SWaitInfo wait_infos[] = { + { + .semaphore = m_lumaMeterSemaphore.get(), + .value = m_submitIx + 1 + } + }; + m_device->blockForSemaphores(wait_infos); } + m_submitIx++; + + // Acquire + //auto acquire = m_surface->acquireNextImage(); + //if (!acquire) + // return; + // Render to the swapchain /*{ cmdbuf3->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -656,8 +695,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public std::array, ISwapchain::MaxImages> m_lumaPresentDS, m_tonemapperDS; // Command Buffers - smart_refctd_ptr m_cmdPool; - std::array, ISwapchain::MaxImages> m_cmdBufs; + smart_refctd_ptr m_graphicsCmdPool, m_computeCmdPool; + std::array, ISwapchain::MaxImages> m_graphicsCmdBufs, m_computeCmdBufs; // Semaphores smart_refctd_ptr m_lumaMeterSemaphore, m_tonemapperSemaphore, m_presentSemaphore; From b342c6c05e06dd348ce4cdd66fd65e12b89503bf Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:28:42 +0530 Subject: [PATCH 20/50] Fix descriptor binding for luma_meter --- 26_Autoexposure/app_resources/luma_meter.comp.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 9a3b5c98a..bd05198b8 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -5,8 +5,8 @@ #include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" #include "app_resources/common.hlsl" -[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] 
Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] SamplerState samplerState; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; [[vk::push_constant]] AutoexposurePushData pushData; From 817c4a7bb2dfc99251299ed908b7126690986441 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:18:19 +0530 Subject: [PATCH 21/50] Create separate pipeline layouts for luma and present --- 26_Autoexposure/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index e6c814bc0..99af2093a 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -249,8 +249,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto lumaMeterShader = loadCompileAndCreateShader("app_resources/luma_meter.comp.hlsl"); if (!lumaMeterShader) return logFail("Failed to Load and Compile Compute Shader: lumaMeterShader!"); - auto lumaPresentLayout = m_device->createPipelineLayout({ &pcRange, 1 }, core::smart_refctd_ptr(lumaPresentDSLayout), core::smart_refctd_ptr(lumaPresentDSLayout), nullptr, nullptr); - if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline, lumaPresentLayout)) + auto lumaLayout = m_device->createPipelineLayout({ &pcRange, 1 }, core::smart_refctd_ptr(lumaPresentDSLayout), nullptr, nullptr, nullptr); + if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline, lumaLayout)) return logFail("Could not create Luma Meter Pipeline!"); // Tonemapper @@ -275,7 +275,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .entryPoint = "main", .shader = fragmentShader.get() }; - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, lumaPresentLayout.get(), scResources->getRenderpass()); + auto presentLayout = m_device->createPipelineLayout({ &pcRange, 1 }, 
nullptr, core::smart_refctd_ptr(lumaPresentDSLayout), nullptr, nullptr); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scResources->getRenderpass()); if (!m_presentPipeline) return logFail("Could not create Graphics Pipeline!"); } From 7f89542ebd21ddc9b23f1acffb95111f49b1ae52 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:18:43 +0530 Subject: [PATCH 22/50] Setup luma_meter.comp.hlsl --- 26_Autoexposure/app_resources/common.hlsl | 7 +++ .../app_resources/luma_meter.comp.hlsl | 48 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl index f2b21b7e4..3735da6a9 100644 --- a/26_Autoexposure/app_resources/common.hlsl +++ b/26_Autoexposure/app_resources/common.hlsl @@ -5,9 +5,16 @@ #ifndef _AUTOEXPOSURE_COMMON_INCLUDED_ #define _AUTOEXPOSURE_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + struct AutoexposurePushData { + float meteringWindowScaleX, meteringWindowScaleY; + float meteringWindowOffsetX, meteringWindowOffsetY; + float lumaMin, lumaMax; + uint32_t sampleCountX, sampleCountY; uint32_t viewportSizeX, viewportSizeY; + uint64_t lumaMeterBDA; }; #endif \ No newline at end of file diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index bd05198b8..fffd80988 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -3,6 +3,7 @@ // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" #include "app_resources/common.hlsl" [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; @@ -10,6 +11,36 @@ [[vk::push_constant]] AutoexposurePushData pushData; +using Ptr = nbl::hlsl::bda::__ptr < 
uint32_t >; +using PtrAccessor = nbl::hlsl::BdaAccessor < uint32_t >; + +groupshared float32_t sdata[WorkgroupSize]; + +struct SharedAccessor +{ + uint32_t get(const uint32_t index) + { + return sdata[index]; + } + + void set(const uint32_t index, const uint32_t value) + { + sdata[index] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + nbl::hlsl::glsl::barrier(); + } +}; + +struct TexAccessor +{ + float32_t3 get(float32_t2 uv) { + return texture.Sample(samplerState, uv).rgb; + } +}; + uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() { return uint32_t3(WorkgroupSize, 1, 1); @@ -18,4 +49,21 @@ uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() [numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { + nbl::hlsl::luma_meter::LumaMeteringWindow luma_meter_window; + luma_meter_window.meteringWindowScale = float32_t2(pushData.meteringWindowScaleX, pushData.meteringWindowScaleY); + luma_meter_window.meteringWindowOffset = float32_t2(pushData.meteringWindowOffsetX, pushData.meteringWindowOffsetY); + + const Ptr val_ptr = Ptr::create(pushData.lumaMeterBDA); + PtrAccessor val_accessor = PtrAccessor::create(val_ptr); + + SharedAccessor sdata; + TexAccessor tex; + + using LumaMeter = nbl::hlsl::luma_meter::geom_luma_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; + LumaMeter meter = LumaMeter::create(luma_meter_window, pushData.lumaMin, pushData.lumaMax); + + uint32_t2 sampleCount = uint32_t2(pushData.sampleCountX, pushData.sampleCountY); + uint32_t2 viewportSize = uint32_t2(pushData.viewportSizeX, pushData.viewportSizeY); + + meter.gatherLuma(val_accessor, tex, sdata, sampleCount, viewportSize); } From defd45eaec2130b6c2b5aefbae524638755afa1a Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:40:04 +0530 Subject: [PATCH 23/50] Pass push constants --- 26_Autoexposure/main.cpp | 25 ++++++++++++++++++++----- 1 file 
changed, 20 insertions(+), 5 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 99af2093a..471d7f169 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -27,8 +27,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public using clock_t = std::chrono::steady_clock; constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; - constexpr static inline std::array Dimensions = { 1280, 720 }; - constexpr static inline std::array SampleCount = { 10000, 10000 }; + constexpr static inline std::array Dimensions = { 1280, 720 }; + constexpr static inline std::array SampleCount = { 10000, 10000 }; + constexpr static inline std::array MeteringWindowScale = { 0.5f, 0.5f }; + constexpr static inline std::array MeteringWindowOffset = { 0.25f, 0.25f }; + constexpr static inline std::array LumaMinMax = { 1.0f / 4096.0f, 32768.0f }; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore @@ -353,8 +356,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public assert(allocation->memory.get() == buffer->getBoundMemory().memory); }; - auto x = m_physicalDevice->getLimits(); - build_buffer(m_device, &allocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); } m_lumaGatherBDA = buffer->getDeviceAddress(); @@ -531,13 +532,27 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto ds = m_lumaPresentDS[0].get(); const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + auto pc = AutoexposurePushData + { + .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, + .meteringWindowScaleY = MeteringWindowScale[1] * m_gpuImg->getCreationParameters().extent.height, + .meteringWindowOffsetX = MeteringWindowOffset[0] * m_gpuImg->getCreationParameters().extent.width, + .meteringWindowOffsetY = 
MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, + .lumaMin = LumaMinMax[0], + .lumaMax = LumaMinMax[1], + .sampleCountX = SampleCount[0], + .sampleCountY = SampleCount[1], + .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, + .viewportSizeY = m_gpuImg->getCreationParameters().extent.height, + .lumaMeterBDA = m_lumaGatherBDA + }; queue->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->pushConstants(m_lumaMeterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); cmdbuf->dispatch(1 + (SampleCount[0] - 1) / SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); cmdbuf->end(); From f6f8154146010a09c240aa3576a99955b5779429 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:01:24 +0530 Subject: [PATCH 24/50] Record draw pass correctly --- 26_Autoexposure/main.cpp | 114 +++++++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 471d7f169..4a193e9dc 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -494,7 +494,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public IGPUDescriptorSet::SDescriptorInfo info2 = {}; info2.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - info2.desc = m_gpuTonemapImgView; + info2.desc = m_gpuImgView; // 
FIXME: temporarily pass in input image IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { { @@ -586,16 +586,41 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_device->blockForSemaphores(wait_infos); } - m_submitIx++; + // Tonemapper + { + } + + // Render to swapchain + { + // Acquire + auto acquire = m_surface->acquireNextImage(); + if (!acquire) + return; + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_graphicsCmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + auto ds = m_lumaPresentDS[1].get(); + + const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + auto pc = AutoexposurePushData + { + .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, + .meteringWindowScaleY = MeteringWindowScale[1] * m_gpuImg->getCreationParameters().extent.height, + .meteringWindowOffsetX = MeteringWindowOffset[0] * m_gpuImg->getCreationParameters().extent.width, + .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, + .lumaMin = LumaMinMax[0], + .lumaMax = LumaMinMax[1], + .sampleCountX = SampleCount[0], + .sampleCountY = SampleCount[1], + .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, + .viewportSizeY = m_gpuImg->getCreationParameters().extent.height, + .lumaMeterBDA = m_lumaGatherBDA + }; - // Acquire - //auto acquire = m_surface->acquireNextImage(); - //if (!acquire) - // return; + queue->startCapture(); - // Render to the swapchain - /*{ - cmdbuf3->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); const VkRect2D currentRenderArea = { @@ -609,9 +634,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .width = float(m_window->getWidth()), .height = float(m_window->getHeight()) }; - cmdbuf3->setViewport({ &viewport, 1 }); + cmdbuf->setViewport({ &viewport, 1 }); } - 
cmdbuf3->setScissor({ ¤tRenderArea, 1 }); + cmdbuf->setScissor({ ¤tRenderArea, 1 }); // begin the renderpass { @@ -623,28 +648,26 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .depthStencilClearValues = nullptr, .renderArea = currentRenderArea }; - cmdbuf3->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - cmdbuf3->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf3->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); - ext::FullScreenTriangle::recordDrawCall(cmdbuf3); - cmdbuf3->endRenderPass(); + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 1, 1, &ds); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + cmdbuf->endRenderPass(); - cmdbuf3->end(); - } + cmdbuf->end(); - // submit - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[1] = { { - .semaphore = m_presentSemaphore.get(), - .value = ++m_submitIx, - // just as we've outputted all pixels, signal - .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT - } }; - { + // submit + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[1] = { { + .semaphore = m_presentSemaphore.get(), + .value = m_submitIx + 1, + // just as we've outputted all pixels, signal + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } }; { const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[1] = { { - .cmdbuf = cmdbuf3 + .cmdbuf = cmdbuf } }; // we don't need to wait for the transfer semaphore, because we submit everything to the same queue const IQueue::SSubmitInfo::SSemaphoreInfo acquired[1] = { { @@ -657,27 +680,28 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .commandBuffers = commandBuffers, .signalSemaphores = rendered } }; - // we won't signal the sema if no success - if 
(queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_submitIx--; + + queue->submit(infos); } - } - // Present - m_surface->present(acquire.imageIndex, rendered); - getGraphicsQueue()->endCapture(); + // Present + m_surface->present(acquire.imageIndex, rendered); + queue->endCapture(); - // Wait for completion - { - const ISemaphore::SWaitInfo cmdbufDonePending[] = { - { - .semaphore = m_presentSemaphore.get(), - .value = m_submitIx - } - }; - if (m_device->blockForSemaphores(cmdbufDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - }*/ + // Wait for completion + { + const ISemaphore::SWaitInfo cmdbufDonePending[] = { + { + .semaphore = m_presentSemaphore.get(), + .value = m_submitIx + } + }; + if (m_device->blockForSemaphores(cmdbufDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + } + + m_submitIx++; } inline bool keepRunning() override From 64eb610d6ecaac71cb3cfa8e051c7623d754793d Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:31:35 +0530 Subject: [PATCH 25/50] Add a pipeline barrier to transition image layout --- 26_Autoexposure/main.cpp | 59 ++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 4a193e9dc..8f0895686 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -423,9 +423,15 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_intendedSubmit.commandBuffers = { &cmdbufInfo, 1 }; // there's no previous operation to wait for - const SMemoryBarrier toTransferBarrier = { - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + const SMemoryBarrier transferBarriers[] = { + { + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + }, + { + .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = 
ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } }; // upload image and write to descriptor set @@ -433,20 +439,36 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); // change the layout of the image - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { { - .barrier = { - .dep = toTransferBarrier - // no ownership transfers - }, - .image = m_gpuImg.get(), - // transition the whole view - .subresourceRange = cpuImgParams.subresourceRange, - // a wiping transition - .newLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL - } }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - // upload contents and submit right away - m_utils->updateImageViaStagingBufferAutoSubmit( + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers1[] = { + { + .barrier = { + .dep = transferBarriers[0] + // no ownership transfers + }, + .image = m_gpuImg.get(), + // transition the whole view + .subresourceRange = cpuImgParams.subresourceRange, + // a wiping transition + .newLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL + } + }; + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers2[] = { + { + .barrier = { + .dep = transferBarriers[1] + // no ownership transfers + }, + .image = m_gpuImg.get(), + // transition the whole view + .subresourceRange = cpuImgParams.subresourceRange, + // a wiping transition + .oldLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers1 }); + // upload contents + m_utils->updateImageViaStagingBuffer( m_intendedSubmit, cpuImgParams.image->getBuffer(), cpuImgParams.image->getCreationParameters().format, @@ -454,6 +476,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, cpuImgParams.image->getRegions() ); + 
cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers2 }); + m_utils->autoSubmit(m_intendedSubmit, [&](SIntendedSubmitInfo& nextSubmit) -> bool { return true; }); + IGPUImageView::SCreationParams gpuImgViewParams = { .image = m_gpuImg, .viewType = IGPUImageView::ET_2D, From 3d3d64693993846a008988757a8ed4effbccceab Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 14 Aug 2024 20:18:06 +0530 Subject: [PATCH 26/50] Record tonemapping pass --- 26_Autoexposure/main.cpp | 60 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 8f0895686..8686ed77d 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -79,7 +79,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public * as evidenced by the name of the field in the SBinding. * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers * field set to its default (nullptr). 
- */ + */ smart_refctd_ptr lumaPresentDSLayout, tonemapperDSLayout; { auto defaultSampler = m_device->createSampler( @@ -613,6 +613,64 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Tonemapper { + auto queue = getComputeQueue(); + auto cmdbuf = m_computeCmdBufs[1].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + auto ds = m_tonemapperDS[0].get(); + + const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + auto pc = AutoexposurePushData + { + .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, + .meteringWindowScaleY = MeteringWindowScale[1] * m_gpuImg->getCreationParameters().extent.height, + .meteringWindowOffsetX = MeteringWindowOffset[0] * m_gpuImg->getCreationParameters().extent.width, + .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, + .lumaMin = LumaMinMax[0], + .lumaMax = LumaMinMax[1], + .sampleCountX = SampleCount[0], + .sampleCountY = SampleCount[1], + .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, + .viewportSizeY = m_gpuImg->getCreationParameters().extent.height, + .lumaMeterBDA = m_lumaGatherBDA + }; + + queue->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->bindComputePipeline(m_tonemapperPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapperPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->pushConstants(m_tonemapperPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->dispatch(1 + (SampleCount[0] - 1) / 
SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); + cmdbuf->end(); + + { + IQueue::SSubmitInfo submit_infos[1]; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { + { + .cmdbuf = cmdbuf + } + }; + submit_infos[0].commandBuffers = cmdBufs; + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { + { + .semaphore = m_tonemapperSemaphore.get(), + .value = m_submitIx + 1, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + } + }; + submit_infos[0].signalSemaphores = signals; + + queue->submit(submit_infos); + queue->endCapture(); + } + + const ISemaphore::SWaitInfo wait_infos[] = { + { + .semaphore = m_tonemapperSemaphore.get(), + .value = m_submitIx + 1 + } + }; + m_device->blockForSemaphores(wait_infos); } // Render to swapchain From b4102dc43c84a06dda323e316c5d815f2899497e Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 16 Aug 2024 03:19:22 +0530 Subject: [PATCH 27/50] Revert "Record tonemapping pass" This reverts commit 3d3d64693993846a008988757a8ed4effbccceab. --- 26_Autoexposure/main.cpp | 60 +--------------------------------------- 1 file changed, 1 insertion(+), 59 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 8686ed77d..8f0895686 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -79,7 +79,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public * as evidenced by the name of the field in the SBinding. * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers * field set to its default (nullptr). 
- */ + */ smart_refctd_ptr lumaPresentDSLayout, tonemapperDSLayout; { auto defaultSampler = m_device->createSampler( @@ -613,64 +613,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Tonemapper { - auto queue = getComputeQueue(); - auto cmdbuf = m_computeCmdBufs[1].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - auto ds = m_tonemapperDS[0].get(); - - const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; - auto pc = AutoexposurePushData - { - .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, - .meteringWindowScaleY = MeteringWindowScale[1] * m_gpuImg->getCreationParameters().extent.height, - .meteringWindowOffsetX = MeteringWindowOffset[0] * m_gpuImg->getCreationParameters().extent.width, - .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, - .lumaMin = LumaMinMax[0], - .lumaMax = LumaMinMax[1], - .sampleCountX = SampleCount[0], - .sampleCountY = SampleCount[1], - .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, - .viewportSizeY = m_gpuImg->getCreationParameters().extent.height, - .lumaMeterBDA = m_lumaGatherBDA - }; - - queue->startCapture(); - - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->bindComputePipeline(m_tonemapperPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapperPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers - cmdbuf->pushConstants(m_tonemapperPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); - cmdbuf->dispatch(1 + (SampleCount[0] - 1) / 
SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); - cmdbuf->end(); - - { - IQueue::SSubmitInfo submit_infos[1]; - IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { - { - .cmdbuf = cmdbuf - } - }; - submit_infos[0].commandBuffers = cmdBufs; - IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { - { - .semaphore = m_tonemapperSemaphore.get(), - .value = m_submitIx + 1, - .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT - } - }; - submit_infos[0].signalSemaphores = signals; - - queue->submit(submit_infos); - queue->endCapture(); - } - - const ISemaphore::SWaitInfo wait_infos[] = { - { - .semaphore = m_tonemapperSemaphore.get(), - .value = m_submitIx + 1 - } - }; - m_device->blockForSemaphores(wait_infos); } // Render to swapchain From 8307e926f95b568d319ff83c109abe3313fc7fed Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 16 Aug 2024 03:32:16 +0530 Subject: [PATCH 28/50] Remove separate tonemapping pass --- 26_Autoexposure/main.cpp | 81 ++++------------------------------------ 1 file changed, 7 insertions(+), 74 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 8f0895686..0be4c9c3a 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -18,8 +18,6 @@ using namespace asset; using namespace ui; using namespace video; -//#include "app_resources/push_constants.hlsl" - class AutoexposureApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication { using device_base_t = examples::SimpleWindowedApplication; @@ -80,7 +78,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers * field set to its default (nullptr). 
*/ - smart_refctd_ptr lumaPresentDSLayout, tonemapperDSLayout; + smart_refctd_ptr lumaPresentDSLayout; { auto defaultSampler = m_device->createSampler( { @@ -101,31 +99,14 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public lumaPresentDSLayout = m_device->createDescriptorSetLayout(lumaPresentBindings); if (!lumaPresentDSLayout) return logFail("Failed to Create Descriptor Layout: lumaPresentDSLayout"); - - const IGPUDescriptorSetLayout::SBinding tonemapperBindings[1] = { - { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - .immutableSamplers = &defaultSampler - } - }; - tonemapperDSLayout = m_device->createDescriptorSetLayout(tonemapperBindings); - if (!tonemapperDSLayout) - return logFail("Failed to Create Descriptor Layout: tonemapperDSLayout"); } // Create semaphores m_lumaMeterSemaphore = m_device->createSemaphore(m_submitIx); - m_tonemapperSemaphore = m_device->createSemaphore(m_submitIx); m_presentSemaphore = m_device->createSemaphore(m_submitIx); // create the descriptor sets and with enough room { - constexpr uint32_t tonemapperSetCount = 1; - core::smart_refctd_ptr lumaPresentPool; { const video::IGPUDescriptorSetLayout* const layouts[] = { lumaPresentDSLayout.get(), lumaPresentDSLayout.get() }; @@ -133,23 +114,13 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public lumaPresentPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); } - auto tonemapperPool = m_device->createDescriptorPoolForDSLayouts( - IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, - { &tonemapperDSLayout.get(), 1 }, - &tonemapperSetCount - ); - - if (!lumaPresentPool || !tonemapperPool) + if (!lumaPresentPool) return logFail("Failed to Create Descriptor Pools"); m_lumaPresentDS[0] = 
lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); m_lumaPresentDS[1] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); if (!m_lumaPresentDS[0] || !m_lumaPresentDS[1]) return logFail("Could not create Descriptor Set: lumaPresentDS!"); - m_tonemapperDS[0] = tonemapperPool->createDescriptorSet(core::smart_refctd_ptr(tonemapperDSLayout)); - if (!m_tonemapperDS[0]) - return logFail("Could not create Descriptor Set: tonemapperDS!"); - } auto graphicsQueue = getGraphicsQueue(); @@ -256,14 +227,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline, lumaLayout)) return logFail("Could not create Luma Meter Pipeline!"); - // Tonemapper - auto tonemapperShader = loadCompileAndCreateShader("app_resources/tonemapper.comp.hlsl"); - if (!tonemapperShader) - return logFail("Failed to Load and Compile Compute Shader: tonemapperShader!"); - auto tonemapperLayout = m_device->createPipelineLayout({ &pcRange, 1 }, core::smart_refctd_ptr(tonemapperDSLayout), nullptr, nullptr, nullptr); - if (!createComputePipeline(tonemapperShader, m_tonemapperPipeline, tonemapperLayout)) - return logFail("Could not create Luma Meter Pipeline!"); - // Load FSTri Shader ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); if (!fsTriProtoPPln) @@ -487,32 +450,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); - // Allocate and create texture for tonemapping - imageParams = {}; - imageParams = m_gpuImg->getCreationParameters(); - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) - }; - 
imageParams.format = m_physicalDevice->promoteImageFormat(request, imageParams.tiling); - } - if (imageParams.type == IGPUImage::ET_3D) - imageParams.flags |= IGPUImage::ECF_2D_ARRAY_COMPATIBLE_BIT; - m_gpuTonemapImg = m_device->createImage(std::move(imageParams)); - if (!m_gpuTonemapImg || !m_device->allocate(m_gpuTonemapImg->getMemoryReqs(), m_gpuTonemapImg.get()).isValid()) - return false; - m_gpuTonemapImg->setObjectDebugName("Autoexposure Tonemapper Image"); - - IGPUImageView::SCreationParams gpuTonemapImgViewParams = { - .image = m_gpuTonemapImg, - .viewType = IGPUImageView::ET_2D, - .format = m_gpuTonemapImg->getCreationParameters().format - }; - - m_gpuTonemapImgView = m_device->createImageView(std::move(gpuTonemapImgViewParams)); - IGPUDescriptorSet::SDescriptorInfo info1 = {}; info1.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; info1.desc = m_gpuImgView; @@ -611,10 +548,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_device->blockForSemaphores(wait_infos); } - // Tonemapper - { - } - // Render to swapchain { // Acquire @@ -745,8 +678,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public protected: uint64_t m_lumaGatherBDA; - smart_refctd_ptr m_gpuImg, m_gpuTonemapImg; - smart_refctd_ptr m_gpuImgView, m_gpuTonemapImgView; + smart_refctd_ptr m_gpuImg; + smart_refctd_ptr m_gpuImgView; // for image uploads smart_refctd_ptr m_scratchSemaphore; @@ -754,17 +687,17 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Pipelines smart_refctd_ptr m_presentPipeline; - smart_refctd_ptr m_lumaMeterPipeline, m_tonemapperPipeline; + smart_refctd_ptr m_lumaMeterPipeline; // Descriptor Sets - std::array, ISwapchain::MaxImages> m_lumaPresentDS, m_tonemapperDS; + std::array, ISwapchain::MaxImages> m_lumaPresentDS; // Command Buffers smart_refctd_ptr m_graphicsCmdPool, m_computeCmdPool; std::array, ISwapchain::MaxImages> m_graphicsCmdBufs, m_computeCmdBufs; 
// Semaphores - smart_refctd_ptr m_lumaMeterSemaphore, m_tonemapperSemaphore, m_presentSemaphore; + smart_refctd_ptr m_lumaMeterSemaphore, m_presentSemaphore; uint64_t m_submitIx = 0; // window From 7b5ca0522d21fcd670ecea6d4193b159dd7fb2de Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 16 Aug 2024 15:32:40 +0530 Subject: [PATCH 29/50] Compute final EV value on CPU --- 26_Autoexposure/app_resources/common.hlsl | 1 + 26_Autoexposure/main.cpp | 37 ++++++++++++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl index 3735da6a9..07993d58d 100644 --- a/26_Autoexposure/app_resources/common.hlsl +++ b/26_Autoexposure/app_resources/common.hlsl @@ -12,6 +12,7 @@ struct AutoexposurePushData float meteringWindowScaleX, meteringWindowScaleY; float meteringWindowOffsetX, meteringWindowOffsetY; float lumaMin, lumaMax; + float EV; uint32_t sampleCountX, sampleCountY; uint32_t viewportSizeX, viewportSizeY; uint64_t lumaMeterBDA; diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 0be4c9c3a..faba912f4 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -290,7 +290,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Allocate and create buffer for Luma Gather { // Allocate memory - nbl::video::IDeviceMemoryAllocator::SAllocation allocation = {}; + m_lumaGatherAllocation = {}; smart_refctd_ptr buffer; { auto build_buffer = [this]( @@ -319,9 +319,13 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public assert(allocation->memory.get() == buffer->getBoundMemory().memory); }; - build_buffer(m_device, &allocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); + build_buffer(m_device, &m_lumaGatherAllocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); } 
m_lumaGatherBDA = buffer->getDeviceAddress(); + + auto mapped_memory = m_lumaGatherAllocation.memory->map({ 0ull, m_lumaGatherAllocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ); + if (!mapped_memory) + return logFail("Failed to map the Device Memory!\n"); } // Allocate and Leave 1/4 for image uploads, to test image copy with small memory remaining @@ -486,6 +490,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // We do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { + const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + // Luma Meter { auto queue = getComputeQueue(); @@ -493,7 +499,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); auto ds = m_lumaPresentDS[0].get(); - const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; auto pc = AutoexposurePushData { .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, @@ -502,6 +507,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, .lumaMin = LumaMinMax[0], .lumaMax = LumaMinMax[1], + .EV = 0.0f, .sampleCountX = SampleCount[0], .sampleCountY = SampleCount[1], .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, @@ -548,6 +554,27 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_device->blockForSemaphores(wait_infos); } + // Get EV + { + const auto memory_range = ILogicalDevice::MappedMemoryRange( + m_lumaGatherAllocation.memory.get(), + 0ull, + m_lumaGatherAllocation.memory->getAllocationSize() + ); + + if (!m_lumaGatherAllocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) + 
m_device->invalidateMappedMemoryRanges(1, &memory_range); + + const uint32_t* buffData = reinterpret_cast(m_lumaGatherAllocation.memory->getMappedPointer()); + + assert(m_lumaGatherAllocation.offset == 0); // simpler than writing out all the pointer arithmetic + + m_EV = 0.0f; + for (int index = 0; index < SubgroupSize; index++) { + m_EV += buffData[index]; + } + } + // Render to swapchain { // Acquire @@ -560,7 +587,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); auto ds = m_lumaPresentDS[1].get(); - const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; auto pc = AutoexposurePushData { .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, @@ -569,6 +595,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, .lumaMin = LumaMinMax[0], .lumaMax = LumaMinMax[1], + .EV = m_EV, .sampleCountX = SampleCount[0], .sampleCountY = SampleCount[1], .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, @@ -677,7 +704,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } protected: + nbl::video::IDeviceMemoryAllocator::SAllocation m_lumaGatherAllocation; uint64_t m_lumaGatherBDA; + float m_EV = 0; smart_refctd_ptr m_gpuImg; smart_refctd_ptr m_gpuImgView; From edbf8d11854f3f4508316937d196e9c0e17c8b55 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:36:29 +0530 Subject: [PATCH 30/50] Compute EV correctly and tonemap in fragment shader --- .../app_resources/present.frag.hlsl | 18 +++++++++++++++++- .../app_resources/tonemapper.comp.hlsl | 13 ------------- 26_Autoexposure/main.cpp | 3 ++- 3 files changed, 19 insertions(+), 15 deletions(-) delete mode 100644 
26_Autoexposure/app_resources/tonemapper.comp.hlsl diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index 9a53c19eb..5f0259fe5 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -4,6 +4,13 @@ #pragma wave shader_stage(fragment) +#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/decodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators.hlsl" +#include "app_resources/common.hlsl" + // vertex shader is provided by the fullScreenTriangle extension #include using namespace nbl::hlsl::ext::FullScreenTriangle; @@ -12,7 +19,16 @@ using namespace nbl::hlsl::ext::FullScreenTriangle; [[vk::combinedImageSampler]] [[vk::binding(0, 1)]] Texture2D texture; [[vk::combinedImageSampler]] [[vk::binding(0, 1)]] SamplerState samplerState; +[[vk::push_constant]] AutoexposurePushData pushData; + [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { - return texture.Sample(samplerState, vxAttr.uv); + float32_t3 color = nbl::hlsl::colorspace::oetf::sRGB(texture.Sample(samplerState, vxAttr.uv).rgb); + float32_t3 CIEColor = mul(nbl::hlsl::colorspace::sRGBtoXYZ, color); + + nbl::hlsl::tonemapper::ReinhardParams params = nbl::hlsl::tonemapper::ReinhardParams::create(pushData.EV); + + float32_t3 tonemappedColor = mul(nbl::hlsl::colorspace::decode::XYZtoscRGB, nbl::hlsl::tonemapper::reinhard(params, CIEColor)); + + return float32_t4(nbl::hlsl::colorspace::eotf::sRGB(tonemappedColor), 1.0); } diff --git a/26_Autoexposure/app_resources/tonemapper.comp.hlsl b/26_Autoexposure/app_resources/tonemapper.comp.hlsl deleted file mode 100644 index 15b543469..000000000 --- a/26_Autoexposure/app_resources/tonemapper.comp.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (C) 2024-2024 - DevSH Graphics 
Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "app_resources/common.hlsl" - -[[vk::push_constant]] AutoexposurePushData pushData; - -[numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] -void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) -{ -} \ No newline at end of file diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index faba912f4..fe770e395 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -571,8 +571,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_EV = 0.0f; for (int index = 0; index < SubgroupSize; index++) { - m_EV += buffData[index]; + m_EV += static_cast(buffData[index]) / (log2(LumaMinMax[1]) - log2(LumaMinMax[0])) + log2(LumaMinMax[0]); } + m_EV /= (SampleCount[0] * SampleCount[1]); } // Render to swapchain From dca49d2048ceb42ef9eba4b24fb527101847fcbe Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 18:27:38 +0530 Subject: [PATCH 31/50] Separate LumaMeteringWindow into a common header --- 26_Autoexposure/app_resources/luma_meter.comp.hlsl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index fffd80988..dbb214a8c 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -49,9 +49,9 @@ uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() [numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { - nbl::hlsl::luma_meter::LumaMeteringWindow luma_meter_window; - luma_meter_window.meteringWindowScale = float32_t2(pushData.meteringWindowScaleX, pushData.meteringWindowScaleY); - 
luma_meter_window.meteringWindowOffset = float32_t2(pushData.meteringWindowOffsetX, pushData.meteringWindowOffsetY); + nbl::hlsl::luma_meter::MeteringWindow meter_window; + meter_window.meteringWindowScale = float32_t2(pushData.meteringWindowScaleX, pushData.meteringWindowScaleY); + meter_window.meteringWindowOffset = float32_t2(pushData.meteringWindowOffsetX, pushData.meteringWindowOffsetY); const Ptr val_ptr = Ptr::create(pushData.lumaMeterBDA); PtrAccessor val_accessor = PtrAccessor::create(val_ptr); @@ -60,7 +60,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) TexAccessor tex; using LumaMeter = nbl::hlsl::luma_meter::geom_luma_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; - LumaMeter meter = LumaMeter::create(luma_meter_window, pushData.lumaMin, pushData.lumaMax); + LumaMeter meter = LumaMeter::create(meter_window, pushData.lumaMin, pushData.lumaMax); uint32_t2 sampleCount = uint32_t2(pushData.sampleCountX, pushData.sampleCountY); uint32_t2 viewportSize = uint32_t2(pushData.viewportSizeX, pushData.viewportSizeY); From 9e283950262bae829ed1330d83cd424a14eb39e7 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 18:32:55 +0530 Subject: [PATCH 32/50] Simplify luma_meter naming --- 26_Autoexposure/app_resources/luma_meter.comp.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index dbb214a8c..241a499b7 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -59,7 +59,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) SharedAccessor sdata; TexAccessor tex; - using LumaMeter = nbl::hlsl::luma_meter::geom_luma_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; + using LumaMeter = nbl::hlsl::luma_meter::geom_meter< WorkgroupSize, 
PtrAccessor, SharedAccessor, TexAccessor>; LumaMeter meter = LumaMeter::create(meter_window, pushData.lumaMin, pushData.lumaMax); uint32_t2 sampleCount = uint32_t2(pushData.sampleCountX, pushData.sampleCountY); From 18fae9f1f93f07b91a642363689435d3f1092606 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 19:05:22 +0530 Subject: [PATCH 33/50] Update luma examples to shared accessor api --- .../app_resources/luma_meter.comp.hlsl | 18 +++++++++--------- .../app_resources/present.frag.hlsl | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 241a499b7..1cd451286 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -11,16 +11,16 @@ [[vk::push_constant]] AutoexposurePushData pushData; -using Ptr = nbl::hlsl::bda::__ptr < uint32_t >; -using PtrAccessor = nbl::hlsl::BdaAccessor < uint32_t >; +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; groupshared float32_t sdata[WorkgroupSize]; - struct SharedAccessor { - uint32_t get(const uint32_t index) + void get(const uint32_t index, NBL_REF_ARG(uint32_t) value) { - return sdata[index]; + value = sdata[index]; } void set(const uint32_t index, const uint32_t value) @@ -30,7 +30,7 @@ struct SharedAccessor void workgroupExecutionAndMemoryBarrier() { - nbl::hlsl::glsl::barrier(); + glsl::barrier(); } }; @@ -41,7 +41,7 @@ struct TexAccessor } }; -uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() +uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(WorkgroupSize, 1, 1); } @@ -49,7 +49,7 @@ uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() [numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { - nbl::hlsl::luma_meter::MeteringWindow 
meter_window; + luma_meter::MeteringWindow meter_window; meter_window.meteringWindowScale = float32_t2(pushData.meteringWindowScaleX, pushData.meteringWindowScaleY); meter_window.meteringWindowOffset = float32_t2(pushData.meteringWindowOffsetX, pushData.meteringWindowOffsetY); @@ -59,7 +59,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) SharedAccessor sdata; TexAccessor tex; - using LumaMeter = nbl::hlsl::luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; + using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; LumaMeter meter = LumaMeter::create(meter_window, pushData.lumaMin, pushData.lumaMax); uint32_t2 sampleCount = uint32_t2(pushData.sampleCountX, pushData.sampleCountY); diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index 5f0259fe5..2e8142823 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -13,7 +13,8 @@ // vertex shader is provided by the fullScreenTriangle extension #include -using namespace nbl::hlsl::ext::FullScreenTriangle; +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; // binding 0 set 1 [[vk::combinedImageSampler]] [[vk::binding(0, 1)]] Texture2D texture; @@ -23,12 +24,12 @@ using namespace nbl::hlsl::ext::FullScreenTriangle; [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { - float32_t3 color = nbl::hlsl::colorspace::oetf::sRGB(texture.Sample(samplerState, vxAttr.uv).rgb); - float32_t3 CIEColor = mul(nbl::hlsl::colorspace::sRGBtoXYZ, color); + float32_t3 color = colorspace::oetf::sRGB(texture.Sample(samplerState, vxAttr.uv).rgb); + float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); - nbl::hlsl::tonemapper::ReinhardParams params = nbl::hlsl::tonemapper::ReinhardParams::create(pushData.EV); + tonemapper::ReinhardParams params = 
tonemapper::ReinhardParams::create(pushData.EV); - float32_t3 tonemappedColor = mul(nbl::hlsl::colorspace::decode::XYZtoscRGB, nbl::hlsl::tonemapper::reinhard(params, CIEColor)); + float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, tonemapper::reinhard(params, CIEColor)); - return float32_t4(nbl::hlsl::colorspace::eotf::sRGB(tonemappedColor), 1.0); + return float32_t4(colorspace::eotf::sRGB(tonemappedColor), 1.0); } From 9b31c2c70eca0bcfb12d7a6a1327435954979707 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:08:55 +0530 Subject: [PATCH 34/50] Refactor tonemapping operators --- 26_Autoexposure/app_resources/present.frag.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index 2e8142823..b8ad803ff 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -27,9 +27,9 @@ using namespace ext::FullScreenTriangle; float32_t3 color = colorspace::oetf::sRGB(texture.Sample(samplerState, vxAttr.uv).rgb); float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); - tonemapper::ReinhardParams params = tonemapper::ReinhardParams::create(pushData.EV); + tonemapper::Reinhard reinhard = tonemapper::Reinhard::create(pushData.EV); - float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, tonemapper::reinhard(params, CIEColor)); + float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, reinhard(CIEColor)); return float32_t4(colorspace::eotf::sRGB(tonemappedColor), 1.0); } From e987452090e0b3a321b1c92a61542f659353d4a6 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:08:09 +0530 Subject: [PATCH 35/50] Simplify push constants and remove explicit sample counts --- 26_Autoexposure/app_resources/common.hlsl | 19 ++++++--- 26_Autoexposure/main.cpp | 48 
++++++++--------------- 2 files changed, 30 insertions(+), 37 deletions(-) diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl index 07993d58d..887607fb1 100644 --- a/26_Autoexposure/app_resources/common.hlsl +++ b/26_Autoexposure/app_resources/common.hlsl @@ -6,16 +6,23 @@ #define _AUTOEXPOSURE_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/luma_meter/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ struct AutoexposurePushData { - float meteringWindowScaleX, meteringWindowScaleY; - float meteringWindowOffsetX, meteringWindowOffsetY; - float lumaMin, lumaMax; - float EV; - uint32_t sampleCountX, sampleCountY; - uint32_t viewportSizeX, viewportSizeY; + nbl::hlsl::luma_meter::MeteringWindow window; + float32_t2 lumaMinMax; + float32_t EV; + uint32_t2 viewportSize; uint64_t lumaMeterBDA; }; +} +} + #endif \ No newline at end of file diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index fe770e395..913a68d0f 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -25,11 +25,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public using clock_t = std::chrono::steady_clock; constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; - constexpr static inline std::array Dimensions = { 1280, 720 }; - constexpr static inline std::array SampleCount = { 10000, 10000 }; - constexpr static inline std::array MeteringWindowScale = { 0.5f, 0.5f }; - constexpr static inline std::array MeteringWindowOffset = { 0.25f, 0.25f }; - constexpr static inline std::array LumaMinMax = { 1.0f / 4096.0f, 32768.0f }; + constexpr static inline uint32_t2 Dimensions = { 1280, 720 }; + constexpr static inline float32_t2 MeteringWindowScale = { 0.5f, 0.5f }; + constexpr static inline float32_t2 MeteringWindowOffset = { 0.25f, 0.25f }; + constexpr static inline float32_t2 LumaMinMax = { 1.0f / 
4096.0f, 32768.0f }; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore @@ -491,6 +490,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public inline void workLoopBody() override { const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + uint32_t2 viewportSize = { m_gpuImg->getCreationParameters().extent.width, m_gpuImg->getCreationParameters().extent.height }; // Luma Meter { @@ -501,17 +501,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto pc = AutoexposurePushData { - .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, - .meteringWindowScaleY = MeteringWindowScale[1] * m_gpuImg->getCreationParameters().extent.height, - .meteringWindowOffsetX = MeteringWindowOffset[0] * m_gpuImg->getCreationParameters().extent.width, - .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, - .lumaMin = LumaMinMax[0], - .lumaMax = LumaMinMax[1], + .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), + .lumaMinMax = LumaMinMax, .EV = 0.0f, - .sampleCountX = SampleCount[0], - .sampleCountY = SampleCount[1], - .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, - .viewportSizeY = m_gpuImg->getCreationParameters().extent.height, + .viewportSize = viewportSize, .lumaMeterBDA = m_lumaGatherBDA }; @@ -521,7 +514,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting 
firstSet to 2, count to 2 and last argument would be pointet to your DS pointers cmdbuf->pushConstants(m_lumaMeterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); - cmdbuf->dispatch(1 + (SampleCount[0] - 1) / SubgroupSize, 1 + (SampleCount[1] - 1) / SubgroupSize); + cmdbuf->dispatch(viewportSize.x / SubgroupSize, viewportSize.y / SubgroupSize); cmdbuf->end(); { @@ -571,9 +564,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_EV = 0.0f; for (int index = 0; index < SubgroupSize; index++) { - m_EV += static_cast(buffData[index]) / (log2(LumaMinMax[1]) - log2(LumaMinMax[0])) + log2(LumaMinMax[0]); + m_EV += static_cast(buffData[index]) / (log2(LumaMinMax[1]) - log2(LumaMinMax[0])) + log2(LumaMinMax[0]); } - m_EV /= (SampleCount[0] * SampleCount[1]); + m_EV /= (viewportSize.x * viewportSize.y) / 4; } // Render to swapchain @@ -590,17 +583,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto pc = AutoexposurePushData { - .meteringWindowScaleX = MeteringWindowScale[0] * m_gpuImg->getCreationParameters().extent.width, - .meteringWindowScaleY = MeteringWindowScale[1] * m_gpuImg->getCreationParameters().extent.height, - .meteringWindowOffsetX = MeteringWindowOffset[0] * m_gpuImg->getCreationParameters().extent.width, - .meteringWindowOffsetY = MeteringWindowOffset[1] * m_gpuImg->getCreationParameters().extent.height, - .lumaMin = LumaMinMax[0], - .lumaMax = LumaMinMax[1], + .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), + .lumaMinMax = LumaMinMax, .EV = m_EV, - .sampleCountX = SampleCount[0], - .sampleCountY = SampleCount[1], - .viewportSizeX = m_gpuImg->getCreationParameters().extent.width, - .viewportSizeY = m_gpuImg->getCreationParameters().extent.height, + .viewportSize = viewportSize, .lumaMeterBDA = m_lumaGatherBDA }; @@ -617,8 +603,8 @@ class AutoexposureApp final : public 
examples::SimpleWindowedApplication, public { const asset::SViewport viewport = { - .width = float(m_window->getWidth()), - .height = float(m_window->getHeight()) + .width = float32_t(m_window->getWidth()), + .height = float32_t(m_window->getHeight()) }; cmdbuf->setViewport({ &viewport, 1 }); } @@ -707,7 +693,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public protected: nbl::video::IDeviceMemoryAllocator::SAllocation m_lumaGatherAllocation; uint64_t m_lumaGatherBDA; - float m_EV = 0; + float32_t m_EV = 0; smart_refctd_ptr m_gpuImg; smart_refctd_ptr m_gpuImgView; From e135e434d13df7932f87a67b30a4b731aa58b5d4 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:32:13 +0530 Subject: [PATCH 36/50] Infer sample count from viewportSize and simplify userspace HLSL --- .../app_resources/luma_meter.comp.hlsl | 15 ++++----------- 26_Autoexposure/main.cpp | 8 ++++++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 1cd451286..0cd9d78c7 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -9,12 +9,12 @@ [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; -[[vk::push_constant]] AutoexposurePushData pushData; - using namespace nbl::hlsl; using Ptr = bda::__ptr < uint32_t >; using PtrAccessor = BdaAccessor < uint32_t >; +[[vk::push_constant]] AutoexposurePushData pushData; + groupshared float32_t sdata[WorkgroupSize]; struct SharedAccessor { @@ -49,10 +49,6 @@ uint32_t3 glsl::gl_WorkGroupSize() [numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { - luma_meter::MeteringWindow meter_window; - 
meter_window.meteringWindowScale = float32_t2(pushData.meteringWindowScaleX, pushData.meteringWindowScaleY); - meter_window.meteringWindowOffset = float32_t2(pushData.meteringWindowOffsetX, pushData.meteringWindowOffsetY); - const Ptr val_ptr = Ptr::create(pushData.lumaMeterBDA); PtrAccessor val_accessor = PtrAccessor::create(val_ptr); @@ -60,10 +56,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) TexAccessor tex; using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; - LumaMeter meter = LumaMeter::create(meter_window, pushData.lumaMin, pushData.lumaMax); - - uint32_t2 sampleCount = uint32_t2(pushData.sampleCountX, pushData.sampleCountY); - uint32_t2 viewportSize = uint32_t2(pushData.viewportSizeX, pushData.viewportSizeY); + LumaMeter meter = LumaMeter::create(pushData.lumaMinMax); - meter.gatherLuma(val_accessor, tex, sdata, sampleCount, viewportSize); + meter.gatherLuma(pushData.window, val_accessor, tex, sdata, (float32_t2)(glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize())); } diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 913a68d0f..8b8ca771b 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -490,6 +490,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public inline void workLoopBody() override { const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + uint32_t2 viewportSize = { m_gpuImg->getCreationParameters().extent.width, m_gpuImg->getCreationParameters().extent.height }; // Luma Meter @@ -514,7 +515,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this 
index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers cmdbuf->pushConstants(m_lumaMeterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); - cmdbuf->dispatch(viewportSize.x / SubgroupSize, viewportSize.y / SubgroupSize); + cmdbuf->dispatch(1 + (viewportSize.x - 1) / SubgroupSize, 1 + (viewportSize.y - 1) / SubgroupSize); cmdbuf->end(); { @@ -566,7 +567,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public for (int index = 0; index < SubgroupSize; index++) { m_EV += static_cast(buffData[index]) / (log2(LumaMinMax[1]) - log2(LumaMinMax[0])) + log2(LumaMinMax[0]); } - m_EV /= (viewportSize.x * viewportSize.y) / 4; + uint64_t sampleCount = (viewportSize.x * viewportSize.y) / 4; + uint64_t workgroupSize = SubgroupSize * SubgroupSize; + sampleCount = workgroupSize * (1 + (sampleCount - 1) / workgroupSize); + m_EV /= sampleCount; } // Render to swapchain From 57e49ae17b66dd74a4ad5b945d127f7059be2452 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 22 Aug 2024 23:03:31 +0530 Subject: [PATCH 37/50] Templatize float type and add toXYZ method to TexAccessor --- 26_Autoexposure/app_resources/luma_meter.comp.hlsl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 0cd9d78c7..1bcec5918 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -4,6 +4,7 @@ #include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" #include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" #include "app_resources/common.hlsl" [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; @@ 
-18,6 +19,7 @@ using PtrAccessor = BdaAccessor < uint32_t >; groupshared float32_t sdata[WorkgroupSize]; struct SharedAccessor { + using type = float32_t; void get(const uint32_t index, NBL_REF_ARG(uint32_t) value) { value = sdata[index]; @@ -36,6 +38,10 @@ struct SharedAccessor struct TexAccessor { + static float32_t3 toXYZ(float32_t3 srgbColor) { + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + float32_t3 get(float32_t2 uv) { return texture.Sample(samplerState, uv).rgb; } @@ -58,5 +64,5 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; LumaMeter meter = LumaMeter::create(pushData.lumaMinMax); - meter.gatherLuma(pushData.window, val_accessor, tex, sdata, (float32_t2)(glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize())); + meter.sampleLuma(pushData.window, val_accessor, tex, sdata, (float32_t2)(glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize())); } From f8d50e804424eecd2f6e8a0b02285c623ec66376 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Tue, 27 Aug 2024 22:32:53 +0530 Subject: [PATCH 38/50] Refactor the example into using a 2-compute, 1-fragment architecture --- 26_Autoexposure/app_resources/common.hlsl | 2 +- .../app_resources/luma_gather.comp.hlsl | 89 +++++ .../app_resources/luma_meter.comp.hlsl | 4 +- .../app_resources/present.frag.hlsl | 22 +- 26_Autoexposure/main.cpp | 351 ++++++++++++++---- 5 files changed, 365 insertions(+), 103 deletions(-) create mode 100644 26_Autoexposure/app_resources/luma_gather.comp.hlsl diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl index 887607fb1..b270c38ce 100644 --- a/26_Autoexposure/app_resources/common.hlsl +++ b/26_Autoexposure/app_resources/common.hlsl @@ -17,7 +17,7 @@ struct AutoexposurePushData { nbl::hlsl::luma_meter::MeteringWindow window; float32_t2 lumaMinMax; - float32_t EV; + 
float32_t sampleCount; uint32_t2 viewportSize; uint64_t lumaMeterBDA; }; diff --git a/26_Autoexposure/app_resources/luma_gather.comp.hlsl b/26_Autoexposure/app_resources/luma_gather.comp.hlsl new file mode 100644 index 000000000..7b14ee5be --- /dev/null +++ b/26_Autoexposure/app_resources/luma_gather.comp.hlsl @@ -0,0 +1,89 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/decodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D textureIn; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerStateIn; +[[vk::binding(0, 3)]] RWTexture2D textureOut; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] AutoexposurePushData pushData; + +groupshared float32_t sdata[WorkgroupSize]; +struct SharedAccessor +{ + using type = float32_t; + void get(const uint32_t index, NBL_REF_ARG(uint32_t) value) + { + value = sdata[index]; + } + + void set(const uint32_t index, const uint32_t value) + { + sdata[index] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct TexAccessor +{ + static float32_t3 toXYZ(float32_t3 srgbColor) { + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + + float32_t3 get(float32_t2 uv) { + return textureIn.Sample(samplerStateIn, uv).rgb; + } +}; + +uint32_t3 glsl::gl_WorkGroupSize() +{ + return uint32_t3(WorkgroupSize, 1, 1); +} + 
+[numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) +{ + const Ptr val_ptr = Ptr::create(pushData.lumaMeterBDA); + PtrAccessor val_accessor = PtrAccessor::create(val_ptr); + + SharedAccessor sdata; + TexAccessor tex; + + using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; + LumaMeter meter = LumaMeter::create(pushData.lumaMinMax, pushData.sampleCount); + + float32_t EV = meter.gatherLuma(val_accessor); + + uint32_t tid = workgroup::SubgroupContiguousIndex(); + uint32_t2 coord = { + morton2d_decode_x(tid), + morton2d_decode_y(tid) + }; + + uint32_t2 pos = glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize() + coord; + + float32_t2 uv = (float32_t2)(pos) / pushData.viewportSize; + float32_t3 color = colorspace::oetf::sRGB(tex.get(uv).rgb); + float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); + tonemapper::Reinhard reinhard = tonemapper::Reinhard::create(EV); + float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, reinhard(CIEColor)); + + textureOut[pos] = float32_t4(colorspace::eotf::sRGB(tonemappedColor), 1.0f); +} diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index 1bcec5918..f936d8d37 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -62,7 +62,7 @@ void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) TexAccessor tex; using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; - LumaMeter meter = LumaMeter::create(pushData.lumaMinMax); + LumaMeter meter = LumaMeter::create(pushData.lumaMinMax, pushData.sampleCount); - meter.sampleLuma(pushData.window, val_accessor, tex, sdata, (float32_t2)(glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize())); + meter.sampleLuma(pushData.window, val_accessor, tex, sdata, 
(float32_t2)(glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize()), pushData.viewportSize); } diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index b8ad803ff..b436e248f 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -4,11 +4,6 @@ #pragma wave shader_stage(fragment) -#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" -#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" -#include "nbl/builtin/hlsl/colorspace/decodeCIEXYZ.hlsl" -#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" -#include "nbl/builtin/hlsl/tonemapper/operators.hlsl" #include "app_resources/common.hlsl" // vertex shader is provided by the fullScreenTriangle extension @@ -17,19 +12,10 @@ using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; // binding 0 set 1 -[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 1)]] SamplerState samplerState; - -[[vk::push_constant]] AutoexposurePushData pushData; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { - float32_t3 color = colorspace::oetf::sRGB(texture.Sample(samplerState, vxAttr.uv).rgb); - float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); - - tonemapper::Reinhard reinhard = tonemapper::Reinhard::create(pushData.EV); - - float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, reinhard(CIEColor)); - - return float32_t4(colorspace::eotf::sRGB(tonemappedColor), 1.0); -} + return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); +} \ No newline at end of file diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 8b8ca771b..570e96807 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -77,7 +77,7 @@ class 
AutoexposureApp final : public examples::SimpleWindowedApplication, public * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers * field set to its default (nullptr). */ - smart_refctd_ptr lumaPresentDSLayout; + std::array, 3> dsLayouts; { auto defaultSampler = m_device->createSampler( { @@ -85,41 +85,96 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } ); - const IGPUDescriptorSetLayout::SBinding lumaPresentBindings[1] = { + const IGPUDescriptorSetLayout::SBinding imgBindings[3][1] = { { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT | IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - .immutableSamplers = &defaultSampler + { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + } + }, + { + { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = nullptr + } + }, + { + { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1, + .immutableSamplers = &defaultSampler + } } }; - lumaPresentDSLayout = m_device->createDescriptorSetLayout(lumaPresentBindings); - if (!lumaPresentDSLayout) - return logFail("Failed to Create Descriptor Layout: lumaPresentDSLayout"); + + bool dsLayoutCreation = true; + for (uint32_t index = 0; 
index < dsLayouts.size(); index++) { + dsLayouts[index] = m_device->createDescriptorSetLayout(imgBindings[index]); + dsLayoutCreation = dsLayoutCreation && dsLayouts[index]; + } + + if (!dsLayoutCreation) + return logFail("Failed to Create Descriptor Layouts"); } // Create semaphores - m_lumaMeterSemaphore = m_device->createSemaphore(m_submitIx); + m_meterSemaphore = m_device->createSemaphore(m_submitIx); + m_gatherSemaphore = m_device->createSemaphore(m_submitIx); m_presentSemaphore = m_device->createSemaphore(m_submitIx); // create the descriptor sets and with enough room { - core::smart_refctd_ptr lumaPresentPool; + std::array, 3> dsPools; + bool dsPoolCreation = true; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { dsLayouts[0].get() }; + const uint32_t setCounts[] = { 1u }; + dsPools[0] = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + dsPoolCreation = dsPoolCreation && dsPools[0]; + } { - const video::IGPUDescriptorSetLayout* const layouts[] = { lumaPresentDSLayout.get(), lumaPresentDSLayout.get() }; - const uint32_t setCounts[] = { 1u, 1u }; - lumaPresentPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + const video::IGPUDescriptorSetLayout* const layouts[] = { dsLayouts[1].get() }; + const uint32_t setCounts[] = { 1u }; + dsPools[1] = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + dsPoolCreation = dsPoolCreation && dsPools[1]; + } + { + const video::IGPUDescriptorSetLayout* const layouts[] = { dsLayouts[2].get() }; + const uint32_t setCounts[] = { 1u }; + dsPools[2] = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + dsPoolCreation = dsPoolCreation && dsPools[2]; } - if (!lumaPresentPool) + if (!dsPoolCreation) return logFail("Failed to Create Descriptor Pools"); - m_lumaPresentDS[0] = 
lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); - m_lumaPresentDS[1] = lumaPresentPool->createDescriptorSet(core::smart_refctd_ptr(lumaPresentDSLayout)); - if (!m_lumaPresentDS[0] || !m_lumaPresentDS[1]) - return logFail("Could not create Descriptor Set: lumaPresentDS!"); + bool dsCreation = true; + { + m_ds[0] = dsPools[0]->createDescriptorSet(dsLayouts[0]); + dsCreation = dsCreation && m_ds[0]; + } + { + m_ds[1] = dsPools[1]->createDescriptorSet(dsLayouts[1]); + dsCreation = dsCreation && m_ds[1]; + } + { + m_ds[2] = dsPools[2]->createDescriptorSet(dsLayouts[2]); + dsCreation = dsCreation && m_ds[2]; + } + + if (!dsCreation) + return logFail("Could not create Descriptor Sets!"); } auto graphicsQueue = getGraphicsQueue(); @@ -219,13 +274,33 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; // Luma Meter - auto lumaMeterShader = loadCompileAndCreateShader("app_resources/luma_meter.comp.hlsl"); - if (!lumaMeterShader) - return logFail("Failed to Load and Compile Compute Shader: lumaMeterShader!"); - auto lumaLayout = m_device->createPipelineLayout({ &pcRange, 1 }, core::smart_refctd_ptr(lumaPresentDSLayout), nullptr, nullptr, nullptr); - if (!createComputePipeline(lumaMeterShader, m_lumaMeterPipeline, lumaLayout)) + auto meterShader = loadCompileAndCreateShader("app_resources/luma_meter.comp.hlsl"); + if (!meterShader) + return logFail("Failed to Load and Compile Compute Shader: meterShader!"); + auto meterLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(dsLayouts[0]), + nullptr, + nullptr, + nullptr + ); + if (!createComputePipeline(meterShader, m_meterPipeline, meterLayout)) return logFail("Could not create Luma Meter Pipeline!"); + // Luma Gather + auto gatherShader = loadCompileAndCreateShader("app_resources/luma_gather.comp.hlsl"); + if (!gatherShader) + return logFail("Failed to Load and Compile Compute Shader: gatherShader!"); + auto gatherLayout = 
m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(dsLayouts[0]), + nullptr, + nullptr, + core::smart_refctd_ptr(dsLayouts[1]) + ); + if (!createComputePipeline(gatherShader, m_gatherPipeline, gatherLayout)) + return logFail("Could not create Luma Gather Pipeline!"); + // Load FSTri Shader ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); if (!fsTriProtoPPln) @@ -240,7 +315,13 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .entryPoint = "main", .shader = fragmentShader.get() }; - auto presentLayout = m_device->createPipelineLayout({ &pcRange, 1 }, nullptr, core::smart_refctd_ptr(lumaPresentDSLayout), nullptr, nullptr); + auto presentLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + nullptr, + nullptr, + nullptr, + core::smart_refctd_ptr(dsLayouts[2]) + ); m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scResources->getRenderpass()); if (!m_presentPipeline) return logFail("Could not create Graphics Pipeline!"); @@ -289,7 +370,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Allocate and create buffer for Luma Gather { // Allocate memory - m_lumaGatherAllocation = {}; + m_gatherAllocation = {}; smart_refctd_ptr buffer; { auto build_buffer = [this]( @@ -318,11 +399,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public assert(allocation->memory.get() == buffer->getBoundMemory().memory); }; - build_buffer(m_device, &m_lumaGatherAllocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); + build_buffer(m_device, &m_gatherAllocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); } - m_lumaGatherBDA = buffer->getDeviceAddress(); + m_gatherBDA = buffer->getDeviceAddress(); - auto mapped_memory = m_lumaGatherAllocation.memory->map({ 0ull, 
m_lumaGatherAllocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ); + auto mapped_memory = m_gatherAllocation.memory->map({ 0ull, m_gatherAllocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ); if (!mapped_memory) return logFail("Failed to map the Device Memory!\n"); } @@ -379,6 +460,13 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return false; m_gpuImg->setObjectDebugName("Autoexposure Image"); + imageParams = m_gpuImg->getCreationParameters(); + imageParams.usage = IGPUImage::EUF_SAMPLED_BIT | IGPUImage::EUF_STORAGE_BIT; + m_tonemappedImg = m_device->createImage(std::move(imageParams)); + if (!m_tonemappedImg || !m_device->allocate(m_tonemappedImg->getMemoryReqs(), m_tonemappedImg.get()).isValid()) + return false; + m_tonemappedImg->setObjectDebugName("Tonemapped Image"); + // Now show the window m_winMgr->show(m_window.get()); @@ -448,37 +536,51 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public IGPUImageView::SCreationParams gpuImgViewParams = { .image = m_gpuImg, .viewType = IGPUImageView::ET_2D, - .format = m_gpuImg->getCreationParameters().format + .format = m_gpuImg->getCreationParameters().format, + }; + IGPUImageView::SCreationParams tonemappedImgViewParams = { + .image = m_tonemappedImg, + .viewType = IGPUImageView::ET_2D, + .format = m_tonemappedImg->getCreationParameters().format }; m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); + m_tonemappedImgView = m_device->createImageView(std::move(tonemappedImgViewParams)); - IGPUDescriptorSet::SDescriptorInfo info1 = {}; - info1.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - info1.desc = m_gpuImgView; + IGPUDescriptorSet::SDescriptorInfo infos[3]; + infos[0].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[0].desc = m_gpuImgView; + infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + infos[1].desc = m_tonemappedImgView; + 
infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[2].desc = m_tonemappedImgView; - IGPUDescriptorSet::SDescriptorInfo info2 = {}; - info2.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - info2.desc = m_gpuImgView; // FIXME: temporarily pass in input image IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { { - .dstSet = m_lumaPresentDS[0].get(), + .dstSet = m_ds[0].get(), .binding = 0, .arrayElement = 0, .count = 1, - .info = &info1 + .info = infos }, { - .dstSet = m_lumaPresentDS[1].get(), + .dstSet = m_ds[1].get(), .binding = 0, .arrayElement = 0, .count = 1, - .info = &info2 + .info = infos + }, + { + .dstSet = m_ds[2].get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = infos } }; - m_device->updateDescriptorSets(2, writeDescriptors, 0, nullptr); + m_device->updateDescriptorSets(3, writeDescriptors, 0, nullptr); queue->endCapture(); } @@ -492,30 +594,38 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; uint32_t2 viewportSize = { m_gpuImg->getCreationParameters().extent.width, m_gpuImg->getCreationParameters().extent.height }; + float32_t sampleCount = (viewportSize.x * viewportSize.y) / 4; + uint32_t workgroupSize = SubgroupSize * SubgroupSize; + sampleCount = workgroupSize * (1 + (sampleCount - 1) / workgroupSize); // Luma Meter { auto queue = getComputeQueue(); auto cmdbuf = m_computeCmdBufs[0].get(); cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - auto ds = m_lumaPresentDS[0].get(); + auto ds = m_ds[0].get(); auto pc = AutoexposurePushData { .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), .lumaMinMax = LumaMinMax, - .EV = 0.0f, + .sampleCount = sampleCount, .viewportSize = viewportSize, - .lumaMeterBDA = m_lumaGatherBDA + .lumaMeterBDA = m_gatherBDA + }; + + const uint32_t2 dispatchSize = { + 1 + ((viewportSize.x / 2) - 1) / 
SubgroupSize, + 1 + ((viewportSize.y / 2) - 1) / SubgroupSize }; queue->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->bindComputePipeline(m_lumaMeterPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_lumaMeterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers - cmdbuf->pushConstants(m_lumaMeterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); - cmdbuf->dispatch(1 + (viewportSize.x - 1) / SubgroupSize, 1 + (viewportSize.y - 1) / SubgroupSize); + cmdbuf->bindComputePipeline(m_meterPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_meterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->pushConstants(m_meterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); cmdbuf->end(); { @@ -528,7 +638,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public submit_infos[0].commandBuffers = cmdBufs; IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { { - .semaphore = m_lumaMeterSemaphore.get(), + .semaphore = m_meterSemaphore.get(), .value = m_submitIx + 1, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT } @@ -541,36 +651,114 @@ class AutoexposureApp final : public 
examples::SimpleWindowedApplication, public const ISemaphore::SWaitInfo wait_infos[] = { { - .semaphore = m_lumaMeterSemaphore.get(), + .semaphore = m_meterSemaphore.get(), .value = m_submitIx + 1 } }; m_device->blockForSemaphores(wait_infos); } - // Get EV + // Luma Gather and Tonemapping { - const auto memory_range = ILogicalDevice::MappedMemoryRange( - m_lumaGatherAllocation.memory.get(), - 0ull, - m_lumaGatherAllocation.memory->getAllocationSize() - ); + auto queue = getComputeQueue(); + auto cmdbuf = m_computeCmdBufs[1].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + auto ds1 = m_ds[0].get(); + auto ds2 = m_ds[1].get(); + + auto pc = AutoexposurePushData + { + .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), + .lumaMinMax = LumaMinMax, + .sampleCount = sampleCount, + .viewportSize = viewportSize, + .lumaMeterBDA = m_gatherBDA + }; + + const uint32_t2 dispatchSize = { + 1 + ((viewportSize.x) - 1) / SubgroupSize, + 1 + ((viewportSize.y) - 1) / SubgroupSize + }; - if (!m_lumaGatherAllocation.memory->getMemoryPropertyFlags().hasFlags(IDeviceMemoryAllocation::EMPF_HOST_COHERENT_BIT)) - m_device->invalidateMappedMemoryRanges(1, &memory_range); + const SMemoryBarrier computeBarriers[] = { + { + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + }, + { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + } + }; - const uint32_t* buffData = reinterpret_cast(m_lumaGatherAllocation.memory->getMappedPointer()); + // change the layout of the image + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers1[] = { + { + .barrier = { + .dep = computeBarriers[0] + // no ownership transfers + }, + .image = m_gpuImg.get(), + // transition the whole view + .subresourceRange = m_tonemappedImgView->getCreationParameters().subresourceRange, + // a wiping transition + .newLayout = IGPUImage::LAYOUT::GENERAL + } + }; + const 
IGPUCommandBuffer::SImageMemoryBarrier imgBarriers2[] = { + { + .barrier = { + .dep = computeBarriers[1] + // no ownership transfers + }, + .image = m_gpuImg.get(), + // transition the whole view + .subresourceRange = m_tonemappedImgView->getCreationParameters().subresourceRange, + // a wiping transition + .oldLayout = IGPUImage::LAYOUT::GENERAL, + .newLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; - assert(m_lumaGatherAllocation.offset == 0); // simpler than writing out all the pointer arithmetic + queue->startCapture(); - m_EV = 0.0f; - for (int index = 0; index < SubgroupSize; index++) { - m_EV += static_cast(buffData[index]) / (log2(LumaMinMax[1]) - log2(LumaMinMax[0])) + log2(LumaMinMax[0]); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->bindComputePipeline(m_gatherPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_gatherPipeline->getLayout(), 0, 1, &ds1); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_gatherPipeline->getLayout(), 3, 1, &ds2); + cmdbuf->pushConstants(m_gatherPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers1 }); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers2 }); + cmdbuf->end(); + + { + IQueue::SSubmitInfo submit_infos[1]; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { + { + .cmdbuf = cmdbuf + } + }; + submit_infos[0].commandBuffers = cmdBufs; + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { + { + .semaphore = 
m_gatherSemaphore.get(), + .value = m_submitIx + 1, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + } + }; + submit_infos[0].signalSemaphores = signals; + + queue->submit(submit_infos); + queue->endCapture(); } - uint64_t sampleCount = (viewportSize.x * viewportSize.y) / 4; - uint64_t workgroupSize = SubgroupSize * SubgroupSize; - sampleCount = workgroupSize * (1 + (sampleCount - 1) / workgroupSize); - m_EV /= sampleCount; + + const ISemaphore::SWaitInfo wait_infos[] = { + { + .semaphore = m_gatherSemaphore.get(), + .value = m_submitIx + 1 + } + }; + m_device->blockForSemaphores(wait_infos); } // Render to swapchain @@ -583,15 +771,15 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto queue = getGraphicsQueue(); auto cmdbuf = m_graphicsCmdBufs[0].get(); cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - auto ds = m_lumaPresentDS[1].get(); + auto ds = m_ds[2].get(); auto pc = AutoexposurePushData { .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), .lumaMinMax = LumaMinMax, - .EV = m_EV, + .sampleCount = sampleCount, .viewportSize = viewportSize, - .lumaMeterBDA = m_lumaGatherBDA + .lumaMeterBDA = m_gatherBDA }; queue->startCapture(); @@ -628,7 +816,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 1, 1, &ds); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); ext::FullScreenTriangle::recordDrawCall(cmdbuf); cmdbuf->endRenderPass(); @@ -695,29 +883,28 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } protected: - nbl::video::IDeviceMemoryAllocator::SAllocation m_lumaGatherAllocation; - uint64_t m_lumaGatherBDA; - float32_t m_EV = 0; - smart_refctd_ptr m_gpuImg; - smart_refctd_ptr m_gpuImgView; + 
nbl::video::IDeviceMemoryAllocator::SAllocation m_gatherAllocation; + uint64_t m_gatherBDA; + smart_refctd_ptr m_gpuImg, m_tonemappedImg; + smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; // for image uploads smart_refctd_ptr m_scratchSemaphore; SIntendedSubmitInfo m_intendedSubmit; // Pipelines + smart_refctd_ptr m_meterPipeline, m_gatherPipeline; smart_refctd_ptr m_presentPipeline; - smart_refctd_ptr m_lumaMeterPipeline; // Descriptor Sets - std::array, ISwapchain::MaxImages> m_lumaPresentDS; + std::array, 3> m_ds; // Command Buffers smart_refctd_ptr m_graphicsCmdPool, m_computeCmdPool; - std::array, ISwapchain::MaxImages> m_graphicsCmdBufs, m_computeCmdBufs; + std::array, 2> m_graphicsCmdBufs, m_computeCmdBufs; // Semaphores - smart_refctd_ptr m_lumaMeterSemaphore, m_presentSemaphore; + smart_refctd_ptr m_meterSemaphore, m_gatherSemaphore, m_presentSemaphore; uint64_t m_submitIx = 0; // window From d3b5765eb82c268e7bdadd7369d83e6273b38570 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 28 Aug 2024 18:00:25 +0530 Subject: [PATCH 39/50] Handle image layouts correctly --- 26_Autoexposure/main.cpp | 52 +++++++--------------------------------- 1 file changed, 8 insertions(+), 44 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 570e96807..6e61573d1 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -504,6 +504,11 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .subresourceRange = cpuImgParams.subresourceRange, // a wiping transition .newLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL + }, + { + .image = m_tonemappedImg.get(), + .subresourceRange = cpuImgParams.subresourceRange, + .newLayout = IGPUImage::LAYOUT::GENERAL } }; const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers2[] = { @@ -552,7 +557,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public infos[0].desc = m_gpuImgView; 
infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; infos[1].desc = m_tonemappedImgView; - infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[2].info.image.imageLayout = IImage::LAYOUT::GENERAL; infos[2].desc = m_tonemappedImgView; @@ -569,14 +574,14 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .binding = 0, .arrayElement = 0, .count = 1, - .info = infos + .info = infos + 1 }, { .dstSet = m_ds[2].get(), .binding = 0, .arrayElement = 0, .count = 1, - .info = infos + .info = infos + 2 } }; @@ -680,45 +685,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public 1 + ((viewportSize.y) - 1) / SubgroupSize }; - const SMemoryBarrier computeBarriers[] = { - { - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - }, - { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - } - }; - - // change the layout of the image - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers1[] = { - { - .barrier = { - .dep = computeBarriers[0] - // no ownership transfers - }, - .image = m_gpuImg.get(), - // transition the whole view - .subresourceRange = m_tonemappedImgView->getCreationParameters().subresourceRange, - // a wiping transition - .newLayout = IGPUImage::LAYOUT::GENERAL - } - }; - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers2[] = { - { - .barrier = { - .dep = computeBarriers[1] - // no ownership transfers - }, - .image = m_gpuImg.get(), - // transition the whole view - .subresourceRange = m_tonemappedImgView->getCreationParameters().subresourceRange, - // a wiping transition - .oldLayout = IGPUImage::LAYOUT::GENERAL, - .newLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - queue->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -726,9 +692,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public 
cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_gatherPipeline->getLayout(), 0, 1, &ds1); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_gatherPipeline->getLayout(), 3, 1, &ds2); cmdbuf->pushConstants(m_gatherPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers1 }); cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers2 }); cmdbuf->end(); { From 612f0f6b7b4d7d4a52d1a1af3aa8b8aaddab6bfc Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:18:20 +0100 Subject: [PATCH 40/50] Simplify type --- 26_Autoexposure/app_resources/common.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl index b270c38ce..bf2c19920 100644 --- a/26_Autoexposure/app_resources/common.hlsl +++ b/26_Autoexposure/app_resources/common.hlsl @@ -15,7 +15,7 @@ namespace hlsl struct AutoexposurePushData { - nbl::hlsl::luma_meter::MeteringWindow window; + luma_meter::MeteringWindow window; float32_t2 lumaMinMax; float32_t sampleCount; uint32_t2 viewportSize; From cb46d82fe03e6b4f41f2dbddb45c6d9056bfa5ad Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:19:02 +0100 Subject: [PATCH 41/50] Wait for correct semaphore value --- 26_Autoexposure/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 6e61573d1..fdce953bd 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -821,7 +821,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public const ISemaphore::SWaitInfo cmdbufDonePending[] = { { .semaphore = m_presentSemaphore.get(), - .value = m_submitIx + .value = m_submitIx + 1 } }; if (m_device->blockForSemaphores(cmdbufDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) From 1996cf33d03308c43ec503a8ca298c40c5386fe1 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sun, 29 Sep 2024 18:17:39 +0100 Subject: [PATCH 42/50] Remove unnecessary data members --- 26_Autoexposure/main.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index fdce953bd..77d2c8e62 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -455,17 +455,17 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } if (imageParams.type == IGPUImage::ET_3D) imageParams.flags |= IGPUImage::ECF_2D_ARRAY_COMPATIBLE_BIT; - m_gpuImg = m_device->createImage(std::move(imageParams)); - if (!m_gpuImg || !m_device->allocate(m_gpuImg->getMemoryReqs(), m_gpuImg.get()).isValid()) + auto gpuImg = m_device->createImage(std::move(imageParams)); + if (!gpuImg || !m_device->allocate(gpuImg->getMemoryReqs(), gpuImg.get()).isValid()) return false; - m_gpuImg->setObjectDebugName("Autoexposure Image"); + gpuImg->setObjectDebugName("Autoexposure Image"); - imageParams = m_gpuImg->getCreationParameters(); + imageParams = gpuImg->getCreationParameters(); imageParams.usage = IGPUImage::EUF_SAMPLED_BIT | IGPUImage::EUF_STORAGE_BIT; - m_tonemappedImg = m_device->createImage(std::move(imageParams)); - if (!m_tonemappedImg || !m_device->allocate(m_tonemappedImg->getMemoryReqs(), m_tonemappedImg.get()).isValid()) + auto 
tonemappedImg = m_device->createImage(std::move(imageParams)); + if (!tonemappedImg || !m_device->allocate(tonemappedImg->getMemoryReqs(), tonemappedImg.get()).isValid()) return false; - m_tonemappedImg->setObjectDebugName("Tonemapped Image"); + tonemappedImg->setObjectDebugName("Tonemapped Image"); // Now show the window m_winMgr->show(m_window.get()); @@ -499,14 +499,14 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .dep = transferBarriers[0] // no ownership transfers }, - .image = m_gpuImg.get(), + .image = gpuImg.get(), // transition the whole view .subresourceRange = cpuImgParams.subresourceRange, // a wiping transition .newLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL }, { - .image = m_tonemappedImg.get(), + .image = tonemappedImg.get(), .subresourceRange = cpuImgParams.subresourceRange, .newLayout = IGPUImage::LAYOUT::GENERAL } @@ -517,7 +517,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .dep = transferBarriers[1] // no ownership transfers }, - .image = m_gpuImg.get(), + .image = gpuImg.get(), // transition the whole view .subresourceRange = cpuImgParams.subresourceRange, // a wiping transition @@ -531,7 +531,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_intendedSubmit, cpuImgParams.image->getBuffer(), cpuImgParams.image->getCreationParameters().format, - m_gpuImg.get(), + gpuImg.get(), IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, cpuImgParams.image->getRegions() ); @@ -539,14 +539,14 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_utils->autoSubmit(m_intendedSubmit, [&](SIntendedSubmitInfo& nextSubmit) -> bool { return true; }); IGPUImageView::SCreationParams gpuImgViewParams = { - .image = m_gpuImg, + .image = gpuImg, .viewType = IGPUImageView::ET_2D, - .format = m_gpuImg->getCreationParameters().format, + .format = gpuImg->getCreationParameters().format, }; IGPUImageView::SCreationParams 
tonemappedImgViewParams = { - .image = m_tonemappedImg, + .image = tonemappedImg, .viewType = IGPUImageView::ET_2D, - .format = m_tonemappedImg->getCreationParameters().format + .format = tonemappedImg->getCreationParameters().format }; m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); @@ -598,7 +598,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public { const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; - uint32_t2 viewportSize = { m_gpuImg->getCreationParameters().extent.width, m_gpuImg->getCreationParameters().extent.height }; + auto gpuImgExtent = m_gpuImgView->getCreationParameters().image->getCreationParameters().extent; + uint32_t2 viewportSize = { gpuImgExtent.width, gpuImgExtent.height }; float32_t sampleCount = (viewportSize.x * viewportSize.y) / 4; uint32_t workgroupSize = SubgroupSize * SubgroupSize; sampleCount = workgroupSize * (1 + (sampleCount - 1) / workgroupSize); @@ -849,7 +850,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public protected: nbl::video::IDeviceMemoryAllocator::SAllocation m_gatherAllocation; uint64_t m_gatherBDA; - smart_refctd_ptr m_gpuImg, m_tonemappedImg; smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; // for image uploads From bc11b4a249881acab17bacb2918cd78f5c08ba58 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Sat, 14 Dec 2024 17:55:20 +0000 Subject: [PATCH 43/50] Use asset converter for images and descriptors --- ...ather.comp.hlsl => luma_tonemap.comp.hlsl} | 0 26_Autoexposure/main.cpp | 790 +++++++++--------- 2 files changed, 401 insertions(+), 389 deletions(-) rename 26_Autoexposure/app_resources/{luma_gather.comp.hlsl => luma_tonemap.comp.hlsl} (100%) diff --git a/26_Autoexposure/app_resources/luma_gather.comp.hlsl b/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl similarity index 100% rename from 26_Autoexposure/app_resources/luma_gather.comp.hlsl 
rename to 26_Autoexposure/app_resources/luma_tonemap.comp.hlsl diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 77d2c8e62..f6d690a00 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -2,7 +2,7 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h #include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "../common/SimpleWindowedApplication.hpp" +#include "SimpleWindowedApplication.hpp" #include "nbl/video/surface/CSurfaceVulkan.h" #include "nbl/asset/interchange/IAssetLoader.h" @@ -24,7 +24,12 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; using clock_t = std::chrono::steady_clock; - constexpr static inline std::string_view DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; + static inline std::string DefaultImagePathsFile = "../../media/noises/spp_benchmark_4k_512.exr"; + static inline std::array ShaderPaths = { + "app_resources/luma_meter.comp.hlsl", + "app_resources/luma_tonemap.comp.hlsl" , + "app_resources/present.frag.hlsl" + }; constexpr static inline uint32_t2 Dimensions = { 1280, 720 }; constexpr static inline float32_t2 MeteringWindowScale = { 0.5f, 0.5f }; constexpr static inline float32_t2 MeteringWindowOffset = { 0.25f, 0.25f }; @@ -70,124 +75,29 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public if (!asset_base_t::onAppInitialized(std::move(system))) return false; - /* - * We'll be using a combined image sampler for this example, which lets us assign both a sampled image and a sampler to the same binding. - * In this example we provide a sampler at descriptor set creation time, via the SBinding struct below. 
This specifies that the sampler for this binding is immutable, - * as evidenced by the name of the field in the SBinding. - * Samplers for combined image samplers can also be mutable, which for a binding of a descriptor set is specified also at creation time by leaving the immutableSamplers - * field set to its default (nullptr). - */ - std::array, 3> dsLayouts; - { - auto defaultSampler = m_device->createSampler( - { - .AnisotropicFilter = 0 - } - ); - - const IGPUDescriptorSetLayout::SBinding imgBindings[3][1] = { - { - { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - .immutableSamplers = &defaultSampler - } - }, - { - { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - .immutableSamplers = nullptr - } - }, - { - { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1, - .immutableSamplers = &defaultSampler - } - } - }; - - bool dsLayoutCreation = true; - for (uint32_t index = 0; index < dsLayouts.size(); index++) { - dsLayouts[index] = m_device->createDescriptorSetLayout(imgBindings[index]); - dsLayoutCreation = dsLayoutCreation && dsLayouts[index]; - } - - if (!dsLayoutCreation) - return logFail("Failed to Create Descriptor Layouts"); - } - // Create semaphores m_meterSemaphore = m_device->createSemaphore(m_submitIx); - m_gatherSemaphore = m_device->createSemaphore(m_submitIx); + m_tonemapSemaphore = m_device->createSemaphore(m_submitIx); m_presentSemaphore = m_device->createSemaphore(m_submitIx); - // create the descriptor sets and with enough room + // Create 
command pool and buffers { - std::array, 3> dsPools; - bool dsPoolCreation = true; - { - const video::IGPUDescriptorSetLayout* const layouts[] = { dsLayouts[0].get() }; - const uint32_t setCounts[] = { 1u }; - dsPools[0] = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - dsPoolCreation = dsPoolCreation && dsPools[0]; - } - { - const video::IGPUDescriptorSetLayout* const layouts[] = { dsLayouts[1].get() }; - const uint32_t setCounts[] = { 1u }; - dsPools[1] = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - dsPoolCreation = dsPoolCreation && dsPools[1]; - } - { - const video::IGPUDescriptorSetLayout* const layouts[] = { dsLayouts[2].get() }; - const uint32_t setCounts[] = { 1u }; - dsPools[2] = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - dsPoolCreation = dsPoolCreation && dsPools[2]; - } + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); - if (!dsPoolCreation) - return logFail("Failed to Create Descriptor Pools"); - - bool dsCreation = true; - { - m_ds[0] = dsPools[0]->createDescriptorSet(dsLayouts[0]); - dsCreation = dsCreation && m_ds[0]; - } - { - m_ds[1] = dsPools[1]->createDescriptorSet(dsLayouts[1]); - dsCreation = dsCreation && m_ds[1]; - } - { - m_ds[2] = dsPools[2]->createDescriptorSet(dsLayouts[2]); - dsCreation = dsCreation && m_ds[2]; - } - - if (!dsCreation) - return logFail("Could not create Descriptor Sets!"); + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), 1 })) + return logFail("Couldn't create Command Buffer!"); } - auto graphicsQueue = getGraphicsQueue(); - auto computeQueue = getComputeQueue(); - - // Gather swapchain resources - 
std::unique_ptr scResources; - ISwapchain::SCreationParams swapchainParams; + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; { - swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; - // Need to choose a surface format + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; if (!swapchainParams.deduceFormat(m_physicalDevice)) return logFail("Could not choose a Surface Format for the Swapchain!"); + // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals constexpr IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { // wipe-transition to ATTACHMENT_OPTIMAL @@ -202,7 +112,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // because we clear and don't blend .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT } - // leave view offsets and flags default + // leave view offsets and flags default }, // ATTACHMENT_OPTIMAL to PRESENT_SRC { @@ -213,31 +123,154 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT // we can have NONE as the Destinations because the spec says so about presents } - // leave view offsets and flags default + // leave view offsets and flags default }, IGPURenderpass::SCreationParams::DependenciesEnd }; - scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - if (!scResources->getRenderpass()) + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + + renderpass = scResources->getRenderpass(); + + if (!renderpass) return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return 
logFail("Could not create Window & Surface or initialize the Surface!"); } - // Load the shaders and create the pipelines + // Create descriptors and pipelines { - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr - { + auto convertDSLayoutCPU2GPU = [&](std::span cpuLayouts) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuLayouts; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayouts = reservation.getGPUObjects(); + std::vector> result; + result.reserve(cpuLayouts.size()); + + for (auto& gpuLayout : gpuLayouts) { + auto layout = gpuLayout.value; + if (!layout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + result.push_back(layout); + } + + return result; + }; + auto convertDSCPU2GPU = [&](std::span cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuDS; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects(); + std::vector> result; + result.reserve(cpuDS.size()); + + for (auto& ds : gpuDS) { + if (!ds.value) { + 
m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + result.push_back(ds.value); + } + + return result; + }; + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = make_smart_refctd_ptr(samplerParams); + + std::array meterBindings = {}; + std::array tonemapBindings = {}; + std::array presentBindings = {}; + + meterBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + tonemapBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + presentBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + + auto cpuMeterLayout = make_smart_refctd_ptr(meterBindings); + auto cpuTonemapLayout = make_smart_refctd_ptr(tonemapBindings); + auto cpuPresentLayout = make_smart_refctd_ptr(presentBindings); + + std::array cpuLayouts = { + cpuMeterLayout.get(), + cpuTonemapLayout.get(), + cpuPresentLayout.get() + }; + + auto gpuLayouts = convertDSLayoutCPU2GPU(cpuLayouts); + + auto cpuMeterDS = make_smart_refctd_ptr(std::move(cpuMeterLayout)); + auto cpuTonemapDS = make_smart_refctd_ptr(std::move(cpuTonemapLayout)); + auto cpuPresentDS = make_smart_refctd_ptr(std::move(cpuPresentLayout)); + + std::array cpuDS = { + cpuMeterDS.get(), + cpuTonemapDS.get(), + cpuPresentDS.get() + }; + + auto gpuDS = 
convertDSCPU2GPU(cpuDS); + m_meterDS = gpuDS[0]; + m_tonemapDS = gpuDS[1]; + m_presentDS = gpuDS[2]; + + // Create Shaders + auto loadAndCompileShader = [&](std::string pathToShader) { IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset(relPath, lp); + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); const auto assets = assetBundle.getContents(); if (assets.empty()) - return nullptr; + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader auto source = IAsset::castDown(assets[0]); - if (!source) - return nullptr; const uint32_t workgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations; const uint32_t subgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; auto overriddenSource = CHLSLCompiler::createOverridenCopy( @@ -246,125 +279,89 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public workgroupSize, subgroupSize ); + // The down-cast should not fail! 
+ assert(overriddenSource); - return m_device->createShader(overriddenSource.get()); - }; - - auto createComputePipeline = [&](smart_refctd_ptr& shader, smart_refctd_ptr& pipeline, smart_refctd_ptr pipelineLayout) -> bool - { + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + auto shader = m_device->createShader(overriddenSource.get()); + if (!shader) { - IGPUComputePipeline::SCreationParams params = {}; - params.layout = pipelineLayout.get(); - params.shader.shader = shader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); - if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &pipeline)) - return logFail("Failed to create compute pipeline!\n"); + m_logger->log("Shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); } - return true; + return shader; }; - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(AutoexposurePushData) - }; + // Create compute pipelines + { + std::array params; + std::array, 2> shaders; + std::array, 2> pipelineLayouts; + std::array, 2> pipelines; + for (int index = 0; index < 2; index++) { + shaders[index] = loadAndCompileShader(ShaderPaths[index]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(AutoexposurePushData) + }; + pipelineLayouts[index] = m_device->createPipelineLayout( + { &pcRange, 1 }, + nullptr, + nullptr, + smart_refctd_ptr(gpuLayouts[index]), + nullptr + ); + if (!pipelineLayouts[index]) { + return logFail("Failed to create pipeline layout"); + } - // Luma Meter - auto meterShader = loadCompileAndCreateShader("app_resources/luma_meter.comp.hlsl"); - if (!meterShader) - return logFail("Failed to Load and Compile Compute Shader: 
meterShader!"); - auto meterLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(dsLayouts[0]), - nullptr, - nullptr, - nullptr - ); - if (!createComputePipeline(meterShader, m_meterPipeline, meterLayout)) - return logFail("Could not create Luma Meter Pipeline!"); - - // Luma Gather - auto gatherShader = loadCompileAndCreateShader("app_resources/luma_gather.comp.hlsl"); - if (!gatherShader) - return logFail("Failed to Load and Compile Compute Shader: gatherShader!"); - auto gatherLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(dsLayouts[0]), - nullptr, - nullptr, - core::smart_refctd_ptr(dsLayouts[1]) - ); - if (!createComputePipeline(gatherShader, m_gatherPipeline, gatherLayout)) - return logFail("Could not create Luma Gather Pipeline!"); - - // Load FSTri Shader - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - // Load Fragment Shader - auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl");; - if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); - - const IGPUShader::SSpecInfo fragSpec = { - .entryPoint = "main", - .shader = fragmentShader.get() - }; - auto presentLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - nullptr, - nullptr, - nullptr, - core::smart_refctd_ptr(dsLayouts[2]) - ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scResources->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); - } + params[index] = {}; + params[index].layout = pipelineLayouts[index].get(); + params[index].shader.shader = shaders[index].get(); + params[index].shader.entryPoint = "main"; + params[index].shader.entries = nullptr; + 
params[index].shader.requireFullSubgroups = true; + params[index].shader.requiredSubgroupSize = static_cast(5); + } + + if (!m_device->createComputePipelines(nullptr, params, pipelines.data())) { + return logFail("Failed to create compute pipeline!\n"); + } - // Init the surface and create the swapchain - if (!m_surface || !m_surface->init(graphicsQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); + m_meterPipeline = std::move(pipelines[0]); + m_tonemapPipeline = std::move(pipelines[1]); + } - // need resetttable commandbuffers for the upload utility - { - m_graphicsCmdPool = m_device->createCommandPool(graphicsQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - m_computeCmdPool = m_device->createCommandPool(computeQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - - // create the commandbuffers - if (!m_graphicsCmdPool || !m_computeCmdPool) - return logFail("Couldn't create Command Pools!"); - - if ( - !m_graphicsCmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_graphicsCmdBufs.data(), 1 }) || - !m_computeCmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_computeCmdBufs.data(), 2 }) - ) - return logFail("Couldn't create Command Buffers!"); - } + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileShader(ShaderPaths[2]); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUShader::SSpecInfo fragSpec = { + .entryPoint = "main", + .shader = fragmentShader.get() + }; 
- // things for IUtilities - { - m_scratchSemaphore = m_device->createSemaphore(0); - if (!m_scratchSemaphore) - return logFail("Could not create Scratch Semaphore"); - m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); - // we don't want to overcomplicate the example with multi-queue - m_intendedSubmit.queue = graphicsQueue; - // wait for nothing before upload - m_intendedSubmit.waitSemaphores = {}; - m_intendedSubmit.waitSemaphores = {}; - // fill later - m_intendedSubmit.commandBuffers = {}; - m_intendedSubmit.scratchSemaphore = { - .semaphore = m_scratchSemaphore.get(), - .value = 0, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; + auto presentLayout = m_device->createPipelineLayout( + {}, + nullptr, + nullptr, + std::move(gpuLayouts[2]), + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } } // Allocate and create buffer for Luma Gather @@ -397,6 +394,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); assert(allocation->memory.get() == buffer->getBoundMemory().memory); + return true; }; build_buffer(m_device, &m_gatherAllocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); @@ -408,176 +406,188 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Failed to map the Device Memory!\n"); } - // Allocate and Leave 1/4 for image uploads, to test image copy with small memory remaining - { - uint32_t localOffset = video::StreamingTransientDataBufferMT<>::invalid_value; - uint32_t maxFreeBlock = m_utils->getDefaultUpStreamingBuffer()->max_size(); - const uint32_t allocationAlignment = 64u; - const uint32_t allocationSize = (maxFreeBlock / 4) * 3; - 
m_utils->getDefaultUpStreamingBuffer()->multi_allocate(std::chrono::steady_clock::now() + std::chrono::microseconds(500u), 1u, &localOffset, &allocationSize, &allocationAlignment); - } - // Load exr file into gpu + smart_refctd_ptr gpuImg; { - IAssetLoader::SAssetLoadParams params; - auto imageBundle = m_assetMgr->getAsset(DefaultImagePathsFile.data(), params); - auto cpuImg = IAsset::castDown(imageBundle.getContents().begin()[0]); - auto format = cpuImg->getCreationParameters().format; - - ICPUImageView::SCreationParams viewParams = { - .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, - .image = std::move(cpuImg), - .viewType = IImageView::E_TYPE::ET_2D, - .format = format, - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - } - }; + auto convertImgCPU2GPU = [&](ICPUImage* cpuImg) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
+ struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } - const auto cpuImgView = ICPUImageView::create(std::move(viewParams)); - const auto& cpuImgParams = cpuImgView->getCreationParameters(); + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } - // create matching size image upto dimensions - IGPUImage::SCreationParams imageParams = {}; - imageParams = cpuImgParams.image->getCreationParameters(); - imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT | IGPUImage::EUF_SAMPLED_BIT | IGPUImage::E_USAGE_FLAGS::EUF_TRANSFER_SRC_BIT; - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 
0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } }; - imageParams.format = m_physicalDevice->promoteImageFormat(request, imageParams.tiling); - } - if (imageParams.type == IGPUImage::ET_3D) - imageParams.flags |= IGPUImage::ECF_2D_ARRAY_COMPATIBLE_BIT; - auto gpuImg = m_device->createImage(std::move(imageParams)); - if (!gpuImg || !m_device->allocate(gpuImg->getMemoryReqs(), gpuImg.get()).isValid()) - return false; - gpuImg->setObjectDebugName("Autoexposure Image"); - - imageParams = gpuImg->getCreationParameters(); - imageParams.usage = IGPUImage::EUF_SAMPLED_BIT | IGPUImage::EUF_STORAGE_BIT; - auto tonemappedImg = m_device->createImage(std::move(imageParams)); - if (!tonemappedImg || !m_device->allocate(tonemappedImg->getMemoryReqs(), tonemappedImg.get()).isValid()) - return false; - tonemappedImg->setObjectDebugName("Tonemapped Image"); - - // Now show the window - m_winMgr->show(m_window.get()); - - // we don't want to overcomplicate the example with multi-queue - auto queue = getGraphicsQueue(); - auto cmdbuf = m_graphicsCmdBufs[0].get(); - IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; - m_intendedSubmit.commandBuffers = { &cmdbufInfo, 1 }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuImg, 1 }; + // assert that we don't need to provide patches + assert(cpuImg->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the 
`smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } - // there's no previous operation to wait for - const SMemoryBarrier transferBarriers[] = { - { - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT - }, - { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); } + + return gpuImgs[0].value; }; - // upload image and write to descriptor set - queue->startCapture(); + smart_refctd_ptr cpuImg; + { + IAssetLoader::SAssetLoadParams lp; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // change the layout of the image - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers1[] = { - { - .barrier = { - .dep = transferBarriers[0] - // no ownership transfers - }, - .image = gpuImg.get(), - // transition the whole view - .subresourceRange = cpuImgParams.subresourceRange, - // a wiping transition - .newLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL - }, - { - .image = tonemappedImg.get(), - .subresourceRange = cpuImgParams.subresourceRange, - .newLayout = IGPUImage::LAYOUT::GENERAL + cpuImg = IAsset::castDown(bundle.getContents()[0]); + if (!cpuImg) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); } }; - const 
IGPUCommandBuffer::SImageMemoryBarrier imgBarriers2[] = { + + gpuImg = convertImgCPU2GPU(cpuImg.get()); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr { - .barrier = { - .dep = transferBarriers[1] - // no ownership transfers - }, - .image = gpuImg.get(), - // transition the whole view - .subresourceRange = cpuImgParams.subresourceRange, - // a wiping transition - .oldLayout = IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers1 }); - // upload contents - m_utils->updateImageViaStagingBuffer( - m_intendedSubmit, - cpuImgParams.image->getBuffer(), - cpuImgParams.image->getCreationParameters().format, - gpuImg.get(), - IGPUImage::LAYOUT::TRANSFER_DST_OPTIMAL, - cpuImgParams.image->getRegions() - ); - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers2 }); - m_utils->autoSubmit(m_intendedSubmit, [&](SIntendedSubmitInfo& nextSubmit) -> bool { return true; }); - - 
IGPUImageView::SCreationParams gpuImgViewParams = { - .image = gpuImg, - .viewType = IGPUImageView::ET_2D, - .format = gpuImg->getCreationParameters().format, - }; - IGPUImageView::SCreationParams tonemappedImgViewParams = { - .image = tonemappedImg, - .viewType = IGPUImageView::ET_2D, - .format = tonemappedImg->getCreationParameters().format - }; + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + return m_device->createImageView(std::move(imgViewInfo)); + }; - m_gpuImgView = m_device->createImageView(std::move(gpuImgViewParams)); - m_tonemappedImgView = m_device->createImageView(std::move(tonemappedImgViewParams)); + auto params = gpuImg->getCreationParameters(); + auto extent = params.extent; + gpuImg->setObjectDebugName("GPU Img"); + m_gpuImgView = createHDRIImageView(gpuImg); + m_gpuImgView->setObjectDebugName("GPU Img View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, Dimensions.x, Dimensions.y); + outImg->setObjectDebugName("Tonemapped Image"); + m_tonemappedImgView = createHDRIImageView(outImg); + m_tonemappedImgView->setObjectDebugName("Tonemapped Image View"); + } + // Update Descriptors + { IGPUDescriptorSet::SDescriptorInfo infos[3]; infos[0].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; infos[0].desc = m_gpuImgView; infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; infos[1].desc = m_tonemappedImgView; - infos[2].info.image.imageLayout = IImage::LAYOUT::GENERAL; + infos[2].info.image.imageLayout = 
IImage::LAYOUT::READ_ONLY_OPTIMAL; infos[2].desc = m_tonemappedImgView; - IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { { - .dstSet = m_ds[0].get(), + .dstSet = m_meterDS.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = infos }, { - .dstSet = m_ds[1].get(), + .dstSet = m_tonemapDS.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = infos + 1 }, { - .dstSet = m_ds[2].get(), + .dstSet = m_presentDS.get(), .binding = 0, .arrayElement = 0, .count = 1, @@ -586,16 +596,19 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; m_device->updateDescriptorSets(3, writeDescriptors, 0, nullptr); - - queue->endCapture(); } + m_winMgr->setWindowSize(m_window.get(), Dimensions.x, Dimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + return true; } // We do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { +#if 0 const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; auto gpuImgExtent = m_gpuImgView->getCreationParameters().image->getCreationParameters().extent; @@ -625,7 +638,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public 1 + ((viewportSize.y / 2) - 1) / SubgroupSize }; - queue->startCapture(); + m_api->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cmdbuf->bindComputePipeline(m_meterPipeline.get()); @@ -652,7 +665,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public submit_infos[0].signalSemaphores = signals; queue->submit(submit_infos); - queue->endCapture(); + m_api->endCapture(); } const ISemaphore::SWaitInfo wait_infos[] = { @@ -686,7 +699,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public 1 + ((viewportSize.y) - 1) / SubgroupSize }; - queue->startCapture(); + m_api->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); 
cmdbuf->bindComputePipeline(m_gatherPipeline.get()); @@ -714,7 +727,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public submit_infos[0].signalSemaphores = signals; queue->submit(submit_infos); - queue->endCapture(); + m_api->endCapture(); } const ISemaphore::SWaitInfo wait_infos[] = { @@ -747,7 +760,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .lumaMeterBDA = m_gatherBDA }; - queue->startCapture(); + m_api->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); @@ -815,7 +828,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Present m_surface->present(acquire.imageIndex, rendered); - queue->endCapture(); + m_api->endCapture(); // Wait for completion { @@ -830,6 +843,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } } +#endif + m_submitIx++; } @@ -848,32 +863,29 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public } protected: - nbl::video::IDeviceMemoryAllocator::SAllocation m_gatherAllocation; - uint64_t m_gatherBDA; - smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; - - // for image uploads - smart_refctd_ptr m_scratchSemaphore; - SIntendedSubmitInfo m_intendedSubmit; + // window + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; // Pipelines - smart_refctd_ptr m_meterPipeline, m_gatherPipeline; + smart_refctd_ptr m_meterPipeline, m_tonemapPipeline; smart_refctd_ptr m_presentPipeline; // Descriptor Sets - std::array, 3> m_ds; + smart_refctd_ptr m_meterDS, m_tonemapDS, m_presentDS; // Command Buffers - smart_refctd_ptr m_graphicsCmdPool, m_computeCmdPool; - std::array, 2> m_graphicsCmdBufs, m_computeCmdBufs; + smart_refctd_ptr m_cmdPool; + std::array, 1> m_cmdBufs; // Semaphores - smart_refctd_ptr m_meterSemaphore, m_gatherSemaphore, m_presentSemaphore; + smart_refctd_ptr m_meterSemaphore, m_tonemapSemaphore, m_presentSemaphore; uint64_t m_submitIx = 
0; - // window - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; + // example resources + nbl::video::IDeviceMemoryAllocator::SAllocation m_gatherAllocation; + uint64_t m_gatherBDA; + smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; }; NBL_MAIN_FUNC(AutoexposureApp) From 3a94cd4abb3448594d7491bbd13f5c1c5fa5335b Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 15 Jan 2025 05:41:37 +0000 Subject: [PATCH 44/50] Rewrite descriptor set logic --- 26_Autoexposure/main.cpp | 159 ++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 79 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index f6d690a00..90300047d 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -203,11 +203,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public }; auto defaultSampler = make_smart_refctd_ptr(samplerParams); - std::array meterBindings = {}; - std::array tonemapBindings = {}; - std::array presentBindings = {}; + std::array imgSamplerbindings = {}; + std::array rwImgbindings = {}; - meterBindings[0] = { + imgSamplerbindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, @@ -215,49 +214,36 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public .count = 1u, .immutableSamplers = &defaultSampler }; - tonemapBindings[0] = { + rwImgbindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE | IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, .immutableSamplers = nullptr }; - presentBindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, 
- .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler - }; - auto cpuMeterLayout = make_smart_refctd_ptr(meterBindings); - auto cpuTonemapLayout = make_smart_refctd_ptr(tonemapBindings); - auto cpuPresentLayout = make_smart_refctd_ptr(presentBindings); + auto cpuImgSamplerLayout = make_smart_refctd_ptr(imgSamplerbindings); + auto cpuRWImgLayout = make_smart_refctd_ptr(rwImgbindings); - std::array cpuLayouts = { - cpuMeterLayout.get(), - cpuTonemapLayout.get(), - cpuPresentLayout.get() + std::array cpuLayouts = { + cpuImgSamplerLayout.get(), + cpuRWImgLayout.get() }; auto gpuLayouts = convertDSLayoutCPU2GPU(cpuLayouts); - auto cpuMeterDS = make_smart_refctd_ptr(std::move(cpuMeterLayout)); - auto cpuTonemapDS = make_smart_refctd_ptr(std::move(cpuTonemapLayout)); - auto cpuPresentDS = make_smart_refctd_ptr(std::move(cpuPresentLayout)); + auto cpuImgSamplerDS = make_smart_refctd_ptr(std::move(cpuImgSamplerLayout)); + auto cpuRWImgDS = make_smart_refctd_ptr(std::move(cpuRWImgLayout)); - std::array cpuDS = { - cpuMeterDS.get(), - cpuTonemapDS.get(), - cpuPresentDS.get() + std::array cpuDS = { + cpuImgSamplerDS.get(), + cpuRWImgDS.get() }; auto gpuDS = convertDSCPU2GPU(cpuDS); - m_meterDS = gpuDS[0]; - m_tonemapDS = gpuDS[1]; - m_presentDS = gpuDS[2]; + m_imgSamplerDS = gpuDS[0]; + m_rwImgDS = gpuDS[1]; // Create Shaders auto loadAndCompileShader = [&](std::string pathToShader) { @@ -299,31 +285,57 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public std::array, 2> shaders; std::array, 2> pipelineLayouts; std::array, 2> pipelines; - for (int index = 0; index < 2; index++) { - shaders[index] = loadAndCompileShader(ShaderPaths[index]); + { + shaders[0] = loadAndCompileShader(ShaderPaths[0]); const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = 
sizeof(AutoexposurePushData) }; - pipelineLayouts[index] = m_device->createPipelineLayout( + pipelineLayouts[0] = m_device->createPipelineLayout( { &pcRange, 1 }, + smart_refctd_ptr(gpuLayouts[0]), nullptr, nullptr, - smart_refctd_ptr(gpuLayouts[index]), nullptr ); - if (!pipelineLayouts[index]) { + if (!pipelineLayouts[0]) { + return logFail("Failed to create pipeline layout"); + } + + params[0] = {}; + params[0].layout = pipelineLayouts[0].get(); + params[0].shader.shader = shaders[0].get(); + params[0].shader.entryPoint = "main"; + params[0].shader.entries = nullptr; + params[0].shader.requireFullSubgroups = true; + params[0].shader.requiredSubgroupSize = static_cast(5); + } + { + shaders[1] = loadAndCompileShader(ShaderPaths[1]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(AutoexposurePushData) + }; + pipelineLayouts[1] = m_device->createPipelineLayout( + { &pcRange, 1 }, + smart_refctd_ptr(gpuLayouts[0]), + nullptr, + nullptr, + smart_refctd_ptr(gpuLayouts[1]) + ); + if (!pipelineLayouts[1]) { return logFail("Failed to create pipeline layout"); } - params[index] = {}; - params[index].layout = pipelineLayouts[index].get(); - params[index].shader.shader = shaders[index].get(); - params[index].shader.entryPoint = "main"; - params[index].shader.entries = nullptr; - params[index].shader.requireFullSubgroups = true; - params[index].shader.requiredSubgroupSize = static_cast(5); + params[1] = {}; + params[1].layout = pipelineLayouts[1].get(); + params[1].shader.shader = shaders[1].get(); + params[1].shader.entryPoint = "main"; + params[1].shader.entries = nullptr; + params[1].shader.requireFullSubgroups = true; + params[1].shader.requiredSubgroupSize = static_cast(5); } if (!m_device->createComputePipelines(nullptr, params, pipelines.data())) { @@ -355,8 +367,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public {}, nullptr, nullptr, - 
std::move(gpuLayouts[2]), - nullptr + nullptr, + std::move(gpuLayouts[1]) ); m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); if (!m_presentPipeline) @@ -563,39 +575,30 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Update Descriptors { - IGPUDescriptorSet::SDescriptorInfo infos[3]; + IGPUDescriptorSet::SDescriptorInfo infos[2]; infos[0].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; infos[0].desc = m_gpuImgView; infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; infos[1].desc = m_tonemappedImgView; - infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - infos[2].desc = m_tonemappedImgView; IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { { - .dstSet = m_meterDS.get(), + .dstSet = m_imgSamplerDS.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = infos }, { - .dstSet = m_tonemapDS.get(), + .dstSet = m_rwImgDS.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = infos + 1 - }, - { - .dstSet = m_presentDS.get(), - .binding = 0, - .arrayElement = 0, - .count = 1, - .info = infos + 2 } }; - m_device->updateDescriptorSets(3, writeDescriptors, 0, nullptr); + m_device->updateDescriptorSets(2, writeDescriptors, 0, nullptr); } m_winMgr->setWindowSize(m_window.get(), Dimensions.x, Dimensions.y); @@ -608,9 +611,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // We do a very simple thing, display an image and wait `DisplayImageMs` to show it inline void workLoopBody() override { -#if 0 const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; - auto gpuImgExtent = m_gpuImgView->getCreationParameters().image->getCreationParameters().extent; uint32_t2 viewportSize = { gpuImgExtent.width, gpuImgExtent.height }; float32_t sampleCount = (viewportSize.x * viewportSize.y) / 4; @@ -619,10 +620,10 @@ class AutoexposureApp final : public 
examples::SimpleWindowedApplication, public // Luma Meter { - auto queue = getComputeQueue(); - auto cmdbuf = m_computeCmdBufs[0].get(); + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - auto ds = m_ds[0].get(); + auto ds = m_imgSamplerDS.get(); auto pc = AutoexposurePushData { @@ -647,6 +648,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); cmdbuf->end(); + m_api->endCapture(); + { IQueue::SSubmitInfo submit_infos[1]; IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { @@ -665,7 +668,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public submit_infos[0].signalSemaphores = signals; queue->submit(submit_infos); - m_api->endCapture(); } const ISemaphore::SWaitInfo wait_infos[] = { @@ -677,13 +679,14 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_device->blockForSemaphores(wait_infos); } +#if 0 // Luma Gather and Tonemapping { - auto queue = getComputeQueue(); - auto cmdbuf = m_computeCmdBufs[1].get(); + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - auto ds1 = m_ds[0].get(); - auto ds2 = m_ds[1].get(); + auto ds1 = m_imgSamplerDS.get(); + auto ds2 = m_rwImgDS.get(); auto pc = AutoexposurePushData { @@ -702,10 +705,10 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_api->startCapture(); cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cmdbuf->bindComputePipeline(m_gatherPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_gatherPipeline->getLayout(), 0, 1, &ds1); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then 
you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_gatherPipeline->getLayout(), 3, 1, &ds2); - cmdbuf->pushConstants(m_gatherPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->bindComputePipeline(m_tonemapPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 0, 1, &ds1); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 3, 1, &ds2); + cmdbuf->pushConstants(m_tonemapPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); cmdbuf->end(); @@ -719,7 +722,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public submit_infos[0].commandBuffers = cmdBufs; IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { { - .semaphore = m_gatherSemaphore.get(), + .semaphore = m_tonemapSemaphore.get(), .value = m_submitIx + 1, .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT } @@ -732,7 +735,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public const ISemaphore::SWaitInfo wait_infos[] = { { - .semaphore = m_gatherSemaphore.get(), + .semaphore = m_tonemapSemaphore.get(), .value = m_submitIx + 1 } }; @@ -747,9 +750,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return; auto queue = getGraphicsQueue(); - auto cmdbuf = m_graphicsCmdBufs[0].get(); + auto cmdbuf = m_cmdBufs[0].get(); 
cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - auto ds = m_ds[2].get(); + auto ds = m_rwImgDS.get(); auto pc = AutoexposurePushData { @@ -842,9 +845,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return; } } - #endif - m_submitIx++; } @@ -872,7 +873,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public smart_refctd_ptr m_presentPipeline; // Descriptor Sets - smart_refctd_ptr m_meterDS, m_tonemapDS, m_presentDS; + smart_refctd_ptr m_imgSamplerDS, m_rwImgDS; // Command Buffers smart_refctd_ptr m_cmdPool; From 462e220b2af7237d75184e24e0837517fa8b467a Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 15 Jan 2025 05:41:58 +0000 Subject: [PATCH 45/50] Replace dot with mul --- 26_Autoexposure/app_resources/luma_meter.comp.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index f936d8d37..b998f33ae 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -39,7 +39,7 @@ struct SharedAccessor struct TexAccessor { static float32_t3 toXYZ(float32_t3 srgbColor) { - return dot(colorspace::sRGBtoXYZ[1], srgbColor); + return mul(colorspace::sRGBtoXYZ, srgbColor); } float32_t3 get(float32_t2 uv) { From 9e26a74aa1bcbe5e26ee14a79d4f2ef9e2701e0d Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Wed, 15 Jan 2025 05:42:22 +0000 Subject: [PATCH 46/50] Replace combined image sampler with RWTexture2D --- 26_Autoexposure/app_resources/present.frag.hlsl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl index b436e248f..aa8febf85 100644 --- a/26_Autoexposure/app_resources/present.frag.hlsl +++ 
b/26_Autoexposure/app_resources/present.frag.hlsl @@ -11,11 +11,9 @@ using namespace nbl::hlsl; using namespace ext::FullScreenTriangle; -// binding 0 set 1 -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] Texture2D texture; -[[vk::combinedImageSampler]] [[vk::binding(0, 3)]] SamplerState samplerState; +[[vk::binding(0, 3)]] RWTexture2D texture; [[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { - return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); + return texture[vxAttr.uv]; } \ No newline at end of file From 208a58a6fbd673fbe307ae12ec16929efb45fdcf Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 21 Jan 2025 21:33:52 +0100 Subject: [PATCH 47/50] use a single asset converter throughout, always call `convert` to make sure its asset conversion cache is written to --- 26_Autoexposure/main.cpp | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 90300047d..4d436a188 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -140,20 +140,27 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return logFail("Could not create Window & Surface or initialize the Surface!"); } + // One asset converter to make the cache persist + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // Create descriptors and pipelines { - auto convertDSLayoutCPU2GPU = [&](std::span cpuLayouts) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); + // need to hoist + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + auto convertDSLayoutCPU2GPU = [&](std::span cpuLayouts) + { CAssetConverter::SInputs inputs = {}; inputs.readCache = converter.get(); inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); std::get>(inputs.assets) = cpuLayouts; // don't need to assert that 
we don't need to provide patches since layouts are not patchable //assert(true); auto reservation = converter->reserve(inputs); + // even though it does nothing when none assets refer in any way (direct or indirect) to memory or need any device operations performed, still need to call to write the cache + reservation.convert(params); // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable auto gpuLayouts = reservation.getGPUObjects(); std::vector> result; @@ -170,18 +177,18 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return result; }; - auto convertDSCPU2GPU = [&](std::span cpuDS) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); + auto convertDSCPU2GPU = [&](std::span cpuDS) + { CAssetConverter::SInputs inputs = {}; inputs.readCache = converter.get(); inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); std::get>(inputs.assets) = cpuDS; // don't need to assert that we don't need to provide patches since layouts are not patchable //assert(true); auto reservation = converter->reserve(inputs); + // even though it does nothing when none assets refer in any way (direct or indirect) to memory or need any device operations performed, still need to call to write the cache + reservation.convert(params); // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable auto gpuDS = reservation.getGPUObjects(); std::vector> result; @@ -421,7 +428,8 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Load exr file into gpu smart_refctd_ptr gpuImg; { - auto convertImgCPU2GPU = [&](ICPUImage* cpuImg) { + auto convertImgCPU2GPU = [&](ICPUImage* cpuImg) + { auto queue = getGraphicsQueue(); auto cmdbuf = m_cmdBufs[0].get(); cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); @@ -429,7 +437,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public 
core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); - auto converter = CAssetConverter::create({ .device = m_device.get() }); // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. struct SInputs final : CAssetConverter::SInputs { From 10b669083fb45a7882c6328435ce4f270c7f70e5 Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 23 Jan 2025 14:40:44 +0000 Subject: [PATCH 48/50] Transition m_tonemappedImgView to GENERAL --- 26_Autoexposure/main.cpp | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 4d436a188..893d892b7 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -580,6 +580,74 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_tonemappedImgView->setObjectDebugName("Tonemapped Image View"); } + // transition m_tonemappedImgView to GENERAL + { + auto transitionSemaphore = m_device->createSemaphore(0); + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + + m_api->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + } + }, + .image = m_tonemappedImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL 
+ } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + cmdbuf->end(); + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = transitionSemaphore.get(), + .value = 1, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = {}, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + queue->submit(infos); + const ISemaphore::SWaitInfo waits[] = { + { + .semaphore = transitionSemaphore.get(), + .value = 1 + } + }; + m_device->blockForSemaphores(waits); + m_api->endCapture(); + } + // Update Descriptors { IGPUDescriptorSet::SDescriptorInfo infos[2]; From 21995eae26036586e8fbb42cd166252332f8994e Mon Sep 17 00:00:00 2001 From: Nipun Garg <24457793+nipunG314@users.noreply.github.com> Date: Thu, 23 Jan 2025 14:41:11 +0000 Subject: [PATCH 49/50] Keep direct track of m_gatherBuffer --- 26_Autoexposure/main.cpp | 50 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp index 893d892b7..224324e80 100644 --- a/26_Autoexposure/main.cpp +++ b/26_Autoexposure/main.cpp @@ -250,7 +250,9 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public auto gpuDS = convertDSCPU2GPU(cpuDS); m_imgSamplerDS = gpuDS[0]; + m_imgSamplerDS->setObjectDebugName("m_imgSamplerDS"); m_rwImgDS = gpuDS[1]; + m_rwImgDS->setObjectDebugName("m_rwImgDS"); // Create Shaders auto loadAndCompileShader = [&](std::string pathToShader) { @@ -387,7 +389,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public { // Allocate memory m_gatherAllocation = {}; - smart_refctd_ptr buffer; { auto build_buffer = [this]( smart_refctd_ptr m_device, @@ -416,9 +417,9 @@ class AutoexposureApp final : public 
examples::SimpleWindowedApplication, public return true; }; - build_buffer(m_device, &m_gatherAllocation, buffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); + build_buffer(m_device, &m_gatherAllocation, m_gatherBuffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); } - m_gatherBDA = buffer->getDeviceAddress(); + m_gatherBDA = m_gatherBuffer->getDeviceAddress(); auto mapped_memory = m_gatherAllocation.memory->map({ 0ull, m_gatherAllocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ); if (!mapped_memory) @@ -551,23 +552,22 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_device->allocate(imageMemReqs, image.get()); return image; - }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr - { - auto format = img->getCreationParameters().format; - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.image = std::move(img); - imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - imgViewInfo.subresourceRange.baseArrayLayer = 0u; - imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.subresourceRange.levelCount = 1u; - - return m_device->createImageView(std::move(imgViewInfo)); - }; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + 
imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + return m_device->createImageView(std::move(imgViewInfo)); + }; auto params = gpuImg->getCreationParameters(); auto extent = params.extent; @@ -651,7 +651,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public // Update Descriptors { IGPUDescriptorSet::SDescriptorInfo infos[2]; - infos[0].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[0].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; infos[0].desc = m_gpuImgView; infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; infos[1].desc = m_tonemappedImgView; @@ -723,8 +723,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); cmdbuf->end(); - m_api->endCapture(); - { IQueue::SSubmitInfo submit_infos[1]; IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { @@ -743,6 +741,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public submit_infos[0].signalSemaphores = signals; queue->submit(submit_infos); + m_api->endCapture(); } const ISemaphore::SWaitInfo wait_infos[] = { @@ -754,7 +753,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public m_device->blockForSemaphores(wait_infos); } -#if 0 // Luma Gather and Tonemapping { auto queue = getGraphicsQueue(); @@ -781,7 +779,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cmdbuf->bindComputePipeline(m_tonemapPipeline.get()); - cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 0, 1, &ds1); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind 
both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 0, 1, &ds1); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 3, 1, &ds2); cmdbuf->pushConstants(m_tonemapPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); @@ -920,7 +918,6 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public return; } } -#endif m_submitIx++; } @@ -959,6 +956,7 @@ class AutoexposureApp final : public examples::SimpleWindowedApplication, public uint64_t m_submitIx = 0; // example resources + smart_refctd_ptr m_gatherBuffer; nbl::video::IDeviceMemoryAllocator::SAllocation m_gatherAllocation; uint64_t m_gatherBDA; smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; From 06dad8c118027d6ebc8ee04e19340ba643079a63 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 23 Jan 2025 20:50:00 +0100 Subject: [PATCH 50/50] yay another DXC bug that was an absolute joy to debug, why on earth would the SPIR-V legalization pass just decide to kill implicit lod texture sampling operations and just warn? GLSL makes them into explicit lod with implied lod 0. 
--- 26_Autoexposure/app_resources/luma_meter.comp.hlsl | 2 +- 26_Autoexposure/app_resources/luma_tonemap.comp.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl index b998f33ae..b15a5665a 100644 --- a/26_Autoexposure/app_resources/luma_meter.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -43,7 +43,7 @@ struct TexAccessor } float32_t3 get(float32_t2 uv) { - return texture.Sample(samplerState, uv).rgb; + return texture.SampleLevel(samplerState, uv, 0.f).rgb; } }; diff --git a/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl b/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl index 7b14ee5be..d7c5114d7 100644 --- a/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl +++ b/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl @@ -48,7 +48,7 @@ struct TexAccessor } float32_t3 get(float32_t2 uv) { - return textureIn.Sample(samplerStateIn, uv).rgb; + return textureIn.SampleLevel(samplerStateIn, uv, 0.f).rgb; } };