diff --git a/23_Autoexposure/CMakeLists.txt b/23_Autoexposure/CMakeLists.txt deleted file mode 100644 index 8604e54c4..000000000 --- a/23_Autoexposure/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -set(EXAMPLE_SOURCES - ../../src/nbl/ext/LumaMeter/CLumaMeter.cpp - ../../src/nbl/ext/ToneMapper/CToneMapper.cpp -) - -nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/23_Autoexposure/main.cpp b/23_Autoexposure/main.cpp deleted file mode 100644 index 83b62c88d..000000000 --- a/23_Autoexposure/main.cpp +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - - -#include "nbl/ext/ToneMapper/CToneMapper.h" - -#include "../common/QToQuitEventReceiver.h" - -using namespace nbl; -using namespace nbl::core; -using namespace nbl::asset; -using namespace nbl::video; - - -int main() -{ - nbl::SIrrlichtCreationParameters deviceParams; - deviceParams.Bits = 24; //may have to set to 32bit for some platforms - deviceParams.ZBufferBits = 24; //we'd like 32bit here - deviceParams.DriverType = EDT_OPENGL; //! Only Well functioning driver, software renderer left for sake of 2D image drawing - deviceParams.WindowSize = dimension2d(1280, 720); - deviceParams.Fullscreen = false; - deviceParams.Vsync = true; //! If supported by target platform - deviceParams.Doublebuffer = true; - deviceParams.Stencilbuffer = false; //! This will not even be a choice soon - - auto device = createDeviceEx(deviceParams); - if (!device) - return 1; // could not create selected driver. 
- - QToQuitEventReceiver receiver; - device->setEventReceiver(&receiver); - - IVideoDriver* driver = device->getVideoDriver(); - - nbl::io::IFileSystem* filesystem = device->getFileSystem(); - IAssetManager* am = device->getAssetManager(); - - IAssetLoader::SAssetLoadParams lp; - auto imageBundle = am->getAsset("../../media/noises/spp_benchmark_4k_512.exr", lp); - - E_FORMAT inFormat; - constexpr auto outFormat = EF_R8G8B8A8_SRGB; - smart_refctd_ptr outImg; - smart_refctd_ptr imgToTonemapView,outImgView; - { - auto cpuImg = IAsset::castDown(imageBundle.getContents().begin()[0]); - IGPUImage::SCreationParams imgInfo = cpuImg->getCreationParameters(); - inFormat = imgInfo.format; - - auto gpuImages = driver->getGPUObjectsFromAssets(&cpuImg.get(),&cpuImg.get()+1); - auto gpuImage = gpuImages->operator[](0u); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.image = std::move(gpuImage); - imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; - imgViewInfo.format = inFormat; - imgViewInfo.subresourceRange.aspectMask = static_cast(0u); - imgViewInfo.subresourceRange.baseMipLevel = 0; - imgViewInfo.subresourceRange.levelCount = 1; - imgViewInfo.subresourceRange.baseArrayLayer = 0; - imgViewInfo.subresourceRange.layerCount = 1; - imgToTonemapView = driver->createImageView(IGPUImageView::SCreationParams(imgViewInfo)); - - imgInfo.format = outFormat; - outImg = driver->createDeviceLocalGPUImageOnDedMem(std::move(imgInfo)); - - imgViewInfo.image = outImg; - imgViewInfo.format = outFormat; - outImgView = driver->createImageView(IGPUImageView::SCreationParams(imgViewInfo)); - } - - auto glslCompiler = am->getCompilerSet(); - const auto inputColorSpace = std::make_tuple(inFormat,ECP_SRGB,EOTF_IDENTITY); - - using LumaMeterClass = ext::LumaMeter::CLumaMeter; - constexpr auto MeterMode = LumaMeterClass::EMM_MEDIAN; - const float minLuma = 1.f/2048.f; - const float maxLuma = 65536.f; - - auto cpuLumaMeasureSpecializedShader = 
LumaMeterClass::createShader(glslCompiler,inputColorSpace,MeterMode,minLuma,maxLuma); - auto gpuLumaMeasureShader = driver->createShader(smart_refctd_ptr(cpuLumaMeasureSpecializedShader->getUnspecialized())); - auto gpuLumaMeasureSpecializedShader = driver->createSpecializedShader(gpuLumaMeasureShader.get(), cpuLumaMeasureSpecializedShader->getSpecializationInfo()); - - const float meteringMinUV[2] = { 0.1f,0.1f }; - const float meteringMaxUV[2] = { 0.9f,0.9f }; - LumaMeterClass::Uniforms_t uniforms; - auto lumaDispatchInfo = LumaMeterClass::buildParameters(uniforms, outImg->getCreationParameters().extent, meteringMinUV, meteringMaxUV); - - auto uniformBuffer = driver->createFilledDeviceLocalBufferOnDedMem(sizeof(uniforms),&uniforms); - - - using ToneMapperClass = ext::ToneMapper::CToneMapper; - constexpr auto TMO = ToneMapperClass::EO_ACES; - constexpr bool usingLumaMeter = MeterModegetGLSLCompiler(), - inputColorSpace, - std::make_tuple(outFormat,ECP_SRGB,OETF_sRGB), - TMO,usingLumaMeter,MeterMode,minLuma,maxLuma,usingTemporalAdapatation - ); - auto gpuTonemappingShader = driver->createShader(smart_refctd_ptr(cpuTonemappingSpecializedShader->getUnspecialized())); - auto gpuTonemappingSpecializedShader = driver->createSpecializedShader(gpuTonemappingShader.get(),cpuTonemappingSpecializedShader->getSpecializationInfo()); - - auto outImgStorage = ToneMapperClass::createViewForImage(driver,false,core::smart_refctd_ptr(outImg),{static_cast(0u),0,1,0,1}); - - auto parameterBuffer = driver->createDeviceLocalGPUBufferOnDedMem(ToneMapperClass::getParameterBufferSize()); - constexpr float Exposure = 0.f; - constexpr float Key = 0.18; - auto params = ToneMapperClass::Params_t(Exposure, Key, 0.85f); - { - params.setAdaptationFactorFromFrameDelta(0.f); - driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(),0u,sizeof(params),¶ms); - } - - auto commonPipelineLayout = ToneMapperClass::getDefaultPipelineLayout(driver,usingLumaMeter); - - auto lumaMeteringPipeline = 
driver->createComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuLumaMeasureSpecializedShader)); - auto toneMappingPipeline = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuTonemappingSpecializedShader)); - - auto commonDescriptorSet = driver->createDescriptorSet(core::smart_refctd_ptr(commonPipelineLayout->getDescriptorSetLayout(0u))); - ToneMapperClass::updateDescriptorSet(driver,commonDescriptorSet.get(),parameterBuffer,imgToTonemapView,outImgStorage,1u,2u,usingLumaMeter ? 3u:0u,uniformBuffer,0u,usingTemporalAdapatation); - - - constexpr auto dynOffsetArrayLen = usingLumaMeter ? 2u : 1u; - - auto lumaDynamicOffsetArray = core::make_refctd_dynamic_array >(dynOffsetArrayLen,0u); - lumaDynamicOffsetArray->back() = sizeof(ToneMapperClass::Params_t); - - auto toneDynamicOffsetArray = core::make_refctd_dynamic_array >(dynOffsetArrayLen,0u); - - - auto blitFBO = driver->addFrameBuffer(); - blitFBO->attach(video::EFAP_COLOR_ATTACHMENT0, std::move(outImgView)); - - uint32_t outBufferIx = 0u; - auto lastPresentStamp = std::chrono::high_resolution_clock::now(); - while (device->run() && receiver.keepOpen()) - { - driver->beginScene(false, false); - - driver->bindComputePipeline(lumaMeteringPipeline.get()); - driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&lumaDynamicOffsetArray); - driver->pushConstants(commonPipelineLayout.get(),IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(outBufferIx),&outBufferIx); outBufferIx ^= 0x1u; - LumaMeterClass::dispatchHelper(driver,lumaDispatchInfo,true); - - driver->bindComputePipeline(toneMappingPipeline.get()); - driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&toneDynamicOffsetArray); - ToneMapperClass::dispatchHelper(driver,outImgStorage.get(),true); - - driver->blitRenderTargets(blitFBO, nullptr, false, false); - - driver->endScene(); - if 
(usingTemporalAdapatation) - { - auto thisPresentStamp = std::chrono::high_resolution_clock::now(); - auto microsecondsElapsedBetweenPresents = std::chrono::duration_cast(thisPresentStamp-lastPresentStamp); - lastPresentStamp = thisPresentStamp; - - params.setAdaptationFactorFromFrameDelta(float(microsecondsElapsedBetweenPresents.count())/1000000.f); - // dont override shader output - constexpr auto offsetPastLumaHistory = offsetof(decltype(params),lastFrameExtraEVAsHalf)+sizeof(decltype(params)::lastFrameExtraEVAsHalf); - auto* paramPtr = reinterpret_cast(¶ms); - driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(), offsetPastLumaHistory, sizeof(params)-offsetPastLumaHistory, paramPtr+offsetPastLumaHistory); - } - } - - return 0; -} \ No newline at end of file diff --git a/26_Autoexposure/CMakeLists.txt b/26_Autoexposure/CMakeLists.txt new file mode 100644 index 000000000..34040e8c1 --- /dev/null +++ b/26_Autoexposure/CMakeLists.txt @@ -0,0 +1,25 @@ + +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() diff --git a/26_Autoexposure/app_resources/common.hlsl b/26_Autoexposure/app_resources/common.hlsl new file mode 100644 index 000000000..bf2c19920 --- /dev/null +++ b/26_Autoexposure/app_resources/common.hlsl @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _AUTOEXPOSURE_COMMON_INCLUDED_ +#define _AUTOEXPOSURE_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/luma_meter/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +struct AutoexposurePushData +{ + luma_meter::MeteringWindow window; + float32_t2 lumaMinMax; + float32_t sampleCount; + uint32_t2 viewportSize; + uint64_t lumaMeterBDA; +}; + +} +} + +#endif \ No newline at end of file diff --git a/26_Autoexposure/app_resources/luma_meter.comp.hlsl b/26_Autoexposure/app_resources/luma_meter.comp.hlsl new file mode 100644 index 000000000..b15a5665a --- /dev/null +++ b/26_Autoexposure/app_resources/luma_meter.comp.hlsl @@ -0,0 +1,68 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] AutoexposurePushData pushData; + +groupshared float32_t sdata[WorkgroupSize]; +struct SharedAccessor +{ + using type = float32_t; + void get(const uint32_t index, NBL_REF_ARG(uint32_t) value) + { + value = sdata[index]; + } + + void set(const uint32_t index, const uint32_t value) + { + sdata[index] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct TexAccessor +{ + static float32_t3 toXYZ(float32_t3 srgbColor) { + return mul(colorspace::sRGBtoXYZ, srgbColor); + } + + float32_t3 
get(float32_t2 uv) { + return texture.SampleLevel(samplerState, uv, 0.f).rgb; + } +}; + +uint32_t3 glsl::gl_WorkGroupSize() +{ + return uint32_t3(WorkgroupSize, 1, 1); +} + +[numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) +{ + const Ptr val_ptr = Ptr::create(pushData.lumaMeterBDA); + PtrAccessor val_accessor = PtrAccessor::create(val_ptr); + + SharedAccessor sdata; + TexAccessor tex; + + using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; + LumaMeter meter = LumaMeter::create(pushData.lumaMinMax, pushData.sampleCount); + + meter.sampleLuma(pushData.window, val_accessor, tex, sdata, (float32_t2)(glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize()), pushData.viewportSize); +} diff --git a/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl b/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl new file mode 100644 index 000000000..d7c5114d7 --- /dev/null +++ b/26_Autoexposure/app_resources/luma_tonemap.comp.hlsl @@ -0,0 +1,89 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/builtin/hlsl/luma_meter/luma_meter.hlsl" +#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl" +#include "nbl/builtin/hlsl/colorspace/EOTF.hlsl" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/decodeCIEXYZ.hlsl" +#include "nbl/builtin/hlsl/colorspace/OETF.hlsl" +#include "nbl/builtin/hlsl/tonemapper/operators.hlsl" +#include "app_resources/common.hlsl" + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D textureIn; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerStateIn; +[[vk::binding(0, 3)]] RWTexture2D textureOut; + +using namespace nbl::hlsl; +using Ptr = bda::__ptr < uint32_t >; +using PtrAccessor = BdaAccessor < uint32_t >; + +[[vk::push_constant]] AutoexposurePushData pushData; + +groupshared float32_t sdata[WorkgroupSize]; +struct SharedAccessor +{ + using type = float32_t; + void get(const uint32_t index, NBL_REF_ARG(uint32_t) value) + { + value = sdata[index]; + } + + void set(const uint32_t index, const uint32_t value) + { + sdata[index] = value; + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + } +}; + +struct TexAccessor +{ + static float32_t3 toXYZ(float32_t3 srgbColor) { + return dot(colorspace::sRGBtoXYZ[1], srgbColor); + } + + float32_t3 get(float32_t2 uv) { + return textureIn.SampleLevel(samplerStateIn, uv, 0.f).rgb; + } +}; + +uint32_t3 glsl::gl_WorkGroupSize() +{ + return uint32_t3(WorkgroupSize, 1, 1); +} + +[numthreads(DeviceSubgroupSize, DeviceSubgroupSize, 1)] +void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) +{ + const Ptr val_ptr = Ptr::create(pushData.lumaMeterBDA); + PtrAccessor val_accessor = PtrAccessor::create(val_ptr); + + SharedAccessor sdata; + TexAccessor tex; + + using LumaMeter = luma_meter::geom_meter< WorkgroupSize, PtrAccessor, SharedAccessor, TexAccessor>; + LumaMeter meter = 
LumaMeter::create(pushData.lumaMinMax, pushData.sampleCount); + + float32_t EV = meter.gatherLuma(val_accessor); + + uint32_t tid = workgroup::SubgroupContiguousIndex(); + uint32_t2 coord = { + morton2d_decode_x(tid), + morton2d_decode_y(tid) + }; + + uint32_t2 pos = glsl::gl_WorkGroupID() * glsl::gl_WorkGroupSize() + coord; + + float32_t2 uv = (float32_t2)(pos) / pushData.viewportSize; + float32_t3 color = colorspace::oetf::sRGB(tex.get(uv).rgb); + float32_t3 CIEColor = mul(colorspace::sRGBtoXYZ, color); + tonemapper::Reinhard reinhard = tonemapper::Reinhard::create(EV); + float32_t3 tonemappedColor = mul(colorspace::decode::XYZtoscRGB, reinhard(CIEColor)); + + textureOut[pos] = float32_t4(colorspace::eotf::sRGB(tonemappedColor), 1.0f); +} diff --git a/26_Autoexposure/app_resources/present.frag.hlsl b/26_Autoexposure/app_resources/present.frag.hlsl new file mode 100644 index 000000000..aa8febf85 --- /dev/null +++ b/26_Autoexposure/app_resources/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +#include "app_resources/common.hlsl" + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +[[vk::binding(0, 3)]] RWTexture2D texture; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return texture[vxAttr.uv]; +} \ No newline at end of file diff --git a/23_Autoexposure/config.json.template b/26_Autoexposure/config.json.template similarity index 100% rename from 23_Autoexposure/config.json.template rename to 26_Autoexposure/config.json.template diff --git a/26_Autoexposure/main.cpp b/26_Autoexposure/main.cpp new file mode 100644 index 000000000..224324e80 --- /dev/null +++ b/26_Autoexposure/main.cpp @@ -0,0 +1,1101 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "SimpleWindowedApplication.hpp" + +#include "nbl/video/surface/CSurfaceVulkan.h" +#include "nbl/asset/interchange/IAssetLoader.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" + +#include "app_resources/common.hlsl" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; + +class AutoexposureApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +{ + using device_base_t = examples::SimpleWindowedApplication; + using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using clock_t = std::chrono::steady_clock; + + static inline std::string DefaultImagePathsFile = 
"../../media/noises/spp_benchmark_4k_512.exr"; + static inline std::array ShaderPaths = { + "app_resources/luma_meter.comp.hlsl", + "app_resources/luma_tonemap.comp.hlsl" , + "app_resources/present.frag.hlsl" + }; + constexpr static inline uint32_t2 Dimensions = { 1280, 720 }; + constexpr static inline float32_t2 MeteringWindowScale = { 0.5f, 0.5f }; + constexpr static inline float32_t2 MeteringWindowOffset = { 0.25f, 0.25f }; + constexpr static inline float32_t2 LumaMinMax = { 1.0f / 4096.0f, 32768.0f }; + +public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + inline AutoexposureApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + // Will get called mid-initialization, via `filterDevices` between when the API Connection is created and Physical Device is chosen + inline core::vector getSurfaces() const override + { + // So let's create our Window and Surface then! + if (!m_surface) + { + { + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = Dimensions[0]; + params.height = Dimensions[1]; + params.x = 32; + params.y = 32; + // Don't want to have a window lingering about before we're ready so create it hidden. + // Only programmatic resize, not regular. 
+ params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "AutoexposureApp"; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + // Create semaphores + m_meterSemaphore = m_device->createSemaphore(m_submitIx); + m_tonemapSemaphore = m_device->createSemaphore(m_submitIx); + m_presentSemaphore = m_device->createSemaphore(m_submitIx); + + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + // We actually need external dependencies to ensure ordering of the Implicit Layout Transitions relative to the semaphore signals + constexpr IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition to 
ATTACHMENT_OPTIMAL + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // since we're uploading the image data we're about to draw + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because we clear and don't blend + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // ATTACHMENT_OPTIMAL to PRESENT_SRC + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // we can have NONE as the Destinations because the spec says so about presents + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // One asset converter to make the cache persist + auto converter = CAssetConverter::create({ .device = m_device.get() }); + + // Create descriptors and pipelines + { + // need to hoist + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + auto convertDSLayoutCPU2GPU = [&](std::span cpuLayouts) + { + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + + std::get>(inputs.assets) = cpuLayouts; + // 
don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // even though it does nothing when none assets refer in any way (direct or indirect) to memory or need any device operations performed, still need to call to write the cache + reservation.convert(params); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayouts = reservation.getGPUObjects(); + std::vector> result; + result.reserve(cpuLayouts.size()); + + for (auto& gpuLayout : gpuLayouts) { + auto layout = gpuLayout.value; + if (!layout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + result.push_back(layout); + } + + return result; + }; + auto convertDSCPU2GPU = [&](std::span cpuDS) + { + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + + std::get>(inputs.assets) = cpuDS; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // even though it does nothing when none assets refer in any way (direct or indirect) to memory or need any device operations performed, still need to call to write the cache + reservation.convert(params); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects(); + std::vector> result; + result.reserve(cpuDS.size()); + + for (auto& ds : gpuDS) { + if (!ds.value) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + result.push_back(ds.value); + } + + return result; + }; + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = make_smart_refctd_ptr(samplerParams); + + std::array imgSamplerbindings = {}; + std::array rwImgbindings = {}; + + 
imgSamplerbindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = &defaultSampler + }; + rwImgbindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE | IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = nullptr + }; + + auto cpuImgSamplerLayout = make_smart_refctd_ptr(imgSamplerbindings); + auto cpuRWImgLayout = make_smart_refctd_ptr(rwImgbindings); + + std::array cpuLayouts = { + cpuImgSamplerLayout.get(), + cpuRWImgLayout.get() + }; + + auto gpuLayouts = convertDSLayoutCPU2GPU(cpuLayouts); + + auto cpuImgSamplerDS = make_smart_refctd_ptr(std::move(cpuImgSamplerLayout)); + auto cpuRWImgDS = make_smart_refctd_ptr(std::move(cpuRWImgLayout)); + + std::array cpuDS = { + cpuImgSamplerDS.get(), + cpuRWImgDS.get() + }; + + auto gpuDS = convertDSCPU2GPU(cpuDS); + m_imgSamplerDS = gpuDS[0]; + m_imgSamplerDS->setObjectDebugName("m_imgSamplerDS"); + m_rwImgDS = gpuDS[1]; + m_rwImgDS->setObjectDebugName("m_rwImgDS"); + + // Create Shaders + auto loadAndCompileShader = [&](std::string pathToShader) { + IAssetLoader::SAssetLoadParams lp = {}; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = IAsset::castDown(assets[0]); + const uint32_t workgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations; + const uint32_t subgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + auto overriddenSource = CHLSLCompiler::createOverridenCopy( + source.get(), + 
"#define WorkgroupSize %d\n#define DeviceSubgroupSize %d\n", + workgroupSize, + subgroupSize + ); + // The down-cast should not fail! + assert(overriddenSource); + + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + auto shader = m_device->createShader(overriddenSource.get()); + if (!shader) + { + m_logger->log("Shader creation failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + // Create compute pipelines + { + std::array params; + std::array, 2> shaders; + std::array, 2> pipelineLayouts; + std::array, 2> pipelines; + { + shaders[0] = loadAndCompileShader(ShaderPaths[0]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(AutoexposurePushData) + }; + pipelineLayouts[0] = m_device->createPipelineLayout( + { &pcRange, 1 }, + smart_refctd_ptr(gpuLayouts[0]), + nullptr, + nullptr, + nullptr + ); + if (!pipelineLayouts[0]) { + return logFail("Failed to create pipeline layout"); + } + + params[0] = {}; + params[0].layout = pipelineLayouts[0].get(); + params[0].shader.shader = shaders[0].get(); + params[0].shader.entryPoint = "main"; + params[0].shader.entries = nullptr; + params[0].shader.requireFullSubgroups = true; + params[0].shader.requiredSubgroupSize = static_cast(5); + } + { + shaders[1] = loadAndCompileShader(ShaderPaths[1]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(AutoexposurePushData) + }; + pipelineLayouts[1] = m_device->createPipelineLayout( + { &pcRange, 1 }, + smart_refctd_ptr(gpuLayouts[0]), + nullptr, + nullptr, + smart_refctd_ptr(gpuLayouts[1]) + ); + if (!pipelineLayouts[1]) { + return logFail("Failed to create pipeline layout"); + } + + params[1] = {}; + params[1].layout = pipelineLayouts[1].get(); + params[1].shader.shader = shaders[1].get(); + 
params[1].shader.entryPoint = "main"; + params[1].shader.entries = nullptr; + params[1].shader.requireFullSubgroups = true; + params[1].shader.requiredSubgroupSize = static_cast(5); + } + + if (!m_device->createComputePipelines(nullptr, params, pipelines.data())) { + return logFail("Failed to create compute pipeline!\n"); + } + + m_meterPipeline = std::move(pipelines[0]); + m_tonemapPipeline = std::move(pipelines[1]); + } + + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileShader(ShaderPaths[2]); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: presentShader!"); + + const IGPUShader::SSpecInfo fragSpec = { + .entryPoint = "main", + .shader = fragmentShader.get() + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + nullptr, + nullptr, + nullptr, + std::move(gpuLayouts[1]) + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } + } + + // Allocate and create buffer for Luma Gather + { + // Allocate memory + m_gatherAllocation = {}; + { + auto build_buffer = [this]( + smart_refctd_ptr m_device, + nbl::video::IDeviceMemoryAllocator::SAllocation* allocation, + smart_refctd_ptr& buffer, + size_t buffer_size, + const char* label) + { + IGPUBuffer::SCreationParams params; + params.size = buffer_size; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + buffer = m_device->createBuffer(std::move(params)); + if (!buffer) + return logFail("Failed to create GPU buffer of size %d!\n", buffer_size); 
+ buffer->setObjectDebugName(label); + + auto reqs = buffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getHostVisibleMemoryTypeBits(); + + *allocation = m_device->allocate(reqs, buffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + if (!allocation->isValid()) + return logFail("Failed to allocate Device Memory compatible with our GPU Buffer!\n"); + + assert(allocation->memory.get() == buffer->getBoundMemory().memory); + return true; + }; + + build_buffer(m_device, &m_gatherAllocation, m_gatherBuffer, m_physicalDevice->getLimits().maxSubgroupSize, "Luma Gather Buffer"); + } + m_gatherBDA = m_gatherBuffer->getDeviceAddress(); + + auto mapped_memory = m_gatherAllocation.memory->map({ 0ull, m_gatherAllocation.memory->getAllocationSize() }, IDeviceMemoryAllocation::EMCAF_READ); + if (!mapped_memory) + return logFail("Failed to map the Device Memory!\n"); + } + + // Load exr file into gpu + smart_refctd_ptr gpuImg; + { + auto convertImgCPU2GPU = [&](ICPUImage* cpuImg) + { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
+ struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } + + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuImg, 1 }; + // assert that we don't need to provide patches + 
assert(cpuImg->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuImgs[0].value; + }; + + smart_refctd_ptr cpuImg; + { + IAssetLoader::SAssetLoadParams lp; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + + cpuImg = IAsset::castDown(bundle.getContents()[0]); + if (!cpuImg) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + + gpuImg = convertImgCPU2GPU(cpuImg.get()); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + 
imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + return m_device->createImageView(std::move(imgViewInfo)); + }; + + auto params = gpuImg->getCreationParameters(); + auto extent = params.extent; + gpuImg->setObjectDebugName("GPU Img"); + m_gpuImgView = createHDRIImageView(gpuImg); + m_gpuImgView->setObjectDebugName("GPU Img View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, Dimensions.x, Dimensions.y); + outImg->setObjectDebugName("Tonemapped Image"); + m_tonemappedImgView = createHDRIImageView(outImg); + m_tonemappedImgView->setObjectDebugName("Tonemapped Image View"); + } + + // transition m_tonemappedImgView to GENERAL + { + auto transitionSemaphore = m_device->createSemaphore(0); + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + + m_api->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, + } + }, 
+ .image = m_tonemappedImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + cmdbuf->end(); + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = transitionSemaphore.get(), + .value = 1, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = {}, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + queue->submit(infos); + const ISemaphore::SWaitInfo waits[] = { + { + .semaphore = transitionSemaphore.get(), + .value = 1 + } + }; + m_device->blockForSemaphores(waits); + m_api->endCapture(); + } + + // Update Descriptors + { + IGPUDescriptorSet::SDescriptorInfo infos[2]; + infos[0].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + infos[0].desc = m_gpuImgView; + infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + infos[1].desc = m_tonemappedImgView; + + IGPUDescriptorSet::SWriteDescriptorSet writeDescriptors[] = { + { + .dstSet = m_imgSamplerDS.get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = infos + }, + { + .dstSet = m_rwImgDS.get(), + .binding = 0, + .arrayElement = 0, + .count = 1, + .info = infos + 1 + } + }; + + m_device->updateDescriptorSets(2, writeDescriptors, 0, nullptr); + } + + m_winMgr->setWindowSize(m_window.get(), Dimensions.x, Dimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + + return true; + } + + // We do a very simple thing, display an image and wait `DisplayImageMs` to show it + inline void workLoopBody() override + 
{ + const uint32_t SubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + auto gpuImgExtent = m_gpuImgView->getCreationParameters().image->getCreationParameters().extent; + uint32_t2 viewportSize = { gpuImgExtent.width, gpuImgExtent.height }; + float32_t sampleCount = (viewportSize.x * viewportSize.y) / 4; + uint32_t workgroupSize = SubgroupSize * SubgroupSize; + sampleCount = workgroupSize * (1 + (sampleCount - 1) / workgroupSize); + + // Luma Meter + { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + auto ds = m_imgSamplerDS.get(); + + auto pc = AutoexposurePushData + { + .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), + .lumaMinMax = LumaMinMax, + .sampleCount = sampleCount, + .viewportSize = viewportSize, + .lumaMeterBDA = m_gatherBDA + }; + + const uint32_t2 dispatchSize = { + 1 + ((viewportSize.x / 2) - 1) / SubgroupSize, + 1 + ((viewportSize.y / 2) - 1) / SubgroupSize + }; + + m_api->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->bindComputePipeline(m_meterPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_meterPipeline->getLayout(), 0, 1, &ds); // also if you created DS Set with 3th index you need to respect it here - firstSet tells you the index of set and count tells you what range from this index it should update, useful if you had 2 DS with lets say set index 2,3, then you can bind both with single call setting firstSet to 2, count to 2 and last argument would be pointet to your DS pointers + cmdbuf->pushConstants(m_meterPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); + cmdbuf->end(); + + { + IQueue::SSubmitInfo submit_infos[1]; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { + { + .cmdbuf = cmdbuf + } + }; + submit_infos[0].commandBuffers = cmdBufs; + 
IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { + { + .semaphore = m_meterSemaphore.get(), + .value = m_submitIx + 1, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + } + }; + submit_infos[0].signalSemaphores = signals; + + queue->submit(submit_infos); + m_api->endCapture(); + } + + const ISemaphore::SWaitInfo wait_infos[] = { + { + .semaphore = m_meterSemaphore.get(), + .value = m_submitIx + 1 + } + }; + m_device->blockForSemaphores(wait_infos); + } + + // Luma Gather and Tonemapping + { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + auto ds1 = m_imgSamplerDS.get(); + auto ds2 = m_rwImgDS.get(); + + auto pc = AutoexposurePushData + { + .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), + .lumaMinMax = LumaMinMax, + .sampleCount = sampleCount, + .viewportSize = viewportSize, + .lumaMeterBDA = m_gatherBDA + }; + + const uint32_t2 dispatchSize = { + 1 + ((viewportSize.x) - 1) / SubgroupSize, + 1 + ((viewportSize.y) - 1) / SubgroupSize + }; + + m_api->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->bindComputePipeline(m_tonemapPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 0, 1, &ds1); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_COMPUTE, m_tonemapPipeline->getLayout(), 3, 1, &ds2); + cmdbuf->pushConstants(m_tonemapPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y); + cmdbuf->end(); + + { + IQueue::SSubmitInfo submit_infos[1]; + IQueue::SSubmitInfo::SCommandBufferInfo cmdBufs[] = { + { + .cmdbuf = cmdbuf + } + }; + submit_infos[0].commandBuffers = cmdBufs; + IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { + { + .semaphore = m_tonemapSemaphore.get(), + .value = m_submitIx + 1, + .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT + 
} + }; + submit_infos[0].signalSemaphores = signals; + + queue->submit(submit_infos); + m_api->endCapture(); + } + + const ISemaphore::SWaitInfo wait_infos[] = { + { + .semaphore = m_tonemapSemaphore.get(), + .value = m_submitIx + 1 + } + }; + m_device->blockForSemaphores(wait_infos); + } + + // Render to swapchain + { + // Acquire + auto acquire = m_surface->acquireNextImage(); + if (!acquire) + return; + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + auto ds = m_rwImgDS.get(); + + auto pc = AutoexposurePushData + { + .window = nbl::hlsl::luma_meter::MeteringWindow::create(MeteringWindowScale, MeteringWindowOffset), + .lumaMinMax = LumaMinMax, + .sampleCount = sampleCount, + .viewportSize = viewportSize, + .lumaMeterBDA = m_gatherBDA + }; + + m_api->startCapture(); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = { m_window->getWidth(), m_window->getHeight() } + }; + // set viewport + { + const asset::SViewport viewport = + { + .width = float32_t(m_window->getWidth()), + .height = float32_t(m_window->getHeight()) + }; + cmdbuf->setViewport({ &viewport, 1 }); + } + cmdbuf->setScissor({ ¤tRenderArea, 1 }); + + // begin the renderpass + { + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo info = { + .framebuffer = scRes->getFramebuffer(acquire.imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + } + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS, m_presentPipeline->getLayout(), 3, 1, &ds); + 
ext::FullScreenTriangle::recordDrawCall(cmdbuf); + cmdbuf->endRenderPass(); + + cmdbuf->end(); + + // submit + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[1] = { { + .semaphore = m_presentSemaphore.get(), + .value = m_submitIx + 1, + // just as we've outputted all pixels, signal + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } }; + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[1] = { { + .cmdbuf = cmdbuf + } }; + // we don't need to wait for the transfer semaphore, because we submit everything to the same queue + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[1] = { { + .semaphore = acquire.semaphore, + .value = acquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } }; + const IQueue::SSubmitInfo infos[1] = { { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } }; + + queue->submit(infos); + } + + // Present + m_surface->present(acquire.imageIndex, rendered); + m_api->endCapture(); + + // Wait for completion + { + const ISemaphore::SWaitInfo cmdbufDonePending[] = { + { + .semaphore = m_presentSemaphore.get(), + .value = m_submitIx + 1 + } + }; + if (m_device->blockForSemaphores(cmdbufDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + } + m_submitIx++; + } + + inline bool keepRunning() override + { + // Keep arunning as long as we have a surface to present to (usually this means, as long as the window is open) + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +protected: + // window + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + + // Pipelines + smart_refctd_ptr m_meterPipeline, m_tonemapPipeline; + smart_refctd_ptr m_presentPipeline; + + // Descriptor Sets + smart_refctd_ptr m_imgSamplerDS, m_rwImgDS; + + // Command Buffers + smart_refctd_ptr m_cmdPool; + std::array, 1> m_cmdBufs; + + // Semaphores + 
smart_refctd_ptr m_meterSemaphore, m_tonemapSemaphore, m_presentSemaphore; + uint64_t m_submitIx = 0; + + // example resources + smart_refctd_ptr m_gatherBuffer; + nbl::video::IDeviceMemoryAllocator::SAllocation m_gatherAllocation; + uint64_t m_gatherBDA; + smart_refctd_ptr m_gpuImgView, m_tonemappedImgView; +}; + +NBL_MAIN_FUNC(AutoexposureApp) + +#if 0 + +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#define _NBL_STATIC_LIB_ +#include +#include +#include + + +#include "nbl/ext/ToneMapper/CToneMapper.h" + +#include "../common/QToQuitEventReceiver.h" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::asset; +using namespace nbl::video; + + +int main() +{ + nbl::SIrrlichtCreationParameters deviceParams; + deviceParams.Bits = 24; //may have to set to 32bit for some platforms + deviceParams.ZBufferBits = 24; //we'd like 32bit here + deviceParams.DriverType = EDT_OPENGL; //! Only Well functioning driver, software renderer left for sake of 2D image drawing + deviceParams.WindowSize = dimension2d(1280, 720); + deviceParams.Fullscreen = false; + deviceParams.Vsync = true; //! If supported by target platform + deviceParams.Doublebuffer = true; + deviceParams.Stencilbuffer = false; //! This will not even be a choice soon + + auto device = createDeviceEx(deviceParams); + if (!device) + return 1; // could not create selected driver. 
+ + using LumaMeterClass = ext::LumaMeter::CLumaMeter; + constexpr auto MeterMode = LumaMeterClass::EMM_MEDIAN; + const float minLuma = 1.f/2048.f; + const float maxLuma = 65536.f; + + auto cpuLumaMeasureSpecializedShader = LumaMeterClass::createShader(glslCompiler,inputColorSpace,MeterMode,minLuma,maxLuma); + auto gpuLumaMeasureShader = driver->createShader(smart_refctd_ptr(cpuLumaMeasureSpecializedShader->getUnspecialized())); + auto gpuLumaMeasureSpecializedShader = driver->createSpecializedShader(gpuLumaMeasureShader.get(), cpuLumaMeasureSpecializedShader->getSpecializationInfo()); + + const float meteringMinUV[2] = { 0.1f,0.1f }; + const float meteringMaxUV[2] = { 0.9f,0.9f }; + LumaMeterClass::Uniforms_t uniforms; + auto lumaDispatchInfo = LumaMeterClass::buildParameters(uniforms, outImg->getCreationParameters().extent, meteringMinUV, meteringMaxUV); + + auto uniformBuffer = driver->createFilledDeviceLocalBufferOnDedMem(sizeof(uniforms),&uniforms); + + + using ToneMapperClass = ext::ToneMapper::CToneMapper; + constexpr auto TMO = ToneMapperClass::EO_ACES; + constexpr bool usingLumaMeter = MeterModegetGLSLCompiler(), + inputColorSpace, + std::make_tuple(outFormat,ECP_SRGB,OETF_sRGB), + TMO,usingLumaMeter,MeterMode,minLuma,maxLuma,usingTemporalAdapatation + ); + auto gpuTonemappingShader = driver->createShader(smart_refctd_ptr(cpuTonemappingSpecializedShader->getUnspecialized())); + auto gpuTonemappingSpecializedShader = driver->createSpecializedShader(gpuTonemappingShader.get(),cpuTonemappingSpecializedShader->getSpecializationInfo()); + + auto outImgStorage = ToneMapperClass::createViewForImage(driver,false,core::smart_refctd_ptr(outImg),{static_cast(0u),0,1,0,1}); + + auto parameterBuffer = driver->createDeviceLocalGPUBufferOnDedMem(ToneMapperClass::getParameterBufferSize()); + constexpr float Exposure = 0.f; + constexpr float Key = 0.18; + auto params = ToneMapperClass::Params_t(Exposure, Key, 0.85f); + { + params.setAdaptationFactorFromFrameDelta(0.f); + 
driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(),0u,sizeof(params),¶ms); + } + + auto commonPipelineLayout = ToneMapperClass::getDefaultPipelineLayout(driver,usingLumaMeter); + + auto lumaMeteringPipeline = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuLumaMeasureSpecializedShader)); + auto toneMappingPipeline = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(commonPipelineLayout),std::move(gpuTonemappingSpecializedShader)); + + auto commonDescriptorSet = driver->createDescriptorSet(core::smart_refctd_ptr(commonPipelineLayout->getDescriptorSetLayout(0u))); + ToneMapperClass::updateDescriptorSet(driver,commonDescriptorSet.get(),parameterBuffer,imgToTonemapView,outImgStorage,1u,2u,usingLumaMeter ? 3u:0u,uniformBuffer,0u,usingTemporalAdapatation); + + + constexpr auto dynOffsetArrayLen = usingLumaMeter ? 2u : 1u; + + auto lumaDynamicOffsetArray = core::make_refctd_dynamic_array >(dynOffsetArrayLen,0u); + lumaDynamicOffsetArray->back() = sizeof(ToneMapperClass::Params_t); + + auto toneDynamicOffsetArray = core::make_refctd_dynamic_array >(dynOffsetArrayLen,0u); + + + auto blitFBO = driver->addFrameBuffer(); + blitFBO->attach(video::EFAP_COLOR_ATTACHMENT0, std::move(outImgView)); + + uint32_t outBufferIx = 0u; + auto lastPresentStamp = std::chrono::high_resolution_clock::now(); + while (device->run() && receiver.keepOpen()) + { + driver->beginScene(false, false); + + driver->bindComputePipeline(lumaMeteringPipeline.get()); + driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&lumaDynamicOffsetArray); + driver->pushConstants(commonPipelineLayout.get(),IGPUSpecializedShader::ESS_COMPUTE,0u,sizeof(outBufferIx),&outBufferIx); outBufferIx ^= 0x1u; + LumaMeterClass::dispatchHelper(driver,lumaDispatchInfo,true); + + driver->bindComputePipeline(toneMappingPipeline.get()); + 
driver->bindDescriptorSets(EPBP_COMPUTE,commonPipelineLayout.get(),0u,1u,&commonDescriptorSet.get(),&toneDynamicOffsetArray); + ToneMapperClass::dispatchHelper(driver,outImgStorage.get(),true); + + driver->blitRenderTargets(blitFBO, nullptr, false, false); + + driver->endScene(); + if (usingTemporalAdapatation) + { + auto thisPresentStamp = std::chrono::high_resolution_clock::now(); + auto microsecondsElapsedBetweenPresents = std::chrono::duration_cast(thisPresentStamp-lastPresentStamp); + lastPresentStamp = thisPresentStamp; + + params.setAdaptationFactorFromFrameDelta(float(microsecondsElapsedBetweenPresents.count())/1000000.f); + // dont override shader output + constexpr auto offsetPastLumaHistory = offsetof(decltype(params),lastFrameExtraEVAsHalf)+sizeof(decltype(params)::lastFrameExtraEVAsHalf); + auto* paramPtr = reinterpret_cast(¶ms); + driver->updateBufferRangeViaStagingBuffer(parameterBuffer.get(), offsetPastLumaHistory, sizeof(params)-offsetPastLumaHistory, paramPtr+offsetPastLumaHistory); + } + } + + return 0; +} + +#endif \ No newline at end of file diff --git a/23_Autoexposure/pipeline.groovy b/26_Autoexposure/pipeline.groovy similarity index 100% rename from 23_Autoexposure/pipeline.groovy rename to 26_Autoexposure/pipeline.groovy diff --git a/CMakeLists.txt b/CMakeLists.txt index d840850a6..85332594d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,8 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(23_ArithmeticUnitTest EXCLUDE_FROM_ALL) add_subdirectory(24_ColorSpaceTest EXCLUDE_FROM_ALL) add_subdirectory(25_FilterTest EXCLUDE_FROM_ALL) - add_subdirectory(27_MPMCScheduler EXCLUDE_FROM_ALL) + add_subdirectory(26_Autoexposure EXCLUDE_FROM_ALL) + add_subdirectory(27_MPMCScheduler EXCLUDE_FROM_ALL) add_subdirectory(28_FFTBloom EXCLUDE_FROM_ALL) # add_subdirectory(36_CUDAInterop EXCLUDE_FROM_ALL)