diff --git a/CMakeLists.txt b/CMakeLists.txt index b5518522f6..1799619c34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,8 +189,12 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set_property(TARGET all-targets PROPERTY FOLDER Misc) set_property(TARGET AMDGPU PROPERTY FOLDER Misc) - set_property(TARGET benchmark PROPERTY FOLDER Misc) - set_property(TARGET benchmark_main PROPERTY FOLDER Misc) + if (TARGET benchmark) + set_property(TARGET benchmark PROPERTY FOLDER Misc) + endif() + if (TARGET benchmark_main) + set_property(TARGET benchmark_main PROPERTY FOLDER Misc) + endif() set_property(TARGET distribution PROPERTY FOLDER Misc) set_property(TARGET Engine PROPERTY FOLDER Misc) set_property(TARGET install-distribution PROPERTY FOLDER Misc) @@ -210,8 +214,14 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") set_property(TARGET CONFIGURE_LLVM_NATIVE PROPERTY FOLDER Misc) set_property(TARGET CREATE_LLVM_NATIVE PROPERTY FOLDER Misc) endif() +#if _WIN32 + if (MSVC) + # We can't use LLVM_OPTIMIZED_TABLEGEN on Windows, and the 32-bit llvm-tblgen can easily + # run out of memory. Tell the linker to allow addresses larger than 2GB. 
+ set_property(TARGET llvm-tblgen PROPERTY LINK_FLAGS "/LARGEADDRESSAWARE") + endif() +#endif if (LLPC_BUILD_TESTS) - set_property(TARGET check-all PROPERTY FOLDER Tests) if(NOT LLPC_IS_STANDALONE) set_property(TARGET check-amber PROPERTY FOLDER "LLPC Tests") endif() diff --git a/compilerutils/CMakeLists.txt b/compilerutils/CMakeLists.txt index 588e02e7f8..1aae6b3b59 100644 --- a/compilerutils/CMakeLists.txt +++ b/compilerutils/CMakeLists.txt @@ -12,8 +12,10 @@ function(set_compiler_options PROJECT_NAME) endfunction() add_llvm_library(LLVMCompilerUtils + lib/ArgPromotion.cpp lib/CompilerUtils.cpp lib/TypeLowering.cpp + lib/TypesMetadata.cpp DEPENDS intrinsics_gen diff --git a/llvmraytracing/include/llvmraytracing/TypesMetadata.h b/compilerutils/include/compilerutils/ArgPromotion.h similarity index 61% rename from llvmraytracing/include/llvmraytracing/TypesMetadata.h rename to compilerutils/include/compilerutils/ArgPromotion.h index e1db8e80d7..736fef2dc4 100644 --- a/llvmraytracing/include/llvmraytracing/TypesMetadata.h +++ b/compilerutils/include/compilerutils/ArgPromotion.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -23,27 +23,25 @@ * **********************************************************************************************************************/ -//===- TypesMetadata.h - Pointee type metadata for processing DXIL ---------==// +// Promotion of pointer args to by-value. 
#pragma once -#include "llvm/Bitcode/BitcodeReader.h" - namespace llvm { +class Function; +class SmallBitVector; +} // namespace llvm -/// Return element type of a function argument resolving opaque pointers -/// via !types metadata where appropriate. -/// Returns nullptr for non-pointers. -Type *getFuncArgPtrElementType(const Argument *Arg); +namespace CompilerUtils { -/// Return element type of a function argument resolving opaque pointers -/// via !types metadata where appropriate. -/// Returns nullptr for non-pointers. -Type *getFuncArgPtrElementType(const Function *F, int ArgNo); +// Replace struct return type with its first element type. +llvm::Function *unpackStructReturnType(llvm::Function *Fn); -/// LLVM parser callback which adds !types metadata during DXIL parsing -void DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, - GetTypeByIDTy GetTypeByID, - GetContainedTypeIDTy GetContainedTypeID); +// Turn StructRet argument into return type, modifying pointee type metadata as appropriate. +llvm::Function *lowerStructRetArgument(llvm::Function *Fn); -} // namespace llvm +// Promote pointer (by-ref) arguments to by-value, according to PromotionMask +// and using pointee type metadata. +llvm::Function *promotePointerArguments(llvm::Function *Fn, const llvm::SmallBitVector &PromotionMask); + +} // namespace CompilerUtils diff --git a/compilerutils/include/compilerutils/CompilerUtils.h b/compilerutils/include/compilerutils/CompilerUtils.h index 506dcad925..745d6adb5d 100644 --- a/compilerutils/include/compilerutils/CompilerUtils.h +++ b/compilerutils/include/compilerutils/CompilerUtils.h @@ -129,7 +129,6 @@ class CrossModuleInliner { // The caller has to handle the erasure afterwards. void replaceAllPointerUses(llvm::IRBuilder<> *builder, llvm::Value *oldPointerValue, llvm::Value *newPointerValue, llvm::SmallVectorImpl &toBeRemoved); - } // namespace CompilerUtils namespace llvm { @@ -141,6 +140,31 @@ namespace llvm { // fixed. 
PointerType *getWithSamePointeeType(PointerType *ptrTy, unsigned addressSpace); +/// Free-standing helpers. + +// Helper to visit all calls of a function. +// Expected type for Callback: +// void(CallInst &) +template void forEachCall(Function &F, CallbackTy Callback) { + static_assert(std::is_invocable_v); + for (auto &Use : make_early_inc_range(F.uses())) { + if (auto *CInst = dyn_cast(Use.getUser())) + if (CInst->isCallee(&Use)) + Callback(*CInst); + } +} + +// For each basic block in Func, find the terminator. If it is contained in +// TerminatorOpcodes, then apply the callback on the terminator. +template >> +void forEachTerminator(Function *Func, ArrayRef TerminatorOpcodes, CallbackTy Callback) { + for (auto &BB : *Func) { + auto *Terminator = BB.getTerminator(); + if (llvm::find(TerminatorOpcodes, Terminator->getOpcode()) != TerminatorOpcodes.end()) + Callback(*Terminator); + } +} + } // namespace llvm #endif diff --git a/compilerutils/include/compilerutils/LoweringPointerTupleMap.h b/compilerutils/include/compilerutils/LoweringPointerTupleMap.h index 0da24faa5c..15fa207471 100644 --- a/compilerutils/include/compilerutils/LoweringPointerTupleMap.h +++ b/compilerutils/include/compilerutils/LoweringPointerTupleMap.h @@ -37,7 +37,7 @@ #include #include -namespace compilerutils { +namespace CompilerUtils { /// @brief A key-value map from pointer keys to tuples of pointers that is optimized for value and type lowering uses /// @@ -252,4 +252,4 @@ template class LoweringPoint } }; -} // namespace compilerutils +} // namespace CompilerUtils diff --git a/compilerutils/include/compilerutils/TypeLowering.h b/compilerutils/include/compilerutils/TypeLowering.h index b3c1b42d2a..4782a022a2 100644 --- a/compilerutils/include/compilerutils/TypeLowering.h +++ b/compilerutils/include/compilerutils/TypeLowering.h @@ -61,6 +61,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/IRBuilder.h" +namespace CompilerUtils { + class TypeLowering; /// Given a type, check if it 
should be replaced. @@ -169,14 +171,16 @@ class TypeLowering { llvm::SmallVector> m_constantRules; /// Cache mappings of types (including no-op mappings). - compilerutils::LoweringPointerTupleMap m_typeConversions; + CompilerUtils::LoweringPointerTupleMap m_typeConversions; llvm::IRBuilder<> m_builder; /// Map original values to type-converted values. - compilerutils::LoweringPointerTupleMap m_valueMap; + CompilerUtils::LoweringPointerTupleMap m_valueMap; std::vector>> m_phis; std::vector m_instructionsToErase; llvm::SmallVector m_functionsToErase; }; + +} // namespace CompilerUtils diff --git a/compilerutils/include/compilerutils/TypesMetadata.h b/compilerutils/include/compilerutils/TypesMetadata.h new file mode 100644 index 0000000000..2e319de7c2 --- /dev/null +++ b/compilerutils/include/compilerutils/TypesMetadata.h @@ -0,0 +1,117 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +//===- TypesMetadata.h - Pointee type metadata for processing DXIL ---------==// + +#pragma once + +#include "llvm/Bitcode/BitcodeReader.h" + +namespace llvm { + +class MDTuple; + +// A function argument type and pointee type. +class TypedArgTy { +private: + Type *ArgTy = nullptr; + Type *ElemTy = nullptr; + +public: + TypedArgTy() {} + TypedArgTy(Type *Arg); + TypedArgTy(Type *Arg, Type *Elem); + + static TypedArgTy get(const Argument *Arg); + static TypedArgTy get(const Function *F, const unsigned ArgNo); + + Type *asType() const { return ArgTy; } + Type *getPointerElementType() const; + + bool isPointerTy() const; + bool isVoidTy() const; + Metadata *getTypeMetadata(LLVMContext &Context); + + bool operator==(const TypedArgTy &RHS) const { return (ArgTy == RHS.ArgTy) && (ElemTy == RHS.ElemTy); } +}; + +// A wrapper round FunctionType and metadata for the pointee type(s) of any pointer return type and parameters. +class TypedFuncTy { +public: + TypedFuncTy() {} + + // Construct a TypedFuncTy for the given result type and arg types. + // This constructs the !pointeetys metadata; that can then be attached to a function + // using writeMetadata(). + TypedFuncTy(TypedArgTy ResultTy, ArrayRef ArgTys); + + // Get a TypedFuncTy for the given Function, looking up the !pointeetys metadata. + static TypedFuncTy get(const Function *F); + + // Get the IR FunctionType. + FunctionType *asFunctionType() const { return FuncTy; } + + // Get a TypedArgTy for the return type. + TypedArgTy getReturnType() const; + + // Get a TypedArgTy for a parameter type. 
+ TypedArgTy getParamType(unsigned Idx) const; + + // Push a TypedArgTy for each parameter onto the supplied vector. + void getParamTypes(SmallVectorImpl &ArgTys) const; + + // Write the metadata (if any) onto the specified function. Typically used when creating a new function + // and using our constructor that takes TypedArgTy for return type and arg types. + void writeMetadata(Function *F) const; + + static constexpr const char *MDTypesName = "pointeetys"; + +private: + // Shared code for getReturnType and getParamType. This decodes the !pointeetys metadata. + Type *getPointeeType(Type *Ty, unsigned Idx) const; + + FunctionType *FuncTy = nullptr; + MDTuple *Meta = nullptr; +}; + +/// Return element type of a function argument resolving opaque pointers +/// via !pointeetys metadata where appropriate. +/// Returns nullptr for non-pointers. +Type *getFuncArgPtrElementType(const Argument *Arg); + +/// Return element type of a function argument resolving opaque pointers +/// via !pointeetys metadata where appropriate. +/// Returns nullptr for non-pointers. +Type *getFuncArgPtrElementType(const Function *F, int ArgNo); + +/// Get element type of function return type resolving opaque pointers +/// via !pointeetys metadata where appropriate. +Type *getFuncReturnPtrElementType(const Function *F); + +/// LLVM parser callback which adds !pointeetys metadata during DXIL parsing +void DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID); + +} // namespace llvm diff --git a/compilerutils/lib/ArgPromotion.cpp b/compilerutils/lib/ArgPromotion.cpp new file mode 100644 index 0000000000..a8bd2b06e7 --- /dev/null +++ b/compilerutils/lib/ArgPromotion.cpp @@ -0,0 +1,224 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// Promotion of pointer args to by-value. + +#include "compilerutils/ArgPromotion.h" +#include "compilerutils/CompilerUtils.h" +#include "compilerutils/TypesMetadata.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/IRBuilder.h" + +using namespace llvm; + +static Function *cloneFunctionHeaderWithTypes(Function &F, TypedFuncTy NewType, AttributeList FnAttr) { + FunctionType *FuncTy = NewType.asFunctionType(); + Function *NewFunc = CompilerUtils::cloneFunctionHeader(F, FuncTy, FnAttr); + NewType.writeMetadata(NewFunc); + return NewFunc; +} + +/// Copy the function body from the old function. 
+static Function *cloneFunctionWithTypes(Function *Fn, TypedFuncTy NewFnTy, AttributeList FnAttrs) { + // Erase outdated types metadata to avoid being propagated to the new + // function. + Fn->eraseMetadata(Fn->getContext().getMDKindID(TypedFuncTy::MDTypesName)); + Function *NewFn = cloneFunctionHeaderWithTypes(*Fn, NewFnTy, FnAttrs); + NewFn->splice(NewFn->begin(), Fn); + NewFn->takeName(Fn); + Fn->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, Fn->getType())); + return NewFn; +} + +/// Unpack the return (struct) type of the input function, which means change +/// the return type to its first element type. This may generate invalid IR in +/// general, call this with extra caution. +Function *CompilerUtils::unpackStructReturnType(Function *Fn) { + auto *RetTy = Fn->getReturnType(); + assert(RetTy->isStructTy()); + auto *NewRetTy = RetTy->getStructElementType(0); + + SmallVector ArgTys; + TypedFuncTy::get(Fn).getParamTypes(ArgTys); + TypedFuncTy NewFnTy(NewRetTy, ArgTys); + auto *NewFn = cloneFunctionWithTypes(Fn, NewFnTy, Fn->getAttributes()); + llvm::forEachCall(*NewFn, [&](CallInst &Call) { + // Update callee function type. + Call.setCalledFunction(NewFn); + }); + + // Copy argument names and replace argument uses. + for (const auto &[OldArg, NewArg] : llvm::zip(Fn->args(), NewFn->args())) { + NewArg.setName(OldArg.getName()); + if (!NewFn->isDeclaration()) + OldArg.replaceAllUsesWith(&NewArg); + } + IRBuilder<> B(Fn->getContext()); + llvm::forEachTerminator(NewFn, {Instruction::Ret}, [&](Instruction &Terminator) { + B.SetInsertPoint(&Terminator); + Value *RetExtractVal = B.CreateExtractValue(Terminator.getOperand(0), {0}); + B.CreateRet(RetExtractVal); + Terminator.eraseFromParent(); + }); + Fn->eraseFromParent(); + return NewFn; +} + +// Turn `StructRet` argument into more canonical return statement. 
+Function *CompilerUtils::lowerStructRetArgument(Function *Fn) { + assert(Fn->getReturnType()->isVoidTy()); + auto *RetArg = Fn->getArg(0); + if (!RetArg->hasStructRetAttr()) + RetArg = Fn->getArg(1); + assert(RetArg->hasStructRetAttr()); + unsigned RetArgIdx = RetArg->getArgNo(); + Type *RetTy = RetArg->getParamStructRetType(); + + AttributeList FnAttrs = Fn->getAttributes(); + SmallVector ArgAttrs; + SmallVector NewArgTys; + SmallVector OldArgTys; + TypedFuncTy::get(Fn).getParamTypes(OldArgTys); + for (unsigned Idx = 0; Idx < Fn->arg_size(); Idx++) { + if (Idx != RetArgIdx) { + ArgAttrs.push_back(FnAttrs.getParamAttrs(Idx)); + NewArgTys.push_back(OldArgTys[Idx]); + } + } + + TypedFuncTy NewFnTy(RetTy, NewArgTys); + auto NewFnAttr = AttributeList::get(Fn->getContext(), FnAttrs.getFnAttrs(), FnAttrs.getRetAttrs(), ArgAttrs); + Function *NewFn = cloneFunctionWithTypes(Fn, NewFnTy, NewFnAttr); + + IRBuilder<> B(Fn->getContext()); + llvm::forEachCall(*NewFn, [&](CallInst &Call) { + B.SetInsertPoint(&Call); + Value *StructRetArg = nullptr; + SmallVector Args; + for (const auto &[Idx, Arg] : llvm::enumerate(Call.args())) { + if (Idx == RetArgIdx) { + StructRetArg = Arg; + continue; + } + Args.push_back(Arg); + } + auto *NewRet = B.CreateCall(NewFn, Args); + B.CreateStore(NewRet, StructRetArg); + Call.eraseFromParent(); + }); + + // Copy argument names and replace argument uses. + for (const auto &[ArgNo, NewArg] : llvm::enumerate(NewFn->args())) { + auto *OldArg = Fn->getArg(ArgNo >= RetArgIdx ? 
ArgNo + 1 : ArgNo); + NewArg.setName(OldArg->getName()); + if (!NewFn->isDeclaration()) + OldArg->replaceAllUsesWith(&NewArg); + } + + if (!NewFn->isDeclaration()) { + B.SetInsertPointPastAllocas(NewFn); + auto *RetAlloca = B.CreateAlloca(RetTy); + RetArg->replaceAllUsesWith(RetAlloca); + + // Replace returns with return value + llvm::forEachTerminator(NewFn, {Instruction::Ret}, [&](Instruction &Terminator) { + B.SetInsertPoint(&Terminator); + Value *RetLoad = B.CreateLoad(RetTy, RetAlloca); + B.CreateRet(RetLoad); + Terminator.eraseFromParent(); + }); + } + Fn->eraseFromParent(); + return NewFn; +} + +/// Promote pointer argument type to its value type if the corresponding bit in +/// `PromotionMask` is being set. +Function *CompilerUtils::promotePointerArguments(Function *Fn, const SmallBitVector &PromotionMask) { + SmallVector ArgTys; + SmallVector ParamAttrs; + + // Do nothing if the promotion mask is zero. + if (PromotionMask.none()) + return Fn; + + auto FnAttrs = Fn->getAttributes(); + // The function might not have types metadata, in that + // case nothing needs to be done. + if (!Fn->getMetadata(TypedFuncTy::MDTypesName)) + return Fn; + + for (const auto &[ArgNo, Arg] : llvm::enumerate(Fn->args())) { + TypedArgTy ArgTy = TypedArgTy::get(&Arg); + + // Promote the pointer type to its value type if the bit in `PromotionMask` + // is set. + if (PromotionMask[ArgNo]) { + assert(ArgTy.isPointerTy()); + ArgTys.push_back(ArgTy.getPointerElementType()); + ParamAttrs.push_back({}); + continue; + } + ArgTys.push_back(ArgTy); + ParamAttrs.push_back(FnAttrs.getParamAttrs(ArgNo)); + } + + TypedFuncTy NewFuncTy(TypedFuncTy::get(Fn).getReturnType(), ArgTys); + auto NewFnAttr = AttributeList::get(Fn->getContext(), FnAttrs.getFnAttrs(), FnAttrs.getRetAttrs(), ParamAttrs); + auto *NewFn = cloneFunctionWithTypes(Fn, NewFuncTy, NewFnAttr); + + IRBuilder<> B(Fn->getContext()); + // Change argument types at call sites. 
+ llvm::forEachCall(*NewFn, [&](CallInst &Call) { + B.SetInsertPoint(&Call); + for (const auto &[ArgNo, ArgPair] : llvm::enumerate(llvm::zip(Call.args(), NewFn->args()))) { + auto &CallArg = std::get<0>(ArgPair); + auto &NewArg = std::get<1>(ArgPair); + if (CallArg->getType() != NewArg.getType()) { + auto *NewOp = B.CreateLoad(NewArg.getType(), CallArg); + Call.setArgOperand(ArgNo, NewOp); + } + } + // Update Callee function type. + Call.setCalledFunction(NewFn); + }); + + // Replace argument uses. + for (const auto &[OldArg, NewArg] : llvm::zip(Fn->args(), NewFn->args())) { + Value *NewValue = &NewArg; + NewArg.setName(OldArg.getName()); + if (!NewFn->isDeclaration()) { + if (NewArg.getType() != OldArg.getType()) { + B.SetInsertPointPastAllocas(NewFn); + auto *ArgAlloca = B.CreateAlloca(NewArg.getType()); + B.CreateStore(&NewArg, ArgAlloca); + NewValue = ArgAlloca; + } + OldArg.replaceAllUsesWith(NewValue); + } + } + Fn->eraseFromParent(); + return NewFn; +} diff --git a/compilerutils/lib/CompilerUtils.cpp b/compilerutils/lib/CompilerUtils.cpp index 8bf7ca653d..9cbc790228 100644 --- a/compilerutils/lib/CompilerUtils.cpp +++ b/compilerutils/lib/CompilerUtils.cpp @@ -421,6 +421,10 @@ void CompilerUtils::replaceAllPointerUses(IRBuilder<> *builder, Value *oldPointe return getWithSamePointeeType(ptrTy, newAS); }; +#ifndef NDEBUG + DenseSet PhiElems; +#endif + while (!worklist.empty()) { Use *ptrUse = worklist.pop_back_val(); Value *ptr = cast(ptrUse); @@ -501,8 +505,42 @@ void CompilerUtils::replaceAllPointerUses(IRBuilder<> *builder, Value *oldPointe } break; } + case Instruction::PHI: { + auto *oldType = inst->getType(); + if (oldType->isPointerTy()) { +#ifndef NDEBUG + // Check that all inputs to the phi are handled + if (!PhiElems.erase(ptr)) { + // Was not in the map, so add the other elements + for (auto &incoming : cast(inst)->incoming_values()) { + if (incoming.get() != ptr) { + PhiElems.insert(incoming.get()); + } + } + } +#endif + + Type *newType = 
getMutatedPtrTy(oldType); + // No further processing if the type has the correct pointer type + if (newType == oldType) + continue; + + inst->mutateType(newType); + } + break; + } } worklist.append(usesRange.begin(), usesRange.end()); } + +#ifndef NDEBUG + if (!PhiElems.empty()) { + errs() << "Unhandled inputs to phi: "; + for (auto *phi : PhiElems) { + phi->dump(); + } + } + assert(PhiElems.empty() && "All phi inputs need to be handled, otherwise we end in an inconsistent state"); +#endif } diff --git a/compilerutils/lib/TypeLowering.cpp b/compilerutils/lib/TypeLowering.cpp index 36f2db1cad..0346c9794b 100644 --- a/compilerutils/lib/TypeLowering.cpp +++ b/compilerutils/lib/TypeLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" +using namespace CompilerUtils; using namespace llvm; namespace { diff --git a/compilerutils/lib/TypesMetadata.cpp b/compilerutils/lib/TypesMetadata.cpp new file mode 100644 index 0000000000..23f0e01004 --- /dev/null +++ b/compilerutils/lib/TypesMetadata.cpp @@ -0,0 +1,244 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +//===- TypesMetadata.cpp - Generators, decoders and wrappers for metadata --==// +// +// This file implements metadata functions for the DXIL continuations +// +//===----------------------------------------------------------------------===// + +// The metadata format is as follows: +// +// A function that has pointer return type or param type has !pointeetys metadata attached, which is a tuple. +// There are two formats, the simple format and the general format. +// +// Simple format, used if there is no more than one pointer param: +// - If the return type is a pointer, an entry for it. +// - If there is a pointer param, an entry for it. +// So the simple format could be one entry (either return type or a single param) or two entries (return type +// plus single param). +// +// General format, used if there is more than one pointer param: +// - An entry for the return type (null if it is not a pointer) +// - An entry for each parameter (null if it is not a pointer). +// Trailing null entries are truncated from the tuple. +// +// In either format, each entry is a poison value of the pointee type, or (for the general format) null +// if the corresponding return type or param is not a pointer. 
+ +#include "compilerutils/TypesMetadata.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" + +using namespace llvm; + +TypedArgTy::TypedArgTy(Type *Arg) { + assert(!Arg->isPointerTy() && "pointers are not supported by this constructor"); + ArgTy = Arg; + ElemTy = nullptr; +} + +TypedArgTy::TypedArgTy(Type *Arg, Type *Elem) : ArgTy(Arg), ElemTy(Elem) { + assert(isa(asType()) == isPointerTy() && + "Pointer result must have pointee type, and non-pointer result must not have pointee type"); +} + +TypedArgTy TypedArgTy::get(const Argument *Arg) { + // only consult metadata for pointer types + Type *ArgTy = Arg->getType(); + if (!ArgTy->isPointerTy()) + return TypedArgTy(ArgTy, nullptr); + return TypedFuncTy::get(Arg->getParent()).getParamType(Arg->getArgNo()); +} + +TypedArgTy TypedArgTy::get(const Function *F, const unsigned ArgNo) { + return get(F->getArg(ArgNo)); +} + +Type *TypedArgTy::getPointerElementType() const { + assert(ElemTy && "cannot get element type of non-pointer"); + return ElemTy; +} + +bool TypedArgTy::isPointerTy() const { + return !!ElemTy; +} + +bool TypedArgTy::isVoidTy() const { + return ArgTy->isVoidTy(); +} + +// Get a TypedFuncTy for the given Function, looking up the !pointeetys metadata. +TypedFuncTy TypedFuncTy::get(const Function *F) { + TypedFuncTy Result; + Result.FuncTy = F->getFunctionType(); + Result.Meta = dyn_cast_or_null(F->getMetadata(MDTypesName)); + return Result; +} + +// Construct a TypedFuncTy for the given result type and arg types. +// This constructs the !pointeetys metadata; that can then be attached to a function +// using writeMetadata(). 
+TypedFuncTy::TypedFuncTy(TypedArgTy ResultTy, ArrayRef<TypedArgTy> ArgTys) {
+  // Wrap a pointee type in the poison-constant form used for !pointeetys entries.
+  auto GetPointeeMD = [](const TypedArgTy &Ty) -> Metadata * {
+    return ConstantAsMetadata::get(PoisonValue::get(Ty.getPointerElementType()));
+  };
+
+  // First pass: collect the bare IR parameter types and find out how many pointer params there are,
+  // because that alone decides between the simple and the general metadata format.
+  SmallVector<Type *> BareArgTys;
+  unsigned NumPointerArgs = 0;
+  unsigned LastPointerArgIdx = 0;
+  for (unsigned ArgIdx = 0; ArgIdx != ArgTys.size(); ++ArgIdx) {
+    const TypedArgTy &ArgTy = ArgTys[ArgIdx];
+    BareArgTys.push_back(ArgTy.asType());
+    if (ArgTy.isPointerTy()) {
+      ++NumPointerArgs;
+      LastPointerArgIdx = ArgIdx;
+    }
+  }
+
+  SmallVector<Metadata *> PointeeTys;
+  if (NumPointerArgs <= 1) {
+    // Simple format: an entry for the return type if it is a pointer, followed by an entry for the
+    // single pointer param if there is one.
+    if (ResultTy.isPointerTy())
+      PointeeTys.push_back(GetPointeeMD(ResultTy));
+    if (NumPointerArgs == 1)
+      PointeeTys.push_back(GetPointeeMD(ArgTys[LastPointerArgIdx]));
+  } else {
+    // General format: slot 0 is the return type and slot N+1 is param N; non-pointer slots stay null.
+    // Sizing the tuple by the last pointer param is what truncates the trailing null entries.
+    // Each pointee goes straight into its own slot, so the layout does not depend on whether the
+    // return type is a pointer or on where the first pointer param sits.
+    PointeeTys.resize(LastPointerArgIdx + 2);
+    if (ResultTy.isPointerTy())
+      PointeeTys[0] = GetPointeeMD(ResultTy);
+    for (unsigned ArgIdx = 0; ArgIdx != ArgTys.size(); ++ArgIdx) {
+      if (ArgTys[ArgIdx].isPointerTy())
+        PointeeTys[ArgIdx + 1] = GetPointeeMD(ArgTys[ArgIdx]);
+    }
+  }
+
+  FuncTy = FunctionType::get(ResultTy.asType(), BareArgTys, /*isVarArg=*/false);
+  // No pointer return type or params means no metadata at all; Meta stays null.
+  if (!PointeeTys.empty())
+    Meta = MDTuple::get(FuncTy->getContext(), PointeeTys);
+}
+
+// Shared code for getReturnType and getParamType. This decodes the !pointeetys metadata.
+//
+// @param Ty : Return type or parameter type, just so we can check it is actually a pointer
+// @param Idx : 0 for return type, N+1 for param N
+// @returns : The pointee type; nullptr if Ty is not a pointer. Missing or malformed metadata trips
+//            the assert below and yields nullptr in builds without asserts.
+Type *TypedFuncTy::getPointeeType(Type *Ty, unsigned Idx) const {
+  if (!isa<PointerType>(Ty))
+    return nullptr;
+  Type *PointeeTy = nullptr;
+  if (Meta) {
+    Metadata *Entry = nullptr;
+    unsigned NumOperands = Meta->getNumOperands();
+    if (Idx == 0) {
+      // Getting return pointee type. That works the same in simple or general format.
+      if (NumOperands >= 1)
+        Entry = Meta->getOperand(0);
+    } else if (NumOperands == 1 || (NumOperands == 2 && isa<PointerType>(FuncTy->getReturnType()))) {
+      // Simple format (only one entry, or two entries where the first one is the return pointee type).
+      // The single pointer param is always the last entry.
+      Entry = Meta->getOperand(NumOperands - 1);
+    } else if (Idx < NumOperands) {
+      // General format. The bounds check keeps a truncated or empty tuple from indexing past the
+      // end of the operand list; such metadata is reported by the assert below instead.
+      Entry = Meta->getOperand(Idx);
+    }
+    // An entry that is not a value (wrong metadata kind) is treated as malformed rather than
+    // dereferencing a failed cast.
+    if (auto *ValueEntry = dyn_cast_or_null<ValueAsMetadata>(Entry))
+      PointeeTy = ValueEntry->getType();
+  }
+  assert(PointeeTy && "Malformed pointee type metadata");
+  return PointeeTy;
+}
+
+// Write the metadata (if any) onto the specified function. Typically used when creating a new function
+// and using our constructor that takes TypedArgTy for return type and arg types.
+void TypedFuncTy::writeMetadata(Function *F) const {
+  assert(F->getFunctionType() == FuncTy);
+  if (Meta)
+    F->setMetadata(MDTypesName, Meta);
+}
+
+// Get a TypedArgTy for the return type.
+TypedArgTy TypedFuncTy::getReturnType() const {
+  Type *Ty = FuncTy->getReturnType();
+  return TypedArgTy(Ty, getPointeeType(Ty, 0));
+}
+
+// Get a TypedArgTy for a parameter type.
+TypedArgTy TypedFuncTy::getParamType(unsigned Idx) const {
+  Type *Ty = FuncTy->getParamType(Idx);
+  return TypedArgTy(Ty, getPointeeType(Ty, Idx + 1));
+}
+
+// Push a TypedArgTy for each parameter onto the supplied vector.
+void TypedFuncTy::getParamTypes(SmallVectorImpl &ArgTys) const { + for (unsigned Idx = 0; Idx != FuncTy->getNumParams(); ++Idx) + ArgTys.push_back(getParamType(Idx)); +} + +// Return element type of a function argument resolving opaque pointers +// via !pointeetys metadata where appropriate. +// Returns nullptr for non-pointers. +Type *llvm::getFuncArgPtrElementType(const Argument *Arg) { + auto *ArgTy = Arg->getType(); + if (!ArgTy->isPointerTy()) + return nullptr; + + return TypedArgTy::get(Arg).getPointerElementType(); +} + +// Return element type of a function argument resolving opaque pointers +// via !pointeetys metadata where appropriate. +// Returns nullptr for non-pointers. +Type *llvm::getFuncArgPtrElementType(const Function *F, int ArgNo) { + return getFuncArgPtrElementType(F->getArg(ArgNo)); +} + +/// Get element type of function return type resolving opaque pointers +/// via !pointeetys metadata where appropriate. +Type *llvm::getFuncReturnPtrElementType(const Function *F) { + if (!isa(F->getFunctionType()->getReturnType())) + return nullptr; + return TypedFuncTy::get(F).getReturnType().getPointerElementType(); +} + +/// LLVM parser callback which adds !pointeetys metadata during DXIL parsing. +void llvm::DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID) { + if (auto FuncTy = dyn_cast(GetTypeByID(TypeID))) { + // This is a function. Set up the metadata if there are any pointer types. 
+ TypedArgTy ReturnTy; + if (isa(FuncTy->getReturnType())) + ReturnTy = TypedArgTy(FuncTy->getReturnType(), GetTypeByID(GetContainedTypeID(GetContainedTypeID(TypeID, 0), 0))); + else + ReturnTy = FuncTy->getReturnType(); + SmallVector ArgTys; + for (unsigned Idx = 0; Idx != FuncTy->getNumParams(); ++Idx) { + Type *ArgTy = FuncTy->getParamType(Idx); + if (isa(ArgTy)) + ArgTys.push_back(TypedArgTy(ArgTy, GetTypeByID(GetContainedTypeID(GetContainedTypeID(TypeID, Idx + 1), 0)))); + else + ArgTys.push_back(ArgTy); + } + TypedFuncTy(ReturnTy, ArgTys).writeMetadata(cast(V)); + } +} diff --git a/compilerutils/test/inc/link-constant-expr-global.ll b/compilerutils/test/cross-module-inliner/inc/link-constant-expr-global.ll similarity index 100% rename from compilerutils/test/inc/link-constant-expr-global.ll rename to compilerutils/test/cross-module-inliner/inc/link-constant-expr-global.ll diff --git a/compilerutils/test/inc/link-control-flow.ll b/compilerutils/test/cross-module-inliner/inc/link-control-flow.ll similarity index 100% rename from compilerutils/test/inc/link-control-flow.ll rename to compilerutils/test/cross-module-inliner/inc/link-control-flow.ll diff --git a/compilerutils/test/inc/link-existing-func-name.ll b/compilerutils/test/cross-module-inliner/inc/link-existing-func-name.ll similarity index 100% rename from compilerutils/test/inc/link-existing-func-name.ll rename to compilerutils/test/cross-module-inliner/inc/link-existing-func-name.ll diff --git a/compilerutils/test/inc/link-existing-global-name.ll b/compilerutils/test/cross-module-inliner/inc/link-existing-global-name.ll similarity index 100% rename from compilerutils/test/inc/link-existing-global-name.ll rename to compilerutils/test/cross-module-inliner/inc/link-existing-global-name.ll diff --git a/compilerutils/test/inc/link-fold-const.ll b/compilerutils/test/cross-module-inliner/inc/link-fold-const.ll similarity index 100% rename from compilerutils/test/inc/link-fold-const.ll rename to 
compilerutils/test/cross-module-inliner/inc/link-fold-const.ll diff --git a/compilerutils/test/inc/link-func-metadata.ll b/compilerutils/test/cross-module-inliner/inc/link-func-metadata.ll similarity index 100% rename from compilerutils/test/inc/link-func-metadata.ll rename to compilerutils/test/cross-module-inliner/inc/link-func-metadata.ll diff --git a/compilerutils/test/inc/link-global-initializer.ll b/compilerutils/test/cross-module-inliner/inc/link-global-initializer.ll similarity index 100% rename from compilerutils/test/inc/link-global-initializer.ll rename to compilerutils/test/cross-module-inliner/inc/link-global-initializer.ll diff --git a/compilerutils/test/inc/link-global-same-struct.ll b/compilerutils/test/cross-module-inliner/inc/link-global-same-struct.ll similarity index 100% rename from compilerutils/test/inc/link-global-same-struct.ll rename to compilerutils/test/cross-module-inliner/inc/link-global-same-struct.ll diff --git a/compilerutils/test/inc/link-global-same-type.ll b/compilerutils/test/cross-module-inliner/inc/link-global-same-type.ll similarity index 100% rename from compilerutils/test/inc/link-global-same-type.ll rename to compilerutils/test/cross-module-inliner/inc/link-global-same-type.ll diff --git a/compilerutils/test/inc/link-simple.ll b/compilerutils/test/cross-module-inliner/inc/link-simple.ll similarity index 100% rename from compilerutils/test/inc/link-simple.ll rename to compilerutils/test/cross-module-inliner/inc/link-simple.ll diff --git a/compilerutils/test/link-constant-expr-global.ll b/compilerutils/test/cross-module-inliner/link-constant-expr-global.ll similarity index 100% rename from compilerutils/test/link-constant-expr-global.ll rename to compilerutils/test/cross-module-inliner/link-constant-expr-global.ll diff --git a/compilerutils/test/link-control-flow.ll b/compilerutils/test/cross-module-inliner/link-control-flow.ll similarity index 100% rename from compilerutils/test/link-control-flow.ll rename to 
compilerutils/test/cross-module-inliner/link-control-flow.ll diff --git a/compilerutils/test/link-existing-func-name.ll b/compilerutils/test/cross-module-inliner/link-existing-func-name.ll similarity index 100% rename from compilerutils/test/link-existing-func-name.ll rename to compilerutils/test/cross-module-inliner/link-existing-func-name.ll diff --git a/compilerutils/test/link-existing-global-multi.ll b/compilerutils/test/cross-module-inliner/link-existing-global-multi.ll similarity index 100% rename from compilerutils/test/link-existing-global-multi.ll rename to compilerutils/test/cross-module-inliner/link-existing-global-multi.ll diff --git a/compilerutils/test/link-existing-global-name.ll b/compilerutils/test/cross-module-inliner/link-existing-global-name.ll similarity index 100% rename from compilerutils/test/link-existing-global-name.ll rename to compilerutils/test/cross-module-inliner/link-existing-global-name.ll diff --git a/compilerutils/test/link-fold-const.ll b/compilerutils/test/cross-module-inliner/link-fold-const.ll similarity index 100% rename from compilerutils/test/link-fold-const.ll rename to compilerutils/test/cross-module-inliner/link-fold-const.ll diff --git a/compilerutils/test/link-func-metadata.ll b/compilerutils/test/cross-module-inliner/link-func-metadata.ll similarity index 100% rename from compilerutils/test/link-func-metadata.ll rename to compilerutils/test/cross-module-inliner/link-func-metadata.ll diff --git a/compilerutils/test/link-global-initializer.ll b/compilerutils/test/cross-module-inliner/link-global-initializer.ll similarity index 100% rename from compilerutils/test/link-global-initializer.ll rename to compilerutils/test/cross-module-inliner/link-global-initializer.ll diff --git a/compilerutils/test/link-global-same-struct.ll b/compilerutils/test/cross-module-inliner/link-global-same-struct.ll similarity index 88% rename from compilerutils/test/link-global-same-struct.ll rename to 
compilerutils/test/cross-module-inliner/link-global-same-struct.ll index 9150ec4d58..93d339cec0 100644 --- a/compilerutils/test/link-global-same-struct.ll +++ b/compilerutils/test/cross-module-inliner/link-global-same-struct.ll @@ -11,7 +11,7 @@ declare i32 @inline_fun() define i32 @main() { ; CHECK-LABEL: define i32 @main() { -; CHECK-NEXT: [[RESULT_I:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_MYSTRUCT_0:%.*]], ptr @"glob.cloned.{{.*}}", i64 0, i32 1), align 4 +; CHECK-NEXT: [[RESULT_I:%.*]] = load i32, ptr getelementptr inbounds (i8, ptr @"glob.cloned.{{.*}}", i64 4), align 4 ; CHECK-NEXT: ret i32 [[RESULT_I]] ; %result = call i32 @inline_fun() diff --git a/compilerutils/test/link-global-same-type.ll b/compilerutils/test/cross-module-inliner/link-global-same-type.ll similarity index 100% rename from compilerutils/test/link-global-same-type.ll rename to compilerutils/test/cross-module-inliner/link-global-same-type.ll diff --git a/compilerutils/test/link-simple.ll b/compilerutils/test/cross-module-inliner/link-simple.ll similarity index 100% rename from compilerutils/test/link-simple.ll rename to compilerutils/test/cross-module-inliner/link-simple.ll diff --git a/compilerutils/tool/cross-module-inline/cross-module-inline.cpp b/compilerutils/tool/cross-module-inline/cross-module-inline.cpp index fbc1484816..affe2e470d 100644 --- a/compilerutils/tool/cross-module-inline/cross-module-inline.cpp +++ b/compilerutils/tool/cross-module-inline/cross-module-inline.cpp @@ -30,6 +30,7 @@ */ #include "compilerutils/CompilerUtils.h" +#include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" diff --git a/imported/llvm-dialects b/imported/llvm-dialects index ed4b46e842..6ff7d39046 160000 --- a/imported/llvm-dialects +++ b/imported/llvm-dialects @@ -1 +1 @@ -Subproject commit ed4b46e8425066a96a5e79afc29bce3d82eecf71 +Subproject commit 6ff7d39046e280e446fd69aa08c6c6524c68c728 diff --git 
a/include/vkgcDefs.h b/include/vkgcDefs.h index aed46de1b2..044f96fd3c 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -485,6 +485,10 @@ struct PipelineOptions { /// loading it from userdata bool bindlessTextureMode; ///< For OGL only, true if bindless textures are used bool bindlessImageMode; ///< For OGL only, true if bindless images are used + bool enablePolygonStipple; ///< For OGL only, enable polygon stipple pattern. + bool enableLineSmooth; ///< For OGL only, enable line smooth mode. + bool emulateWideLineStipple; ///< For OGL only, enable line AA stipple. + bool enablePointSmooth; ///< For OGL only, enable point smooth mode. const auto &getGlState() const { return *this; } #else struct GLState { @@ -499,12 +503,17 @@ struct PipelineOptions { bool disableBaseVertex; ///< For OGL only, force the BaseVertex builtin to 0 instead of bool bindlessTextureMode; ///< For OGL only, true if bindless textures are used bool bindlessImageMode; ///< For OGL only, true if bindless images are used + bool enablePolygonStipple; ///< For OGL only, enable polygon stipple pattern. + bool enableLineSmooth; ///< For OGL only, enable line smooth mode. + bool emulateWideLineStipple; ///< For OGL only, enable line AA stipple. + bool enablePointSmooth; ///< For OGL only, enable point smooth mode. } glState; const auto &getGlState() const { return glState; } #endif unsigned reserved20; bool enablePrimGeneratedQuery; ///< If set, primitive generated query is enabled bool disablePerCompFetch; ///< Disable per component fetch in uber fetch shader. + bool reserved21; }; /// Prototype of allocator for output data buffer, used in shader-specific operations. @@ -878,6 +887,9 @@ struct PipelineShaderOptions { /// Binding ID offset of default uniform block unsigned constantBufferBindingOffset; + + /// Let dmask bits be fully enabled when call 'image.sample.c', for depth compare mode swizzling workaround. 
+ bool imageSampleDrefReturnsRgba; }; /// Represents YCbCr sampler meta data in resource descriptor @@ -1088,9 +1100,14 @@ enum DispatchDimSwizzleMode : unsigned { FlattenWidthHeight, ///< Flatten width and height to x, and depth to y }; +/// Enumerates modes of raytracing compiling. This only takes effect when the pipeline is compiled in indirect mode. enum class LlpcRaytracingMode : unsigned { +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 74 None = 0, // Not goto any raytracing compiling path - Legacy, // LLpc Legacy compiling path +#else + Auto = 0, // Automatically select the raytracing compiling path +#endif + Legacy, // LLpc Legacy compiling path #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 69 Gpurt2, // Raytracing lowering at the end of spirvLower. #else @@ -1323,6 +1340,8 @@ struct GraphicsPipelineBuildInfo { bool enableColorClampVs; ///< Enable clamp vertex output color bool enableColorClampFs; ///< Enable clamp fragment output color bool enableFlatShade; ///< Whether enable flat shade. 
+ float lineSmooth[4]; ///< Line smooth pattern + float pointSmooth[2]; ///< Point smooth pattern } glState; const auto &getGlState() const { return glState; } #endif diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index e49c3a34fc..10080aac5c 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -156,6 +156,7 @@ target_sources(LLVMlgc PRIVATE patch/PatchInvariantLoads.cpp patch/PatchLlvmIrInclusion.cpp patch/PatchLoadScalarizer.cpp + patch/PatchMulDx9Zero.cpp patch/PatchLoopMetadata.cpp patch/PatchNullFragShader.cpp patch/PatchPeepholeOpt.cpp @@ -175,6 +176,7 @@ target_sources(LLVMlgc PRIVATE patch/CombineCooperativeMatrix.cpp patch/LowerCooperativeMatrix.cpp patch/LowerGpuRt.cpp + patch/LowerRayQueryWrapper.cpp ) # lgc/state diff --git a/lgc/builder/Builder.cpp b/lgc/builder/Builder.cpp index fa9b9214e7..9bdd469aff 100644 --- a/lgc/builder/Builder.cpp +++ b/lgc/builder/Builder.cpp @@ -89,8 +89,8 @@ VectorType *BuilderCommon::getDescTy(ResourceNodeType descType) { // Get the type of pointer to descriptor. // // @param descType : Descriptor type, one of the ResourceNodeType values -Type *BuilderCommon::getDescPtrTy(ResourceNodeType descType) { - return getDescTy(descType)->getPointerTo(ADDR_SPACE_CONST); +Type *BuilderCommon::getDescPtrTy() { + return getPtrTy(ADDR_SPACE_CONST); } // ==================================================================================================================== @@ -99,6 +99,12 @@ unsigned Builder::getAddrSpaceConst() { return ADDR_SPACE_CONST; } +// ==================================================================================================================== +// Get address space of global memory. +unsigned Builder::getAddrSpaceGlobal() { + return ADDR_SPACE_GLOBAL; +} + // ==================================================================================================================== // Get address space of local (thread-global) memory. 
unsigned Builder::getAddrSpaceLocal() { @@ -211,10 +217,10 @@ Constant *BuilderCommon::getFpConstant(Type *ty, APFloat value) { // Create alloca for given input type. // // @param ty : pointer type. -Value *BuilderCommon::CreateAllocaAtFuncEntry(Type *ty) { +Value *BuilderCommon::CreateAllocaAtFuncEntry(Type *ty, const Twine &allocaName) { IRBuilderBase::InsertPointGuard ipg(*this); SetInsertPointPastAllocas(GetInsertBlock()->getParent()); - return CreateAlloca(ty); + return CreateAlloca(ty, GetInsertBlock()->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, allocaName); } // ===================================================================================================================== diff --git a/lgc/builder/BuilderBase.cpp b/lgc/builder/BuilderBase.cpp index 951c0687e7..49458c0e4e 100644 --- a/lgc/builder/BuilderBase.cpp +++ b/lgc/builder/BuilderBase.cpp @@ -80,6 +80,31 @@ CallInst *BuilderCommon::CreateNamedCall(StringRef funcName, Type *retTy, ArrayR return CompilerUtils::createNamedCall(*this, funcName, retTy, args, attribs, instName); } +// Create an llvm.assume call to annotate the dereferenceable and alignment attributes of the pointer. We only insert +// the call if dereferenceable > 0 or align > 1. +// +// @param ptr : The pointer to be annotated. +// @param dereferenceable : the dereferenceable size (in bytes) of the pointer +// @param align : the alignment of the pointer. 
+CallInst *BuilderCommon::CreateAssumptionDereferenceableAndAlign(Value *ptr, unsigned dereferenceable, unsigned align) { + SmallVector Ops; + if (align > 1) { + OperandBundleDefT alignOpB("align", std::vector{ptr, getInt32(align)}); + Ops.push_back(alignOpB); + } + + if (dereferenceable > 0) { + OperandBundleDefT dereferenceableOpB("dereferenceable", + std::vector{ptr, getInt32(dereferenceable)}); + Ops.push_back(dereferenceableOpB); + } + + if (Ops.empty()) + return nullptr; + + return CreateAssumption(getTrue(), Ops); +} + // ===================================================================================================================== // Emits a amdgcn.reloc.constant intrinsic that represents a relocatable i32 value with the given symbol name // diff --git a/lgc/builder/BuilderRecorder.cpp b/lgc/builder/BuilderRecorder.cpp index 972d7a1bdf..d143ae8373 100644 --- a/lgc/builder/BuilderRecorder.cpp +++ b/lgc/builder/BuilderRecorder.cpp @@ -1070,7 +1070,7 @@ Value *Builder::CreateGetDescStride(ResourceNodeType concreteType, ResourceNodeT // @param instName : Name to give instruction(s) Value *Builder::CreateGetDescPtr(ResourceNodeType concreteType, ResourceNodeType abstractType, uint64_t descSet, unsigned binding, const Twine &instName) { - return record(BuilderOpcode::GetDescPtr, getDescPtrTy(concreteType), + return record(BuilderOpcode::GetDescPtr, getDescPtrTy(), {getInt32(static_cast(concreteType)), getInt32(static_cast(abstractType)), getInt64(descSet), getInt32(binding)}, instName); diff --git a/lgc/builder/DescBuilder.cpp b/lgc/builder/DescBuilder.cpp index 6ef7fbbf69..bf716ec257 100644 --- a/lgc/builder/DescBuilder.cpp +++ b/lgc/builder/DescBuilder.cpp @@ -28,6 +28,7 @@ * @brief LLPC source file: implementation of Builder methods for descriptor loads *********************************************************************************************************************** */ +#include "compilerutils/CompilerUtils.h" #include "lgc/LgcContext.h" #include 
"lgc/LgcDialect.h" #include "lgc/builder/BuilderImpl.h" @@ -55,9 +56,10 @@ using namespace llvm; // @param descIndex : Descriptor index // @param flags : BufferFlag* bit settings // @param stride : stride for index mode access +// @param convertFatPointer : Whether to convert to a fat pointer // @param instName : Name to give instruction(s) Value *BuilderImpl::createBufferDesc(uint64_t descSet, unsigned binding, Value *descIndex, unsigned flags, - unsigned stride, const Twine &instName) { + unsigned stride, bool convertFatPointer, const Twine &instName) { Value *desc = nullptr; bool return64Address = false; descIndex = scalarizeIfUniform(descIndex, flags & BufferFlagNonUniform); @@ -94,6 +96,9 @@ Value *BuilderImpl::createBufferDesc(uint64_t descSet, unsigned binding, Value * if (!node) report_fatal_error("Resource node not found"); + const bool isCompact = (node && (node->concreteType == ResourceNodeType::DescriptorBufferCompact || + node->concreteType == ResourceNodeType::DescriptorConstBufferCompact)); + if (node == topNode && isa(descIndex) && node->concreteType != ResourceNodeType::InlineBuffer) { // Handle a descriptor in the root table (a "dynamic descriptor") specially, as long as it is not variably // indexed and is not an InlineBuffer. @@ -113,12 +118,27 @@ Value *BuilderImpl::createBufferDesc(uint64_t descSet, unsigned binding, Value * desc = create(descTy, dwordOffset * 4); if (return64Address) return desc; + assert(convertFatPointer); + if (isCompact) { + desc = CreateBitCast(desc, getInt64Ty()); + if (stride == 0) + desc = create(desc); + else + desc = create(desc, getInt32(stride)); + } else { + desc = create(desc); + } } else if (node->concreteType == ResourceNodeType::InlineBuffer) { // Handle an inline buffer specially. Get a pointer to it, then expand to a descriptor. 
Value *descPtr = getDescPtr(node->concreteType, topNode, node, binding); if (return64Address) return descPtr; - desc = buildInlineBufferDesc(descPtr, stride); + assert(convertFatPointer); + desc = CreatePtrToInt(descPtr, getInt64Ty()); + if (stride == 0) + desc = create(desc); + else + desc = create(desc, getInt32(stride)); } else { ResourceNodeType resType = node->concreteType; ResourceNodeType abstractType = node->abstractType; @@ -144,33 +164,19 @@ Value *BuilderImpl::createBufferDesc(uint64_t descSet, unsigned binding, Value * } // Cast it to the right type. - descPtr = CreateBitCast(descPtr, getDescPtrTy(resType)); - // Load the descriptor. - desc = CreateLoad(getDescTy(resType), descPtr); - - { - // Force convert the buffer view to raw view. - if (flags & BufferFlagForceRawView) { - Value *desc1 = CreateExtractElement(desc, 1); - Value *desc2 = CreateExtractElement(desc, 2); - Value *desc3 = CreateExtractElement(desc, 3); - // stride is 14 bits in dword1[29:16] - Value *stride = CreateAnd(CreateLShr(desc1, getInt32(16)), getInt32(0x3fff)); - stride = CreateBinaryIntrinsic(Intrinsic::smax, stride, getInt32(1)); - // set srd with new stride = 0 and new num_record = stride * num_record, num_record is dword2[31:0] - desc = CreateInsertElement(desc, CreateAnd(desc1, getInt32(0xc000ffff)), 1); - desc = CreateInsertElement(desc, CreateMul(stride, desc2), 2); - // gfx10 and gfx11 have oob fields with 2 bits in dword3[29:28] here force to set to 3 as OOB_COMPLETE mode. - desc = CreateInsertElement(desc, CreateOr(desc3, getInt32(0x30000000)), 3); - } + descPtr = CreateBitCast(descPtr, getDescPtrTy()); + if (convertFatPointer) { + bool forceRawView = flags & BufferFlagForceRawView; + if (stride == 0) + desc = create(descPtr, forceRawView, isCompact); + else + desc = create(descPtr, forceRawView, isCompact, getInt32(stride)); + } else { + // Load the descriptor. 
+ desc = CreateLoad(getDescTy(resType), descPtr); } } - if (node && (node->concreteType == ResourceNodeType::DescriptorBufferCompact || - node->concreteType == ResourceNodeType::DescriptorConstBufferCompact)) { - desc = buildBufferCompactDesc(desc, stride); - } - if (!instName.isTriviallyEmpty()) desc->setName(instName); @@ -235,7 +241,7 @@ Value *BuilderImpl::CreateGetDescPtr(ResourceNodeType concreteType, ResourceNode // NOTE: Resource node may be DescriptorTexelBuffer, but it is defined as OpTypeSampledImage in SPIRV, // In this case, a caller may search for the DescriptorSampler and not find it. We return nullptr and // expect the caller to handle it. - return PoisonValue::get(getDescPtrTy(concreteType)); + return PoisonValue::get(getDescPtrTy()); } assert(node && "missing resource node"); } @@ -274,8 +280,7 @@ Value *BuilderImpl::CreateGetDescPtr(ResourceNodeType concreteType, ResourceNode descPtr = getDescPtr(concreteType, topNode, node, binding); } - // Cast to the right pointer type. 
- return CreateBitCast(descPtr, getDescPtrTy(concreteType)); + return descPtr; } // ===================================================================================================================== @@ -353,6 +358,7 @@ Value *BuilderImpl::getDescPtr(ResourceNodeType concreteType, const ResourceNode AddressExtender extender(GetInsertBlock()->getParent()); Value *descPtr = create(getInt32Ty(), topNode->offsetInDwords * 4); descPtr = extender.extend(descPtr, highHalf, getPtrTy(ADDR_SPACE_CONST), *this); + CreateAssumptionDereferenceableAndAlign(descPtr, ~0u, 4); return CreateConstGEP1_32(getInt8Ty(), descPtr, offsetInBytes); } @@ -368,47 +374,37 @@ Value *BuilderImpl::scalarizeIfUniform(Value *value, bool isNonUniform) { return value; } -// ===================================================================================================================== -// Calculate a buffer descriptor for an inline buffer -// -// @param descPtr : Pointer to inline buffer -// @param stride : stride for the buffer descriptor to access in index mode -Value *BuilderImpl::buildInlineBufferDesc(Value *descPtr, unsigned stride) { - // Bitcast the pointer to v2i32 - descPtr = CreatePtrToInt(descPtr, getInt64Ty()); - descPtr = CreateBitCast(descPtr, FixedVectorType::get(getInt32Ty(), 2)); - - return buildBufferCompactDesc(descPtr, stride); -} - // ===================================================================================================================== // Build buffer compact descriptor // // @param desc : The buffer descriptor base to build for the buffer compact descriptor // @param stride : stride for the buffer descriptor to access in index mode Value *BuilderImpl::buildBufferCompactDesc(Value *desc, unsigned stride) { + // Bitcast the pointer to v2i32 + desc = CreatePtrToInt(desc, getInt64Ty()); + desc = CreateBitCast(desc, FixedVectorType::get(getInt32Ty(), 2)); const GfxIpVersion gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion(); // Extract compact buffer 
descriptor - Value *descElem0 = CreateExtractElement(desc, uint64_t(0)); - Value *descElem1 = CreateExtractElement(desc, 1); + Value *addrLo = CreateExtractElement(desc, uint64_t(0)); + Value *addrHi = CreateExtractElement(desc, 1); // Build normal buffer descriptor Value *bufDesc = PoisonValue::get(FixedVectorType::get(getInt32Ty(), 4)); { // Dword 0 - bufDesc = CreateInsertElement(bufDesc, descElem0, uint64_t(0)); + bufDesc = CreateInsertElement(bufDesc, addrLo, uint64_t(0)); // Dword 1 SqBufRsrcWord1 sqBufRsrcWord1 = {}; sqBufRsrcWord1.bits.baseAddressHi = UINT16_MAX; - descElem1 = CreateAnd(descElem1, getInt32(sqBufRsrcWord1.u32All)); + addrHi = CreateAnd(addrHi, getInt32(sqBufRsrcWord1.u32All)); if (stride) { SqBufRsrcWord1 sqBufRsrcWord1Stride = {}; sqBufRsrcWord1Stride.bits.stride = stride; - descElem1 = CreateOr(descElem1, getInt32(sqBufRsrcWord1Stride.u32All)); + addrHi = CreateOr(addrHi, getInt32(sqBufRsrcWord1Stride.u32All)); } - bufDesc = CreateInsertElement(bufDesc, descElem1, 1); + bufDesc = CreateInsertElement(bufDesc, addrHi, 1); // Dword 2 SqBufRsrcWord2 sqBufRsrcWord2 = {}; diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index 363ebefd7c..edead69449 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -1004,7 +1004,9 @@ Value *BuilderImpl::CreateImageSampleGather(Type *resultTy, unsigned dim, unsign // Dmask. 
unsigned dmask = 15; - if (address[ImageAddressIdxZCompare]) + bool imageSampleDrefReturnsRgba = + getPipelineState()->getShaderOptions(ShaderStage::Fragment).imageSampleDrefReturnsRgba; + if (!imageSampleDrefReturnsRgba && address[ImageAddressIdxZCompare]) dmask = 1; else if (!isSample) { dmask = 1; @@ -1111,12 +1113,10 @@ Value *BuilderImpl::CreateImageSampleGather(Type *resultTy, unsigned dim, unsign if (samplerDesc->getType()->isVectorTy()) { const unsigned samplerDescArgIndex = imageDescArgIndex + 1; - if (flags & ImageFlagNonUniformSampler) { + if (flags & ImageFlagNonUniformSampler) nonUniformArgIndexes.push_back(samplerDescArgIndex); - } else { - // TODO: Re-add the condition once backend fix the waterfall loop bug. + else if (flags & ImageFlagEnforceReadFirstLaneSampler) enforceReadFirstLane(imageOp, samplerDescArgIndex); - } } if (!nonUniformArgIndexes.empty()) @@ -2009,7 +2009,8 @@ Value *BuilderImpl::transformImageDesc(Value *imageDesc, bool mustLoad, bool isT // Explicitly load the descriptor from the descriptor pointer Type *descType = FixedVectorType::get(getInt32Ty(), isTexelBuffer ? 4 : 8); - Value *desc = CreateLoad(descType, imageDesc); + // Use smaller alignment for better load speculation. + Value *desc = CreateAlignedLoad(descType, imageDesc, Align(4)); cast(desc)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(getContext(), {})); return desc; } @@ -2026,7 +2027,8 @@ Value *BuilderImpl::transformSamplerDesc(Value *samplerDesc) { // Explicitly load the descriptor from the descriptor pointer Type *descType = FixedVectorType::get(getInt32Ty(), 4); - Value *desc = CreateLoad(descType, samplerDesc); + // Use smaller alignment for better load speculation. 
+ Value *desc = CreateAlignedLoad(descType, samplerDesc, Align(4)); cast(desc)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(getContext(), {})); return desc; } diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index 14cd7b5032..0d1101b659 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -557,6 +557,36 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, // Mark usage for interpolation info. markInterpolationInfo(inOutInfo); } + + if (isOutput && m_shaderStage == ShaderStage::Mesh) { + // Record number of components for mesh shader outputs + for (unsigned i = 0; i < locationCount; ++i) { + unsigned numComponents = 0; + if (inOutInfo.getNumComponents() > 4) { + assert(locationCount % 2 == 0); // Must have even number of locations for 64-bit data type + assert(inOutInfo.getComponent() == 0); // Start component must be 0 in this case + // NOTE: For 64-bit vec3/vec4 data types, they will occupy two consecutive locations, we only record the number + // of components to the former one and skip the latter one. + if (i % 2 != 0) + continue; + numComponents = inOutInfo.getNumComponents(); + } else { + numComponents = inOutInfo.getComponent() + inOutInfo.getNumComponents(); + } + + // Update numComponents. If exists, always keep its max value. 
+ if (inOutInfo.isPerPrimitive()) { + numComponents = + std::max(numComponents, resUsage->inOutUsage.mesh.primitiveOutputComponents[location + i].first); + resUsage->inOutUsage.mesh.primitiveOutputComponents[location + i] = {numComponents, + static_cast(InvalidValue)}; + } else { + numComponents = std::max(numComponents, resUsage->inOutUsage.mesh.vertexOutputComponents[location + i].first); + resUsage->inOutUsage.mesh.vertexOutputComponents[location + i] = {numComponents, + static_cast(InvalidValue)}; + } + } + } } // ===================================================================================================================== @@ -1923,7 +1953,22 @@ void BuilderImpl::markBuiltInInputUsage(BuiltInKind &builtIn, unsigned arraySize case BuiltInSamplePosOffset: usage.fs.runAtSampleRate = true; break; - + case BuiltInPrimType: + usage.fs.primType = true; + break; + case BuiltInPrimCoord: + usage.fs.primCoord = true; + // This is an emulated built-in variable. + // Generated after all the defined attributes, stored in Z/W channel. + // Hence its value also depends on interp mode. + if (getPipelineState()->getRasterizerState().perSampleShading) + usage.fs.sample = true; + else + usage.fs.center = true; + break; + case BuiltInLineStipple: + usage.fs.lineStipple = true; + break; default: break; } diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index 652b7b920c..09553be177 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -285,14 +285,14 @@ class BuilderImpl : public BuilderDefs { public: // Create a buffer descriptor. 
llvm::Value *CreateBufferDesc(uint64_t descSet, unsigned binding, llvm::Value *descIndex, unsigned flags, - const llvm::Twine &instName = "") { - return createBufferDesc(descSet, binding, descIndex, flags, 0); + bool convertFatPointer, const llvm::Twine &instName = "") { + return createBufferDesc(descSet, binding, descIndex, flags, 0, convertFatPointer); } // Create a strided buffer descriptor. llvm::Value *CreateStridedBufferDesc(uint64_t descSet, unsigned binding, llvm::Value *descIndex, unsigned flags, unsigned stride, const llvm::Twine &instName = "") { - return createBufferDesc(descSet, binding, descIndex, flags, stride); + return createBufferDesc(descSet, binding, descIndex, flags, stride, true); } // Create a get of the stride (in bytes) of a descriptor. @@ -306,9 +306,6 @@ class BuilderImpl : public BuilderDefs { // Create a load of the push constants pointer. llvm::Value *CreateLoadPushConstantsPtr(const llvm::Twine &instName = ""); - // Calculate a buffer descriptor for an inline buffer - llvm::Value *buildInlineBufferDesc(llvm::Value *descPtr, unsigned stride); - // Check whether vertex buffer descriptors are in a descriptor array binding instead of the VertexBufferTable. bool useVertexBufferDescArray(); @@ -331,7 +328,7 @@ class BuilderImpl : public BuilderDefs { // Create a buffer descriptor. 
llvm::Value *createBufferDesc(uint64_t descSet, unsigned binding, llvm::Value *descIndex, unsigned flags, - unsigned stride, const llvm::Twine &instName = ""); + unsigned stride, bool convertFatPointer, const llvm::Twine &instName = ""); // ------------------------------------------------------------------------------------------------------------------- // Image operations diff --git a/lgc/include/lgc/builder/BuilderReplayer.h b/lgc/include/lgc/builder/BuilderReplayer.h index 1e113e8308..0b7f9948ab 100644 --- a/lgc/include/lgc/builder/BuilderReplayer.h +++ b/lgc/include/lgc/builder/BuilderReplayer.h @@ -34,6 +34,11 @@ #include "llvm/IR/PassManager.h" #include +namespace llvm { +class CallInst; +class Value; +} // namespace llvm + namespace lgc { class BuilderImpl; diff --git a/lgc/include/lgc/patch/LowerGpuRt.h b/lgc/include/lgc/patch/LowerGpuRt.h index 7923c1d9b2..de44bec437 100644 --- a/lgc/include/lgc/patch/LowerGpuRt.h +++ b/lgc/include/lgc/patch/LowerGpuRt.h @@ -33,6 +33,12 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/IR/PassManager.h" +namespace llvm { +class Value; +class Type; +class Instruction; +} // namespace llvm + namespace lgc { class Builder; class PipelineState; @@ -45,6 +51,7 @@ class GpurtStackReadOp; class GpurtLdsStackInitOp; class GpurtLdsStackStoreOp; class GpurtGetBoxSortHeuristicModeOp; +class GpurtGetRayQueryDispatchIdOp; class GpurtGetStaticFlagsOp; class GpurtGetTriangleCompressionModeOp; class GpurtGetFlattenedGroupThreadIdOp; @@ -52,6 +59,8 @@ class GpurtFloatWithRoundModeOp; class GpurtDispatchThreadIdFlatOp; class GpurtContinuationStackIsGlobalOp; class GpurtWaveScanOp; +class GpurtGetKnownSetRayFlagsOp; +class GpurtGetKnownUnsetRayFlagsOp; class LowerGpuRt : public llvm::PassInfoMixin { public: @@ -72,6 +81,7 @@ class LowerGpuRt : public llvm::PassInfoMixin { void visitLdsStackInit(lgc::GpurtLdsStackInitOp &inst); void visitLdsStackStore(lgc::GpurtLdsStackStoreOp &inst); void 
visitGetBoxSortHeuristicMode(lgc::GpurtGetBoxSortHeuristicModeOp &inst); + void visitGetRayQueryDispatchId(lgc::GpurtGetRayQueryDispatchIdOp &inst); void visitGetStaticFlags(lgc::GpurtGetStaticFlagsOp &inst); void visitGetTriangleCompressionMode(lgc::GpurtGetTriangleCompressionModeOp &inst); void visitGetFlattenedGroupThreadId(lgc::GpurtGetFlattenedGroupThreadIdOp &inst); @@ -79,6 +89,8 @@ class LowerGpuRt : public llvm::PassInfoMixin { void visitGpurtDispatchThreadIdFlatOp(lgc::GpurtDispatchThreadIdFlatOp &inst); void visitContinuationStackIsGlobalOp(lgc::GpurtContinuationStackIsGlobalOp &inst); void visitWaveScanOp(lgc::GpurtWaveScanOp &inst); + void visitGetKnownSetRayFlagsOp(lgc::GpurtGetKnownSetRayFlagsOp &inst); + void visitGetKnownUnsetRayFlagsOp(lgc::GpurtGetKnownUnsetRayFlagsOp &inst); llvm::Value *m_stack = nullptr; // Stack array to hold stack value llvm::Type *m_stackTy = nullptr; // Stack type PipelineState *m_pipelineState = nullptr; // Pipeline state diff --git a/lgc/include/lgc/patch/PatchBufferOp.h b/lgc/include/lgc/patch/PatchBufferOp.h index 0b8d6ee522..332d5bdd5e 100644 --- a/lgc/include/lgc/patch/PatchBufferOp.h +++ b/lgc/include/lgc/patch/PatchBufferOp.h @@ -53,6 +53,8 @@ namespace lgc { class BufferAddrToPtrOp; class BufferDescToPtrOp; class StridedBufferDescToPtrOp; +class BufferLoadDescToPtrOp; +class StridedBufferLoadDescToPtrOp; class StridedBufferAddrAndStrideToPtrOp; class StridedIndexAddOp; class BufferLengthOp; @@ -85,10 +87,12 @@ class BufferOpLowering { public: #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 458033 // Old version of the code - BufferOpLowering(TypeLowering &typeLowering, PipelineState &pipelineState, llvm::DivergenceInfo &divergenceInfo); + BufferOpLowering(CompilerUtils::TypeLowering &typeLowering, PipelineState &pipelineState, + llvm::DivergenceInfo &divergenceInfo); #else // New version of the code (also handles unknown version, which we treat as latest) - BufferOpLowering(TypeLowering &typeLowering, PipelineState 
&pipelineState, llvm::UniformityInfo &uniformityInfo); + BufferOpLowering(CompilerUtils::TypeLowering &typeLowering, PipelineState &pipelineState, + llvm::UniformityInfo &uniformityInfo); #endif static void registerVisitors(llvm_dialects::VisitorBuilder &builder); @@ -102,6 +106,8 @@ class BufferOpLowering { void visitBufferAddrToPtr(BufferAddrToPtrOp &op); void visitBufferDescToPtr(BufferDescToPtrOp &descToPtr); void visitStridedBufferDescToPtr(StridedBufferDescToPtrOp &descToPtr); + void visitBufferLoadDescToPtr(BufferLoadDescToPtrOp &loadDescToPtr); + void visitStridedBufferLoadDescToPtr(StridedBufferLoadDescToPtrOp &loadDescToPtr); void visitStridedBufferAddrAndStrideToPtr(StridedBufferAddrAndStrideToPtrOp &addrAndStrideToPtr); void visitStridedIndexAdd(StridedIndexAddOp &indexAdd); void visitBufferLength(BufferLengthOp &length); @@ -133,8 +139,10 @@ class BufferOpLowering { llvm::Value *createGlobalPointerAccess(llvm::Value *const bufferDesc, llvm::Value *const offset, llvm::Type *const type, llvm::Instruction &inst, const llvm::function_ref callback); + llvm::Value *createCompactDesc(llvm::Value *const buffAddress, llvm::Value *const stride); + llvm::Value *createLoadDesc(llvm::Value *buffAddress, bool forceRawView, bool isCompact); - TypeLowering &m_typeLowering; + CompilerUtils::TypeLowering &m_typeLowering; llvm::IRBuilder<> m_builder; PipelineState &m_pipelineState; diff --git a/lgc/include/lgc/patch/PatchEntryPointMutate.h b/lgc/include/lgc/patch/PatchEntryPointMutate.h index e56338b3ef..70b83baa65 100644 --- a/lgc/include/lgc/patch/PatchEntryPointMutate.h +++ b/lgc/include/lgc/patch/PatchEntryPointMutate.h @@ -172,6 +172,10 @@ class PatchEntryPointMutate : public Patch, public llvm::PassInfoMixin attribValues); void exportVertexAttribs(BuilderBase &builder); diff --git a/lgc/include/lgc/patch/PatchMulDx9Zero.h b/lgc/include/lgc/patch/PatchMulDx9Zero.h new file mode 100644 index 0000000000..47861db8df --- /dev/null +++ 
b/lgc/include/lgc/patch/PatchMulDx9Zero.h @@ -0,0 +1,68 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file PatchMulDx9Zero.h + * @brief LLPC header file: contains declaration of class lgc::PatchMulDx9Zero. 
+ *********************************************************************************************************************** + */ +#pragma once + +#include "lgc/Builder.h" +#include "lgc/patch/Patch.h" +#include "lgc/state/PipelineShaders.h" +#include "lgc/state/PipelineState.h" +#include "llvm/IR/InstVisitor.h" + +namespace lgc { + +// ===================================================================================================================== +// Represents the pass of LLVM patching operations when detect muldx9zero pattern like: +// ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) or +// ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) or +// fma((b==0.0 ? 0.0 : a), (a==0.0 ? 0.0 : b), c) +class PatchMulDx9Zero final : public llvm::InstVisitor, public llvm::PassInfoMixin { +public: + explicit PatchMulDx9Zero(); + + llvm::PreservedAnalyses run(llvm::Function &function, llvm::FunctionAnalysisManager &analysisManager); + + static llvm::StringRef name() { + return "Run the pass to lower fmul or fma following Dx9 rules where 0 times anything produces 0.0\n"; + } + + virtual void visitCallInst(llvm::CallInst &callInst); + + virtual void visitBinaryOperator(llvm::BinaryOperator &binaryOp); + + std::optional> isMulDx9Zero(llvm::Value *lhs, llvm::Value *rhs); + +private: + bool m_changed; + + std::unique_ptr> m_builder; +}; + +} // namespace lgc diff --git a/lgc/include/lgc/patch/ShaderInputs.h b/lgc/include/lgc/patch/ShaderInputs.h index 9672d4f9f7..390cc43699 100644 --- a/lgc/include/lgc/patch/ShaderInputs.h +++ b/lgc/include/lgc/patch/ShaderInputs.h @@ -68,7 +68,9 @@ enum class ShaderInput : unsigned { MultiDispatchInfo, // Multiple dispatch info, include TG_SIZE and etc. 
// FS SGPRs - PrimMask, // Primitive mask + PrimMask, // Primitive mask + CollisionWaveId, // POPS collision wave ID + ProvokingVtxInfo, // Provoking vertex info // Appears in hardware HS, ES, VS SGPRs OffChipLdsBase, // Off-chip LDS buffer base @@ -91,9 +93,6 @@ enum class ShaderInput : unsigned { // Unmerged hardware HS SGPRs TfBufferBase, // TF buffer base - // FS SGPRs - ProvokingVtxInfo, // Provoking vertex info - FirstVgpr, // Enums less than this are SGPRs // API VS VGPRs diff --git a/lgc/include/lgc/state/AbiMetadata.h b/lgc/include/lgc/state/AbiMetadata.h index a11fa5bef9..017b3a7dc5 100644 --- a/lgc/include/lgc/state/AbiMetadata.h +++ b/lgc/include/lgc/state/AbiMetadata.h @@ -153,7 +153,8 @@ static constexpr char PsInputSemantic[] = ".ps_input_semantic"; static constexpr char PsDummyExport[] = ".ps_dummy_export"; static constexpr char PrerasterOutputSemantic[] = ".preraster_output_semantic"; static constexpr char ShaderFunctions[] = ".shader_functions"; -}; // namespace PipelineMetadataKey +static constexpr char UsesCps[] = ".uses_cps"; +} // namespace PipelineMetadataKey namespace HardwareStageMetadataKey { static constexpr char EntryPoint[] = ".entry_point"; @@ -354,6 +355,7 @@ static constexpr char ExecOnNoop[] = ".exec_on_noop"; static constexpr char AlphaToMaskDisable[] = ".alpha_to_mask_disable"; static constexpr char DepthBeforeShader[] = ".depth_before_shader"; static constexpr char ConservativeZExport[] = ".conservative_z_export"; +static constexpr char PrimitiveOrderedPixelShader[] = ".primitive_ordered_pixel_shader"; static constexpr char PreShaderDepthCoverageEnable[] = ".pre_shader_depth_coverage_enable"; }; // namespace DbShaderControlMetadataKey @@ -378,6 +380,7 @@ static constexpr char PrimAttr[] = ".prim_attr"; namespace SpiPsInControlMetadataKey { static constexpr char NumInterps[] = ".num_interps"; static constexpr char NumPrimInterp[] = ".num_prim_interp"; +static constexpr char ParamGen[] = ".param_gen"; static constexpr char 
PsW32En[] = ".ps_w32_en"; }; // namespace SpiPsInControlMetadataKey @@ -642,8 +645,9 @@ inline const char *shaderStageToApiName(ShaderStageEnum stage) { return ".pixel"; case ShaderStage::Compute: return ".compute"; + default: + llvm::report_fatal_error("No api name for this shader stage"); } - llvm::report_fatal_error("No api name for this shader stage"); } // The names of hardware shader stages used in PAL metadata, in Util::Abi::HardwareStage order. diff --git a/lgc/include/lgc/state/Defs.h b/lgc/include/lgc/state/Defs.h index 18ae6090cf..073a144f66 100644 --- a/lgc/include/lgc/state/Defs.h +++ b/lgc/include/lgc/state/Defs.h @@ -62,7 +62,7 @@ const static char ImmutableConvertingSamplerGlobal[] = "lgc.immutable.converting const static char EsGsEntryPoint[] = "lgc.shader.ESGS.main"; const static char LsHsEntryPoint[] = "lgc.shader.LSHS.main"; -const static char NggAttribExport[] = "lgc.ngg.attrib.export"; +const static char NggAttributeThroughMemory[] = "lgc.ngg.attribute.through.memory"; const static char NggXfbExport[] = "lgc.ngg.xfb.export."; const static char NggWriteGsOutput[] = "lgc.ngg.write.GS.output."; const static char NggReadGsOutput[] = "lgc.ngg.read.GS.output."; diff --git a/lgc/include/lgc/state/IntrinsDefs.h b/lgc/include/lgc/state/IntrinsDefs.h index e6a3c32a86..39ba91c3d4 100644 --- a/lgc/include/lgc/state/IntrinsDefs.h +++ b/lgc/include/lgc/state/IntrinsDefs.h @@ -43,11 +43,12 @@ static const unsigned MaxGeometryOutputVertices = (1 << 11) - 1; static const unsigned MaxComputeWorkgroupSize = (1 << 16) - 1; // Messages that can be generated by using s_sendmsg -static const unsigned HsTessFactor = 2; // HS Tessellation factor is all zero or one -static const unsigned GsDone = 3; // GS wave is done -static const unsigned GsAllocReq = 9; // GS requests that parameter cache space be allocated -static const unsigned GsCut = 0x12; // [3:0] = 2 (GS), [5:4] = 1 (cut) -static const unsigned GsEmit = 0x22; // [3:0] = 2 (GS), [5:4] = 2 (emit) +static const 
unsigned HsTessFactor = 2; // HS Tessellation factor is all zero or one +static const unsigned GsDone = 3; // GS wave is done +static const unsigned OrderedPsDone = 7; // Signal end of primitive ordered pixel shading critical section +static const unsigned GsAllocReq = 9; // GS requests that parameter cache space be allocated +static const unsigned GsCut = 0x12; // [3:0] = 2 (GS), [5:4] = 1 (cut) +static const unsigned GsEmit = 0x22; // [3:0] = 2 (GS), [5:4] = 2 (emit) static const unsigned GsCutStreaM0 = 0x12; // [3:0] = 2 (GS), [5:4] = 1 (cut), [9:8] = 0 (stream0) static const unsigned GsCutStreaM1 = 0x112; // [3:0] = 2 (GS), [5:4] = 1 (cut), [9:8] = 1 (stream1) diff --git a/lgc/include/lgc/state/PipelineState.h b/lgc/include/lgc/state/PipelineState.h index 9bc8bed882..018be0f05b 100644 --- a/lgc/include/lgc/state/PipelineState.h +++ b/lgc/include/lgc/state/PipelineState.h @@ -352,6 +352,9 @@ class PipelineState final : public Pipeline { // Checks if SW-emulated stream-out should be enabled bool enableSwXfb(); + // Checks if we export vertex/primitive attributes by parameter export instruction + bool exportAttributeByExportInstruction() const; + // Gets resource usage of the specified shader stage ResourceUsage *getShaderResourceUsage(ShaderStageEnum shaderStage); @@ -518,7 +521,7 @@ class PipelineState final : public Pipeline { llvm::MutableArrayRef values(reinterpret_cast(&value), sizeof(value) / sizeof(unsigned)); unsigned count = std::min(metaNode->getNumOperands(), unsigned(values.size())); for (unsigned index = 0; index < count; ++index) - values[index] = llvm::mdconst::dyn_extract(metaNode->getOperand(index))->getZExtValue(); + values[index] = llvm::mdconst::extract(metaNode->getOperand(index))->getZExtValue(); return count; } diff --git a/lgc/include/lgc/state/ResourceUsage.h b/lgc/include/lgc/state/ResourceUsage.h index 6b17320cd5..b4316ff5cc 100644 --- a/lgc/include/lgc/state/ResourceUsage.h +++ b/lgc/include/lgc/state/ResourceUsage.h @@ -330,6 +330,9 
@@ struct ResourceUsage { unsigned baryCoord : 1; // Whether gl_BaryCoordKHR is used unsigned baryCoordNoPerspKHR : 1; // Whether gl_BaryCoordNoPerspKHR is used, distinction from // gl_BaryCoordNoPersp + unsigned primType : 1; // Whether primType is used, for OGL special FS emulation usage. + unsigned primCoord : 1; // Whether primCoord is used, for OGL internal FS emulation usage. + unsigned lineStipple : 1; // Whether lineStipple is used, for OGL internal FS emulation usage. // Output unsigned fragDepth : 1; // Whether gl_FragDepth is used unsigned sampleMask : 1; // Whether gl_SampleMask[] is used @@ -459,13 +462,10 @@ struct ResourceUsage { } gs; struct { - // Map from built-in output IDs to their export slots (to fragment shader): - std::map vertexBuiltInExportSlots; - std::map primitiveBuiltInExportSlots; - - // Export count for generic outputs (excluding those special outputs to which the built-ins are mapped) - unsigned vertexGenericOutputExportCount = 0; - unsigned primitiveGenericOutputExportCount = 0; + // Map from output locations to their number of components: > (including + // those special outputs to which built-ins are mapped) + std::map> vertexOutputComponents; + std::map> primitiveOutputComponents; } mesh; struct { @@ -590,9 +590,10 @@ struct InterfaceData { // Fragment shader struct { - unsigned viewId; // View ID - unsigned primMask; // Primitive mask - unsigned compositeData; // CompositeData + unsigned viewId; // View ID + unsigned primMask; // Primitive mask + unsigned collisionWaveId; // POPS collision wave ID (pre-GFX11) + unsigned compositeData; // CompositeData // Perspective interpolation (I/J) struct { @@ -609,6 +610,8 @@ struct InterfaceData { unsigned centroid; // Centroid } linearInterp; + unsigned lineStipple; // Line Stipple Tex Ena (f32) + // FragCoord struct { unsigned x; // X channel diff --git a/lgc/interface/lgc/Builder.h b/lgc/interface/lgc/Builder.h index 85ef8a388a..f48bf0a7ad 100644 --- a/lgc/interface/lgc/Builder.h +++
b/lgc/interface/lgc/Builder.h @@ -919,6 +919,9 @@ class Builder : public BuilderDefs { // Get address space of constant memory. static unsigned getAddrSpaceConst(); + // Get address space of global memory. + static unsigned getAddrSpaceGlobal(); + // Get address space of local (thread-global) memory. static unsigned getAddrSpaceLocal(); diff --git a/lgc/interface/lgc/BuilderCommon.h b/lgc/interface/lgc/BuilderCommon.h index 519758946d..47d158b997 100644 --- a/lgc/interface/lgc/BuilderCommon.h +++ b/lgc/interface/lgc/BuilderCommon.h @@ -58,9 +58,7 @@ class BuilderCommon : public llvm_dialects::Builder { llvm::VectorType *getDescTy(ResourceNodeType descType); // Get the type of pointer to descriptor. - // - // @param descType : Descriptor type, one of the ResourceNodeType values - llvm::Type *getDescPtrTy(ResourceNodeType descType); + llvm::Type *getDescPtrTy(); // Get the type of pointer returned by CreateLoadBufferDesc. llvm::PointerType *getBufferDescTy(); @@ -88,6 +86,14 @@ class BuilderCommon : public llvm_dialects::Builder { llvm::CallInst *CreateNamedCall(llvm::StringRef funcName, llvm::Type *retTy, llvm::ArrayRef args, llvm::ArrayRef attribs, const llvm::Twine &instName = ""); + // Create an llvm.assume call to annotate the dereferenceable and alignment attributes of the pointer. We only insert + // the call if dereferenceable > 0 or align > 1. + // + // @param ptr : The pointer to be annotated. + // @param dereferenceable : the dereferenceable size (in bytes) of the pointer + // @param align : the alignment of the pointer. + llvm::CallInst *CreateAssumptionDereferenceableAndAlign(llvm::Value *ptr, unsigned dereferenceable, unsigned align); + // Create code to build a vector out of a number of scalar elements of the same type. llvm::Value *CreateBuildVector(llvm::ArrayRef elements, const llvm::Twine &instName = ""); @@ -98,7 +104,7 @@ class BuilderCommon : public llvm_dialects::Builder { // Create alloca for given input type. // // @param ty : pointer type. 
- llvm::Value *CreateAllocaAtFuncEntry(llvm::Type *ty); + llvm::Value *CreateAllocaAtFuncEntry(llvm::Type *ty, const llvm::Twine &allocaName = ""); // Create a "debug break". // diff --git a/lgc/interface/lgc/BuiltInDefs.h b/lgc/interface/lgc/BuiltInDefs.h index 73e38a7fdc..b0c6ef281f 100644 --- a/lgc/interface/lgc/BuiltInDefs.h +++ b/lgc/interface/lgc/BuiltInDefs.h @@ -133,4 +133,7 @@ BUILTIN(GsWaveId, BuiltInInternalBase + 10, N, G, i32) BUILTIN(UnswizzledLocalInvocationId, BuiltInInternalBase + 11, N, C, i32) BUILTIN(UnswizzledLocalInvocationIndex, BuiltInInternalBase + 12, N, C, i32) -BUILTIN(EdgeFlag, BuiltInInternalBase + 18, V, V, i32) // EdgeFlag output +BUILTIN(EdgeFlag, BuiltInInternalBase + 18, V, V, i32) // EdgeFlag output +BUILTIN(PrimType, BuiltInInternalBase + 20, N, P, i32) // Prim Type input +BUILTIN(PrimCoord, BuiltInInternalBase + 21, N, P, v2f32) // Prim Coord input +BUILTIN(LineStipple, BuiltInInternalBase + 22, N, P, f32) // Line Stipple input diff --git a/lgc/interface/lgc/CommonDefs.h b/lgc/interface/lgc/CommonDefs.h index 39af5ff976..4ae27eeabb 100644 --- a/lgc/interface/lgc/CommonDefs.h +++ b/lgc/interface/lgc/CommonDefs.h @@ -99,17 +99,17 @@ class ShaderStageMask { constexpr explicit ShaderStageMask(ShaderStageEnum stage) { assert(static_cast(stage) < 32 && "ShaderStage mask overflowed"); m_value = 1U << static_cast(stage); - }; + } constexpr explicit ShaderStageMask(std::initializer_list stages) { for (auto stage : stages) *this |= ShaderStageMask(stage); - }; + } template constexpr explicit ShaderStageMask(const std::array &stages) { for (auto stage : stages) *this |= ShaderStageMask(stage); - }; + } constexpr static ShaderStageMask fromRaw(uint32_t mask) { ShaderStageMask result; diff --git a/lgc/interface/lgc/LgcDialect.td b/lgc/interface/lgc/LgcDialect.td index 2f80b43bf9..8f904b2be7 100644 --- a/lgc/interface/lgc/LgcDialect.td +++ b/lgc/interface/lgc/LgcDialect.td @@ -47,7 +47,7 @@ class LgcOp traits_ = []> : Op; def 
BufferAddrToPtrOp : LgcOp<"buffer.addr.to.ptr", [Memory<[]>, WillReturn]> { - let arguments = (ins I64:$addr); + let arguments = (ins I64:$address); let results = (outs BufferPointer:$result); let summary = "convert a buffer address into a buffer fat pointer"; @@ -162,6 +162,26 @@ def LoadStridedBufferDescOp : LgcOp<"load.strided.buffer.desc", [Memory<[]>, Wil }]; } +def BufferLoadDescToPtrOp : LgcOp<"buffer.load.desc.to.ptr", [Memory<[]>, WillReturn]> { + let arguments = (ins ConstantPointer:$descPtr, AttrI1:$forceRawView, AttrI1:$isCompact); + let results = (outs BufferPointer:$result); + + let summary = "convert a constant buffer pointer to a fat buffer pointer and implicitly do the load"; + let description = [{ + Given a constant buffer pointer, load from the pointer and returns a fat buffer pointer to the start of the buffer. + }]; +} + +def StridedBufferLoadDescToPtrOp : LgcOp<"stride.buffer.load.desc.to.ptr", [Memory<[]>, WillReturn]> { + let arguments = (ins ConstantPointer:$descPtr, AttrI1:$forceRawView, AttrI1:$isCompact, I32:$stride); + let results = (outs BufferStridedPointer:$result); + + let summary = "convert a constant buffer pointer to a fat strided buffer pointer and implicitly do the load"; + let description = [{ + Given a constant buffer pointer, load from the buffer pointer and returns a fat strided buffer descriptor pointer to the start of the buffer. 
+ }]; +} + def DebugPrintfOp : LgcOp<"debug.printf", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { let arguments = (ins ImmutableStringAttr:$format, varargs:$args); let results = (outs); @@ -256,30 +276,19 @@ def GetMeshBuiltinInputOp : LgcOp<"get.mesh.builtin.input", [Memory<[]>, WillRet }]; } -def WriteMeshVertexOutputOp : LgcOp<"write.mesh.vertex.output", [Memory<[]>]> { - let arguments = (ins I32:$output_offset, I32:$vertex_index, value:$output_value); - let results = (outs); - - let summary = "Write mesh shader vertex outputs"; - let description = [{ - In the mesh shader, write mesh shader vertex outputs to LDS. - - `output_offset` is the relative offset of this output (in dwords) within all outputs of the indexed vertex. - `vertex_index` is the vertex index specifying which vertex to write. - `output_value` is the output value to write. - }]; -} - -def WriteMeshPrimitiveOutputOp : LgcOp<"write.mesh.primitive.output", [Memory<[]>]> { - let arguments = (ins I32:$output_offset, I32:$primitive_index, value:$output_value); +def WriteMeshOutputOp : LgcOp<"write.mesh.output", [Memory<[]>]> { + let arguments = (ins AttrI1:$is_primitive, AttrI32:$location, I32:$location_offset, I32:$component_index, I32:$prim_or_vertex_index, value:$output_value); let results = (outs); - let summary = "Write mesh shader primitive outputs"; + let summary = "Write mesh shader primitive/vertex outputs"; let description = [{ - In the mesh shader, write mesh shader primitive outputs to LDS. + In the mesh shader, write mesh shader primitive/vertex outputs to LDS. - `output_offset` is the relative offset of this output (in dwords) within all outputs of the indexed primitive. - `primitive_index` is the primitive index specifying which primitive to write. + `is_primitive` indicates if this write is for a primitive output or for a vertex output. + `location` is the start location of this output. 
+ `location_offset` is the relative location offset of this output, used by arrayed outputs. + `component_index` is the component index of this output when component addressing is involved. + `prim_or_vertex_index` is the primitive/vertex index specifying which primitive/vertex to write. `output_value` is the output value to write. }]; } @@ -370,6 +379,26 @@ def InputImportInterpolatedOp : LgcOp<"input.import.interpolated", [Memory<[]>, }]; } +def PopsBeginInterlockOp : LgcOp<"pops.begin.interlock", [WillReturn]> { + let arguments = (ins); + let results = (outs); + + let summary = "begin a POPS critical section"; + let description = [{ + Only used in PS to begin a critical section of primitive ordered pixel shading (POPS). + }]; +} + +def PopsEndInterlockOp : LgcOp<"pops.end.interlock", [WillReturn]> { + let arguments = (ins); + let results = (outs); + + let summary = "end a POPS critical section"; + let description = [{ + Only used in PS to end a critical section of primitive ordered pixel shading (POPS). + }]; +} + def LoadUserDataOp : LgcOp<"load.user.data", [Memory<[]>, WillReturn]> { let arguments = (ins AttrI32:$offset); let results = (outs value:$result); diff --git a/lgc/interface/lgc/ModuleBunch.h b/lgc/interface/lgc/ModuleBunch.h index a7b80245ca..dbc5c81404 100644 --- a/lgc/interface/lgc/ModuleBunch.h +++ b/lgc/interface/lgc/ModuleBunch.h @@ -28,6 +28,7 @@ #pragma once +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/PassBuilder.h" @@ -49,22 +50,29 @@ class ModuleBunch { using iterator = llvm::pointee_iterator>::iterator>; // Access the Modules in the ModuleBunch, without erasing/removing/replacing them. 
- iterator begin() const { return iterator(Modules.begin()); } - iterator end() const { return iterator(Modules.end()); } + iterator begin() const { + assert(isNormalized()); + return iterator(Modules.begin()); + } + iterator end() const { + assert(isNormalized()); + return iterator(Modules.end()); + } size_t size() const { return end() - begin(); } bool empty() const { return size() == 0; } // Access the array of Modules in the ModuleBunch, directly accessing the unique_ptrs // for erasing/removing/replacing them. // After doing that, call renormalize() to remove any holes. - MutableArrayRef> getMutableModules() { - assert(isNormalized()); - return Modules; - } + MutableArrayRef> getMutableModules() { return Modules; } // Add Module to ModuleBunch, taking ownership. Invalidates modules() iterator. void addModule(std::unique_ptr module); + // Remove Module from ModuleBunch, returning ownership to the caller. + // Returns empty unique_ptr if Module not found. + std::unique_ptr removeModule(const Module *moduleToRemove); + // Renormalize ModuleBunch's array of Modules after manipulation by user. // Invalidates modules() iterator. void renormalize(); @@ -103,6 +111,12 @@ inline raw_ostream &operator<<(raw_ostream &O, const ModuleBunch &MB) { return O; } +#if !defined(LLVM_MAIN_REVISION) || LLVM_MAIN_REVISION >= 503109 +template <> inline void printIRUnitNameForStackTrace(raw_ostream &OS, const ModuleBunch &IR) { + OS << "Anonymous ModuleBunch \""; +} +#endif + extern template class PassManager; extern template class AnalysisManager; extern template class AllAnalysesOn; diff --git a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 7bdb636f6c..fe382cf6f8 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ -196,6 +196,8 @@ union Options { bool forceFragColorDummyExport; // Force dummy export is added to fragment shader color export. unsigned reserved22; bool dynamicTopology; // Whether primitive topology is dynamic. 
+ bool reserved23; + bool forceUserDataSpill; // Whether to force all user data to be spilled (Currently only for RT). }; }; static_assert(sizeof(Options) == sizeof(Options::u32All)); @@ -312,6 +314,8 @@ union ShaderOptions { InvariantLoadsOption aggressiveInvariantLoads; bool reserved; + /// Let dmask bits be fully enabled when calling 'image.sample.c', for depth compare mode swizzling workaround. + bool imageSampleDrefReturnsRgba; }; }; static_assert(sizeof(ShaderOptions) == sizeof(ShaderOptions::u32All)); @@ -675,6 +679,7 @@ struct FragmentShaderMode { unsigned postDepthCoverage; unsigned earlyAndLatFragmentTests; unsigned innerCoverage; + unsigned enablePops; unsigned waveOpsExcludeHelperLanes; unsigned noReciprocalFragCoordW; ConservativeDepth conservativeDepth; diff --git a/lgc/patch/FragColorExport.cpp b/lgc/patch/FragColorExport.cpp index b7dd372a75..c8459c1514 100644 --- a/lgc/patch/FragColorExport.cpp +++ b/lgc/patch/FragColorExport.cpp @@ -41,6 +41,7 @@ #include "lgc/util/AddressExtender.h" #include "lgc/util/BuilderBase.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -271,7 +272,7 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw builder.getTrue() // vm }; - exportCall = builder.CreateNamedCall("llvm.amdgcn.exp.compr.v2f16", builder.getVoidTy(), args, {}); + exportCall = builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp_compr, args); } else { Value *args[] = { builder.getInt32(EXP_TARGET_MRT_0 + hwColorExport), // tgt @@ -284,7 +285,7 @@ Value *FragColorExport::handleColorExportInstructions(Value *output, unsigned hw builder.getTrue() // vm }; - exportCall = builder.CreateNamedCall("llvm.amdgcn.exp.f32", builder.getVoidTy(), args, {}); + exportCall = builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args); } return exportCall; @@ -446,6 +447,21 @@ PreservedAnalyses
LowerFragColorExport::run(Module &module, ModuleAnalysisManage BuilderBase builder(module.getContext()); builder.SetInsertPoint(retInst); + for (auto &inst : instructions(fragEntryPoint)) { + unsigned addrSpace = 0; + if (auto store = dyn_cast(&inst)) + addrSpace = store->getPointerAddressSpace(); + else if (auto atomicRmw = dyn_cast(&inst)) + addrSpace = atomicRmw->getPointerAddressSpace(); + else if (auto atomicXchg = dyn_cast(&inst)) + addrSpace = atomicXchg->getPointerAddressSpace(); + + if (addrSpace == ADDR_SPACE_GLOBAL) { + m_resUsage->resourceWrite = true; + break; + } + } + collectExportInfoForBuiltinOutput(fragEntryPoint, builder); collectExportInfoForGenericOutputs(fragEntryPoint, builder); @@ -458,9 +474,11 @@ PreservedAnalyses LowerFragColorExport::run(Module &module, ModuleAnalysisManage // Just according to the dualSourceBlendEnable flag. Value *dynamicIsDualSource = builder.getInt32(0); if (m_pipelineState->getTargetInfo().getGfxIpVersion().major >= 11) { - dynamicIsDualSource = ShaderInputs::getSpecialUserData(UserDataMapping::CompositeData, builder); - dynamicIsDualSource = builder.CreateIntrinsic(Intrinsic::amdgcn_ubfe, builder.getInt32Ty(), - {dynamicIsDualSource, builder.getInt32(7), builder.getInt32(1)}); + if (m_pipelineState->isUnlinked() || m_pipelineState->getColorExportState().dualSourceBlendDynamicEnable) { + dynamicIsDualSource = ShaderInputs::getSpecialUserData(UserDataMapping::CompositeData, builder); + dynamicIsDualSource = builder.CreateIntrinsic(Intrinsic::amdgcn_ubfe, builder.getInt32Ty(), + {dynamicIsDualSource, builder.getInt32(7), builder.getInt32(1)}); + } } bool willGenerateColorExportShader = m_pipelineState->isUnlinked() && !m_pipelineState->hasColorExportFormats(); @@ -470,7 +488,8 @@ PreservedAnalyses LowerFragColorExport::run(Module &module, ModuleAnalysisManage } FragColorExport fragColorExport(m_pipelineState->getLgcContext()); - bool dummyExport = m_resUsage->builtInUsage.fs.discard || 
m_pipelineState->getOptions().forceFragColorDummyExport; + bool dummyExport = m_resUsage->builtInUsage.fs.discard || m_pipelineState->getOptions().forceFragColorDummyExport || + m_pipelineState->getShaderModes()->getFragmentShaderMode().enablePops; FragColorExport::Key key = FragColorExport::computeKey(m_info, m_pipelineState); fragColorExport.generateExportInstructions(m_info, m_exportValues, dummyExport, m_pipelineState->getPalMetadata(), builder, dynamicIsDualSource, key); @@ -840,7 +859,7 @@ Value *FragColorExport::dualSourceSwizzle(unsigned waveSize, BuilderBase &builde builder.getFalse(), // done builder.getTrue() // vm }; - builder.CreateNamedCall("llvm.amdgcn.exp.f32", builder.getVoidTy(), args0, {}); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args0); Value *args1[] = { builder.getInt32(EXP_TARGET_DUAL_SRC_1), // tgt @@ -852,7 +871,7 @@ Value *FragColorExport::dualSourceSwizzle(unsigned waveSize, BuilderBase &builde builder.getFalse(), // done builder.getTrue() // vm }; - return builder.CreateNamedCall("llvm.amdgcn.exp.f32", builder.getVoidTy(), args1, {}); + return builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_exp, args1); } // ===================================================================================================================== diff --git a/lgc/patch/LowerDebugPrintf.cpp b/lgc/patch/LowerDebugPrintf.cpp index 3ccc0d5862..ad8ab7bdae 100644 --- a/lgc/patch/LowerDebugPrintf.cpp +++ b/lgc/patch/LowerDebugPrintf.cpp @@ -88,10 +88,9 @@ PreservedAnalyses LowerDebugPrintf::run(Module &module, ModuleAnalysisManager &a for (auto func : printfFuncs) { // Create printbuffer Descriptor at the beginning of the function which contains DebugPrintf dialect ops builder.SetInsertPointPastAllocas(func); - m_debugPrintfBuffer = hasPrintfDesc - ? 
m_debugPrintfBuffer = builder.create(builder.CreateBufferDesc( - InternalDescriptorSetId, PrintfBufferBindingId, builder.getInt32(0), 2)) - : nullptr; + m_debugPrintfBuffer = hasPrintfDesc ? builder.CreateBufferDesc(InternalDescriptorSetId, PrintfBufferBindingId, + builder.getInt32(0), 2, true) + : nullptr; lowerDebugfPrintOpVisitor.visit(*this, *func); } diff --git a/lgc/patch/LowerDesc.cpp b/lgc/patch/LowerDesc.cpp index 198964c6d8..dbb7a17269 100644 --- a/lgc/patch/LowerDesc.cpp +++ b/lgc/patch/LowerDesc.cpp @@ -93,12 +93,12 @@ void LowerDesc::visitLoadBufferDesc(LoadBufferDescOp &op) { // available in LGC as we don't expect front-end would required such usage.) assert(!(flags & Builder::BufferFlagAddress) && "Returning a 64-bit address is unsupported by lgc.load.buffer.desc"); - Value *desc = builder.CreateBufferDesc(op.getDescSet(), op.getBinding(), op.getDescIndex(), flags); + Value *desc = builder.CreateBufferDesc(op.getDescSet(), op.getBinding(), op.getDescIndex(), flags, true); m_toErase.push_back(&op); // Convert to fat pointer. 
- op.replaceAllUsesWith(builder.create(desc)); + op.replaceAllUsesWith(desc); } // ===================================================================================================================== @@ -121,6 +121,6 @@ void LowerDesc::visitLoadStridedBufferDesc(LoadStridedBufferDescOp &op) { m_toErase.push_back(&op); - op.replaceAllUsesWith(builder.create(desc)); + op.replaceAllUsesWith(desc); } } // namespace lgc diff --git a/lgc/patch/LowerGpuRt.cpp b/lgc/patch/LowerGpuRt.cpp index 916101b8e9..7366a351b5 100644 --- a/lgc/patch/LowerGpuRt.cpp +++ b/lgc/patch/LowerGpuRt.cpp @@ -72,6 +72,7 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi .add(&LowerGpuRt::visitLdsStackInit) .add(&LowerGpuRt::visitLdsStackStore) .add(&LowerGpuRt::visitGetBoxSortHeuristicMode) + .add(&LowerGpuRt::visitGetRayQueryDispatchId) .add(&LowerGpuRt::visitGetStaticFlags) .add(&LowerGpuRt::visitGetTriangleCompressionMode) .add(&LowerGpuRt::visitGetFlattenedGroupThreadId) @@ -79,6 +80,8 @@ PreservedAnalyses LowerGpuRt::run(Module &module, ModuleAnalysisManager &analysi .add(&LowerGpuRt::visitGpurtDispatchThreadIdFlatOp) .add(&LowerGpuRt::visitContinuationStackIsGlobalOp) .add(&LowerGpuRt::visitWaveScanOp) + .add(&LowerGpuRt::visitGetKnownSetRayFlagsOp) + .add(&LowerGpuRt::visitGetKnownUnsetRayFlagsOp) .build(); visitor.visit(*this, module); @@ -318,7 +321,7 @@ void LowerGpuRt::visitFloatWithRoundMode(lgc::GpurtFloatWithRoundModeOp &inst) { enum OperationType : uint32_t { Add = 0, Sub, Mul }; auto func = inst.getCalledFunction(); - auto retType = cast(func->getReturnType()); + auto retType = func->getReturnType(); Value *src0 = inst.getSrc0(); Value *src1 = inst.getSrc1(); uint32_t rm = cast(inst.getRoundMode())->getZExtValue(); @@ -421,6 +424,27 @@ void LowerGpuRt::visitGetBoxSortHeuristicMode(GpurtGetBoxSortHeuristicModeOp &in m_funcsToLower.insert(inst.getCalledFunction()); } +// 
===================================================================================================================== +// Visit "GpurtGetRayQueryDispatchIdOp" instruction +// +// @param inst : The dialect instruction to process +void LowerGpuRt::visitGetRayQueryDispatchId(GpurtGetRayQueryDispatchIdOp &inst) { + m_builder->SetInsertPoint(&inst); + auto stage = getShaderStage(m_builder->GetInsertBlock()->getParent()); + // Local thread ID for graphics shader Stage, global thread ID for compute/raytracing shader stage + Value *dispatchId = nullptr; + if (stage != ShaderStage::Compute) { + auto subThreadId = m_builder->CreateReadBuiltInInput(lgc::BuiltInSubgroupLocalInvocationId); + Value *zero = m_builder->getInt32(0); + dispatchId = m_builder->CreateBuildVector({subThreadId, zero, zero}); + } else { + dispatchId = m_builder->CreateReadBuiltInInput(lgc::BuiltInGlobalInvocationId); + } + dispatchId->takeName(&inst); + inst.replaceAllUsesWith(dispatchId); + inst.eraseFromParent(); +} + // ===================================================================================================================== // Visit "GpurtGetStaticFlagsOp" instruction // @@ -499,4 +523,28 @@ void LowerGpuRt::visitContinuationStackIsGlobalOp(GpurtContinuationStackIsGlobal m_funcsToLower.insert(inst.getCalledFunction()); } +// ===================================================================================================================== +// Visit "GpurtGetKnownSetRayFlagsOp" instruction +// +// @param inst : The dialect instruction to process +void LowerGpuRt::visitGetKnownSetRayFlagsOp(lgc::GpurtGetKnownSetRayFlagsOp &inst) { + m_builder->SetInsertPoint(&inst); + auto flags = lgc::gpurt::getKnownSetRayFlags(*inst.getModule()); + inst.replaceAllUsesWith(m_builder->getInt32(flags)); + m_callsToLower.push_back(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// 
===================================================================================================================== +// Visit "GpurtGetKnownUnsetRayFlagsOp" instruction +// +// @param inst : The dialect instruction to process +void LowerGpuRt::visitGetKnownUnsetRayFlagsOp(lgc::GpurtGetKnownUnsetRayFlagsOp &inst) { + m_builder->SetInsertPoint(&inst); + auto flags = lgc::gpurt::getKnownUnsetRayFlags(*inst.getModule()); + inst.replaceAllUsesWith(m_builder->getInt32(flags)); + m_callsToLower.push_back(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + } // namespace lgc diff --git a/lgc/patch/LowerRayQueryWrapper.cpp b/lgc/patch/LowerRayQueryWrapper.cpp new file mode 100644 index 0000000000..5f3f20bc00 --- /dev/null +++ b/lgc/patch/LowerRayQueryWrapper.cpp @@ -0,0 +1,42 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// LowerRayQueryWrapper.cpp : LGC pass that is a wrapper around LowerRayQuery, which lowers rayQuery ops by +// inlining GPURT functions. + +#include "LowerRayQueryWrapper.h" +#include "lgc/state/PipelineState.h" + +using namespace lgc; +using namespace lgc::rt; +using namespace llvm; + +// ===================================================================================================================== +// Run the pass. +PreservedAnalyses LowerRayQueryWrapper::run(Module &module, ModuleAnalysisManager &analysisManager) { + PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); + m_staticFlags = pipelineState->getOptions().rtStaticPipelineFlags; + return LowerRayQuery::run(module, analysisManager); +} diff --git a/lgc/patch/LowerRayQueryWrapper.h b/lgc/patch/LowerRayQueryWrapper.h new file mode 100644 index 0000000000..b3e9e639f1 --- /dev/null +++ b/lgc/patch/LowerRayQueryWrapper.h @@ -0,0 +1,37 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// LowerRayQueryWrapper.h : LGC pass that is a wrapper around LowerRayQuery, which lowers rayQuery ops by +// inlining GPURT functions. + +#pragma once + +#include "llvmraytracing/LowerRayQuery.h" + +class LowerRayQueryWrapper : public lgc::rt::LowerRayQuery { +public: + // Run the pass. 
+ llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); +}; diff --git a/lgc/patch/MeshTaskShader.cpp b/lgc/patch/MeshTaskShader.cpp index 6e47c76187..a693be432e 100644 --- a/lgc/patch/MeshTaskShader.cpp +++ b/lgc/patch/MeshTaskShader.cpp @@ -69,16 +69,14 @@ MeshTaskShader::~MeshTaskShader() { // @param pipelineState : Pipeline state // @param entryPoint : Entry-point of mesh shader // @param ldsLayout : Mesh shader LDS layout (could be null) +// @param outputsLayout : Mesh shader outputs layout (could be null) unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Function *entryPoint, - MeshLdsLayout *ldsLayout) { + MeshLdsLayout *ldsLayout, MeshOutputsLayout *outputsLayout) { if (!pipelineState->hasShaderStage(ShaderStage::Mesh)) return 0; // Mesh shader absent (standalone compiler tries to compile a single task shader) - assert(getShaderStage(entryPoint) == ShaderStage::Mesh); // Must be mesh shader - - auto gfxIp = pipelineState->getTargetInfo().getGfxIpVersion(); - assert(gfxIp >= GfxIpVersion({10, 3})); // Must be GFX10.3+ - (void(gfxIp)); // Unused + assert(getShaderStage(entryPoint) == ShaderStage::Mesh); // Must be mesh shader + assert(pipelineState->getTargetInfo().getGfxIpVersion() >= GfxIpVersion({10, 3})); // Must be GFX10.3+ // // The LDS layout of mesh shader is something as follow (consists of two main parts): @@ -104,6 +102,7 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct assert(meshMode.outputPrimitives <= NggMaxThreadsPerSubgroup); const auto resUsage = pipelineState->getShaderResourceUsage(ShaderStage::Mesh); + const auto nextStage = pipelineState->getNextShaderStage(ShaderStage::Mesh); unsigned meshLdsSizeInDwords = 0; unsigned ldsOffsetInDwords = 0; @@ -116,6 +115,24 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct LLPC_OUTS("\n"); }; + auto printOutputLayoutInfo = [=](unsigned location, unsigned 
numComponents, unsigned relativeOffset, + unsigned exportSlot, BuiltInKind forBuiltIn) { + if (numComponents > 4) { + LLPC_OUTS(format("-- location = %u-%u, components = %u, offset = %u", location, location + 1, numComponents, + relativeOffset)); + } else { + LLPC_OUTS(format("-- location = %u, components = %u, offset = %u", location, numComponents, relativeOffset)); + } + + if (exportSlot != InvalidValue) + LLPC_OUTS(format(", export = %u", exportSlot)); + + if (forBuiltIn != InvalidValue) + LLPC_OUTS(" (builtin = " << PipelineState::getBuiltInName(forBuiltIn) << ")"); + + LLPC_OUTS("\n"); + }; + if (ldsLayout) { LLPC_OUTS("===============================================================================\n"); LLPC_OUTS("// LLPC mesh shader LDS region info (in dwords) and general info\n\n"); @@ -169,21 +186,212 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct meshLdsSizeInDwords += ldsRegionSize; // Per-vertex outputs - const unsigned vertexStride = 4 * resUsage->inOutUsage.outputMapLocCount; // Corresponds to vec4 output + auto &vertexOutputComponents = resUsage->inOutUsage.mesh.vertexOutputComponents; + unsigned vertexStride = 0; + for (auto &vertexOutput : vertexOutputComponents) { + const auto numComponents = vertexOutput.second.first; + vertexStride += numComponents; // Calculate total number of components of vertex outputs + } + ldsRegionSize = vertexStride * meshMode.outputVertices; if (ldsLayout) { printLdsRegionInfo("Per-vertex Output", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::VertexOutput] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); + + assert(outputsLayout); + outputsLayout->vertexStride = vertexStride; + + unsigned offsetInVertex = 0; + unsigned exportSlot = 0; + unsigned exportCount = 0; + + for (auto &vertexOutput : vertexOutputComponents) { + const auto location = vertexOutput.first; + const auto &[numComponents, forBuiltIn] = vertexOutput.second; + + 
outputsLayout->offsetsInVertex[location] = offsetInVertex; // Map output locations to relative offsets in vertex + offsetInVertex += numComponents; + + if (forBuiltIn == InvalidValue) { + // Only consider vertex generic outputs, vertex built-ins will be handled later on + if (nextStage == ShaderStage::Fragment) { + // Input/output matching must have been done in resource collecting pass, just use the location as export slot + outputsLayout->vertexGenericExports[location] = location; + exportCount = std::max(exportCount, location + 1); + if (numComponents > 4) { + outputsLayout->vertexGenericExports[location + 1] = location + 1; + exportCount = std::max(exportCount, location + 2); + } + } else { + // If next stage is missing, we try to assign continuous export slots + assert(!nextStage); + + outputsLayout->vertexGenericExports[location] = exportSlot++; + ++exportCount; + if (numComponents > 4) { + outputsLayout->vertexGenericExports[location + 1] = exportSlot++; + ++exportCount; + } + } + } + } + + // Consider those special outputs mapped from vertex built-ins + if (nextStage == ShaderStage::Fragment) { + const auto fsResUsage = pipelineState->getShaderResourceUsage(ShaderStage::Fragment); + const auto &fsBuiltInUsage = fsResUsage->builtInUsage.fs; + auto &fsInOutUsage = fsResUsage->inOutUsage; + + if (fsBuiltInUsage.clipDistance > 0 || fsBuiltInUsage.cullDistance > 0) { + if (fsBuiltInUsage.clipDistance > 0) { + assert(fsInOutUsage.builtInInputLocMap.count(BuiltInClipDistance) > 0); + exportSlot = fsInOutUsage.builtInInputLocMap[BuiltInClipDistance]; + outputsLayout->vertexBuiltInExports[BuiltInClipDistance] = exportSlot; + } + + if (fsBuiltInUsage.cullDistance > 0) { + assert(fsInOutUsage.builtInInputLocMap.count(BuiltInCullDistance) > 0); + exportSlot = fsInOutUsage.builtInInputLocMap[BuiltInCullDistance]; + outputsLayout->vertexBuiltInExports[BuiltInCullDistance] = exportSlot; + } + + unsigned startSlot = InvalidValue; + if (fsBuiltInUsage.clipDistance > 0) { + 
startSlot = outputsLayout->vertexBuiltInExports[BuiltInClipDistance]; + } else { + assert(fsBuiltInUsage.cullDistance > 0); + startSlot = outputsLayout->vertexBuiltInExports[BuiltInCullDistance]; + } + exportCount = std::max(exportCount, + startSlot + ((fsBuiltInUsage.clipDistance + fsBuiltInUsage.cullDistance > 4) ? 2 : 1)); + } + } else { + assert(!nextStage); + + const auto &builtInUsage = resUsage->builtInUsage.mesh; + if (builtInUsage.clipDistance > 0 || builtInUsage.cullDistance > 0) { + unsigned startSlot = exportSlot; + + if (builtInUsage.clipDistance > 0) + outputsLayout->vertexBuiltInExports[BuiltInClipDistance] = startSlot; + + if (builtInUsage.cullDistance > 0) { + if (builtInUsage.clipDistance >= 4) + ++startSlot; + outputsLayout->vertexBuiltInExports[BuiltInCullDistance] = startSlot; + } + + exportSlot += (builtInUsage.clipDistance + builtInUsage.cullDistance > 4) ? 2 : 1; + exportCount += (builtInUsage.clipDistance + builtInUsage.cullDistance > 4) ? 2 : 1; + } + } + outputsLayout->vertexExportCount = exportCount; + ldsOffsetInDwords += ldsRegionSize; } meshLdsSizeInDwords += ldsRegionSize; // Per-primitive outputs - const unsigned primitiveStride = 4 * resUsage->inOutUsage.perPrimitiveOutputMapLocCount; // Corresponds to vec4 output + auto &primitiveOutputComponents = resUsage->inOutUsage.mesh.primitiveOutputComponents; + unsigned primitiveStride = 0; + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto numComponents = primitiveOutput.second.first; + primitiveStride += numComponents; // Calculate total number of components of primitive outputs + } + ldsRegionSize = primitiveStride * meshMode.outputPrimitives; if (ldsLayout) { printLdsRegionInfo("Per-primitive Output", ldsOffsetInDwords, ldsRegionSize); (*ldsLayout)[MeshLdsRegion::PrimitiveOutput] = std::make_pair(ldsOffsetInDwords, ldsRegionSize); + + assert(outputsLayout); + outputsLayout->primitiveStride = primitiveStride; + + unsigned offsetInPrimitive = 0; + const unsigned 
startSlot = outputsLayout->vertexExportCount; + unsigned exportSlot = startSlot; + unsigned exportCount = 0; + + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto location = primitiveOutput.first; + const auto &[numComponents, forBuiltIn] = primitiveOutput.second; + + outputsLayout->offsetsInPrimitive[location] = + offsetInPrimitive; // Map output locations to relative offsets in primitive + offsetInPrimitive += numComponents; + + if (forBuiltIn == InvalidValue) { + // Only consider primitive generic outputs, primitive built-ins will be handled later on + if (nextStage == ShaderStage::Fragment) { + // Input/output matching must have been done in resource collecting pass, just use the location as export slot + outputsLayout->primitiveGenericExports[location] = startSlot + location; + exportCount = std::max(exportCount, location + 1); + if (numComponents > 4) { + outputsLayout->primitiveGenericExports[location + 1] = startSlot + location + 1; + exportCount = std::max(exportCount, location + 2); + } + } else { + // If next stage is missing, we try to assign continuous export slots + assert(!nextStage); + + outputsLayout->primitiveGenericExports[location] = exportSlot++; + ++exportCount; + if (numComponents > 4) { + outputsLayout->primitiveGenericExports[location + 1] = exportSlot++; + ++exportCount; + } + } + } + } + + // Consider those special outputs mapped from primitive built-ins + if (nextStage == ShaderStage::Fragment) { + // Built-in matching must have been done in resource collecting pass, just use the location as export slot + const auto fsResUsage = pipelineState->getShaderResourceUsage(ShaderStage::Fragment); + const auto &fsBuiltInUsage = fsResUsage->builtInUsage.fs; + auto &fsInOutUsage = fsResUsage->inOutUsage; + + if (fsBuiltInUsage.primitiveId) { + assert(fsInOutUsage.perPrimitiveBuiltInInputLocMap.count(BuiltInPrimitiveId) > 0); + const unsigned location = fsInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInPrimitiveId]; + 
outputsLayout->primitiveBuiltInExports[BuiltInPrimitiveId] = startSlot + location; + exportCount = std::max(exportCount, location + 1); + } + + if (fsBuiltInUsage.layer) { + assert(fsInOutUsage.perPrimitiveBuiltInInputLocMap.count(BuiltInLayer) > 0); + const unsigned location = fsInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInLayer]; + outputsLayout->primitiveBuiltInExports[BuiltInLayer] = startSlot + location; + exportCount = std::max(exportCount, location + 1); + } + + if (fsBuiltInUsage.viewportIndex) { + assert(fsInOutUsage.perPrimitiveBuiltInInputLocMap.count(BuiltInViewportIndex) > 0); + const unsigned location = fsInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInViewportIndex]; + outputsLayout->primitiveBuiltInExports[BuiltInViewportIndex] = startSlot + location; + exportCount = std::max(exportCount, location + 1); + } + } else { + assert(!nextStage); + + const auto &builtInUsage = resUsage->builtInUsage.mesh; + if (builtInUsage.primitiveId) { + outputsLayout->primitiveBuiltInExports[BuiltInPrimitiveId] = exportSlot++; + ++exportCount; + } + + if (builtInUsage.layer) { + outputsLayout->primitiveBuiltInExports[BuiltInLayer] = exportSlot++; + ++exportCount; + } + + if (builtInUsage.viewportIndex) { + outputsLayout->primitiveBuiltInExports[BuiltInViewportIndex] = exportSlot++; + ++exportCount; + } + } + outputsLayout->primitiveExportCount = exportCount; + ldsOffsetInDwords += ldsRegionSize; } meshLdsSizeInDwords += ldsRegionSize; @@ -234,6 +442,41 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct printLdsRegionInfo("Internal Mesh LDS", 0, meshLdsSizeInDwords); printLdsRegionInfo("Shared Variable LDS", 0, sharedVarLdsSizeInDwords); printLdsRegionInfo("Total LDS", 0, meshLdsSizeInDwords + sharedVarLdsSizeInDwords); + + if (!outputsLayout->offsetsInVertex.empty()) { + LLPC_OUTS("\nVertex Outputs Layout (stride = " << outputsLayout->vertexStride + << ", exports = " << outputsLayout->vertexExportCount << "):\n"); + for (auto 
&vertexOutput : outputsLayout->offsetsInVertex) { + const auto &[location, offsetInVertex] = vertexOutput; + const auto &[numComponents, forBuiltIn] = vertexOutputComponents[location]; + unsigned exportSlot = InvalidValue; + if (forBuiltIn != InvalidValue) { + if (outputsLayout->vertexBuiltInExports.count(forBuiltIn) > 0) + exportSlot = outputsLayout->vertexBuiltInExports[forBuiltIn]; + } else { + exportSlot = outputsLayout->vertexGenericExports[location]; + } + printOutputLayoutInfo(location, numComponents, offsetInVertex, exportSlot, forBuiltIn); + } + } + + if (!outputsLayout->offsetsInPrimitive.empty()) { + LLPC_OUTS("\nPrimitive outputs layout (stride = " << outputsLayout->primitiveStride << ", exports = " + << outputsLayout->primitiveExportCount << "):\n"); + for (auto &primitiveOutput : outputsLayout->offsetsInPrimitive) { + const auto &[location, offsetInPrimitive] = primitiveOutput; + const auto &[numComponents, forBuiltIn] = primitiveOutputComponents[location]; + unsigned exportSlot = InvalidValue; + if (forBuiltIn != InvalidValue) { + if (outputsLayout->primitiveBuiltInExports.count(forBuiltIn) > 0) + exportSlot = outputsLayout->primitiveBuiltInExports[forBuiltIn]; + } else { + exportSlot = outputsLayout->primitiveGenericExports[location]; + } + printOutputLayoutInfo(location, numComponents, offsetInPrimitive, exportSlot, forBuiltIn); + } + } + LLPC_OUTS("\n"); LLPC_OUTS("Workgroup Size (X, Y, Z) = (" << meshMode.workgroupSizeX << ", " << meshMode.workgroupSizeY << ", " << meshMode.workgroupSizeZ << ")\n"); @@ -258,7 +501,7 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct LLPC_OUTS("Max Vertices = " << meshMode.outputVertices << ", Max Primitives = " << meshMode.outputPrimitives << "\n"); if (!meshSharedVars.empty()) { - LLPC_OUTS("Shared variables:\n"); + LLPC_OUTS("Shared Variables:\n"); for (auto meshSharedVar : meshSharedVars) { assert(meshSharedVar->getAlignment() == 4); // Must be 1 dword const auto sizeInBytes = 
@@ -266,8 +509,8 @@ unsigned MeshTaskShader::layoutMeshShaderLds(PipelineState *pipelineState, Funct assert(sizeInBytes % 4 == 0); // Must be multiple of 4 const auto sizeInDwords = sizeInBytes / 4; - LLPC_OUTS("Name = " << meshSharedVar->getName() << ", Type = " << getTypeName(meshSharedVar->getValueType()) - << ", Size (in dwords) = " << sizeInDwords << "\n"); + LLPC_OUTS("-- name = " << meshSharedVar->getName() << ", type = " << getTypeName(meshSharedVar->getValueType()) + << ", size (in dwords) = " << sizeInDwords << "\n"); } } LLPC_OUTS("\n"); @@ -403,7 +646,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { // - SetMeshPrimitiveCulled -> Write null primitive flag to LDS // - GetMeshBuiltinInput -> Lower mesh built-in input // - TaskPayloadPtr -> Transform task payload descriptor - // - WriteMeshVertexOutput/WriteMeshPrimitiveOutput -> Write output data to LDS + // - WriteMeshOutput -> Write output data to LDS // } // // Barrier (if needBarrierFlag) @@ -463,7 +706,7 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { const unsigned waveSize = m_pipelineState->getShaderWaveSize(ShaderStage::Mesh); // Setup LDS layout - layoutMeshShaderLds(m_pipelineState, entryPoint, &m_ldsLayout); + layoutMeshShaderLds(m_pipelineState, entryPoint, &m_ldsLayout, &m_outputsLayout); m_lds = getOrCreateMeshLds(entryPoint->getParent()); // Mutate mesh shader entry-point @@ -921,6 +1164,9 @@ void MeshTaskShader::processMeshShader(Function *entryPoint) { m_builder.CreateRetVoid(); } + + // Mesh shader processing is done. We can safely update its input/output usage with final results. 
+ updateMeshShaderInOutUsage(); } // ===================================================================================================================== @@ -998,7 +1244,8 @@ void MeshTaskShader::lowerEmitMeshTasks(EmitMeshTasksOp &emitMeshTasksOp) { auto emitMeshTasksBlock = checkEmitMeshTasksBlock->splitBasicBlock(emitMeshTasksCall, ".emitMeshTasks"); auto endEmitMeshTasksBlock = emitMeshTasksBlock->splitBasicBlock(emitMeshTasksCall, ".endEmitMeshTasks"); - SyncScope::ID agentScope = m_builder.getContext().getOrInsertSyncScopeID("agent"); + SyncScope::ID agentScope = m_builder.getContext().getOrInsertSyncScopeID("agent"); // Device level + // Modify ".checkEmitMeshTasks" block { m_builder.SetInsertPoint(checkEmitMeshTasksBlock->getTerminator()); @@ -1067,9 +1314,11 @@ void MeshTaskShader::lowerEmitMeshTasks(EmitMeshTasksOp &emitMeshTasksOp) { groupCount = m_builder.CreateInsertElement(groupCount, groupCountY, 1); groupCount = m_builder.CreateInsertElement(groupCount, groupCountZ, 2); - m_builder.CreateIntrinsic( - Intrinsic::amdgcn_raw_buffer_store, groupCount->getType(), - {groupCount, drawDataRingBufDesc, m_builder.getInt32(0), drawDataRingEntryOffset, m_builder.getInt32(0)}); + CoherentFlag coherent = {}; + + m_builder.CreateIntrinsic(Intrinsic::amdgcn_raw_buffer_store, groupCount->getType(), + {groupCount, drawDataRingBufDesc, m_builder.getInt32(0), drawDataRingEntryOffset, + m_builder.getInt32(coherent.u32All)}); // NOTE: Only the lowest 8 bits are for us to write. 
Value *readyBit = getDrawDataReadyBit(entryPoint); @@ -1077,7 +1326,7 @@ void MeshTaskShader::lowerEmitMeshTasks(EmitMeshTasksOp &emitMeshTasksOp) { m_builder.CreateIntrinsic(Intrinsic::amdgcn_raw_buffer_store, readyBit->getType(), {readyBit, drawDataRingBufDesc, m_builder.getInt32(3 * sizeof(unsigned)), - drawDataRingEntryOffset, m_builder.getInt32(0)}); + drawDataRingEntryOffset, m_builder.getInt32(coherent.u32All)}); } // Construct ".endEmitMeshTasks" block @@ -1342,55 +1591,60 @@ void MeshTaskShader::lowerSetMeshPrimitiveCulled(SetMeshPrimitiveCulledOp &setMe } // ===================================================================================================================== -// Lower write mesh vertex output. Write mesh shader vertex outputs to LDS. -// -// @param writeMeshVertexOutputOp : Call instruction op to write vertex output for mesh shader -void MeshTaskShader::lowerWriteMeshVertexOutput(WriteMeshVertexOutputOp &writeMeshVertexOutputOp) { - m_builder.SetInsertPoint(&writeMeshVertexOutputOp); - - assert(getShaderStage(writeMeshVertexOutputOp.getFunction()) == ShaderStage::Mesh); - - auto outputOffset = writeMeshVertexOutputOp.getOutputOffset(); - auto vertexIndex = writeMeshVertexOutputOp.getVertexIndex(); - auto outputValue = writeMeshVertexOutputOp.getOutputValue(); - - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh); - const unsigned vertexStride = 4 * resUsage->inOutUsage.outputMapLocCount; // Corresponds to vec4 output - - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexOutput)); - Value *ldsOffset = m_builder.CreateMul(vertexIndex, m_builder.getInt32(vertexStride)); - ldsOffset = m_builder.CreateAdd(ldsOffset, outputOffset); - ldsOffset = m_builder.CreateAdd(ldsStart, ldsOffset); - - writeValueToLds(outputValue, ldsOffset); - - m_callsToRemove.push_back(&writeMeshVertexOutputOp); -} - -// 
===================================================================================================================== -// Lower write mesh primitive output. Write mesh shader primitive outputs to LDS. +// Lower write mesh vertex/primitive output. Write mesh shader vertex/primitive outputs to LDS. // -// @param writeMeshPrimitiveOutputOp : Call instruction op to write primitive output for mesh shader -void MeshTaskShader::lowerWriteMeshPrimitiveOutput(WriteMeshPrimitiveOutputOp &writeMeshPrimitiveOutputOp) { - m_builder.SetInsertPoint(&writeMeshPrimitiveOutputOp); - - assert(getShaderStage(writeMeshPrimitiveOutputOp.getFunction()) == ShaderStage::Mesh); +// @param WriteMeshOutputOp : Call instruction op to write vertex/primitive output for mesh shader +void MeshTaskShader::lowerWriteMeshOutput(WriteMeshOutputOp &writeMeshOutputOp) { + m_builder.SetInsertPoint(&writeMeshOutputOp); + + assert(getShaderStage(writeMeshOutputOp.getFunction()) == ShaderStage::Mesh); + + auto isPrimitive = writeMeshOutputOp.getIsPrimitive(); + auto location = writeMeshOutputOp.getLocation(); + auto locationOffset = writeMeshOutputOp.getLocationOffset(); + auto componentIndex = writeMeshOutputOp.getComponentIndex(); + auto primOrVertexIndex = writeMeshOutputOp.getPrimOrVertexIndex(); + auto outputValue = writeMeshOutputOp.getOutputValue(); + + auto &outputComponents = + isPrimitive + ? m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents + : m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; + + // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + + // offsetInPrimOrVertex + locationIndex * numComponents + componentIndex + Value *ldsStart = m_builder.getInt32( + getMeshShaderLdsRegionStart(isPrimitive ? MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + const unsigned primOrVertexStride = isPrimitive ? 
m_outputsLayout.primitiveStride : m_outputsLayout.vertexStride; + Value *primOrVertexOffset = m_builder.CreateMul(primOrVertexIndex, m_builder.getInt32(primOrVertexStride)); + + Value *offsetInPrimOrVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, isPrimitive)); + if (locationOffset != m_builder.getInt32(0)) { + auto locationIndex = locationOffset; + + assert(outputComponents.count(location) > 0); // Must exist + unsigned numComponents = outputComponents[location].first; + + if (numComponents > 4) { + // NOTE: Here we encounter 64-bit vec3/vec4 data types. Such types will occupy two consecutive locations and the + // provided location offset must be divided by 2 to get real location index. + locationIndex = m_builder.CreateLShr(locationOffset, 2); + } - auto outputOffset = writeMeshPrimitiveOutputOp.getOutputOffset(); - auto primitiveIndex = writeMeshPrimitiveOutputOp.getPrimitiveIndex(); - auto outputValue = writeMeshPrimitiveOutputOp.getOutputValue(); + offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, + m_builder.CreateMul(locationIndex, m_builder.getInt32(numComponents))); + } - const auto resUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh); - const unsigned primitiveStride = 4 * resUsage->inOutUsage.perPrimitiveOutputMapLocCount; // Corresponds to vec4 output + if (componentIndex != m_builder.getInt32(0)) + offsetInPrimOrVertex = m_builder.CreateAdd(offsetInPrimOrVertex, componentIndex); - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveOutput)); - Value *ldsOffset = m_builder.CreateMul(primitiveIndex, m_builder.getInt32(primitiveStride)); - ldsOffset = m_builder.CreateAdd(ldsOffset, outputOffset); - ldsOffset = m_builder.CreateAdd(ldsStart, ldsOffset); + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); writeValueToLds(outputValue, ldsOffset); - 
m_callsToRemove.push_back(&writeMeshPrimitiveOutputOp); + m_callsToRemove.push_back(&writeMeshOutputOp); } // ===================================================================================================================== @@ -1703,8 +1957,7 @@ void MeshTaskShader::lowerMeshShaderBody(BasicBlock *apiMeshEntryBlock, BasicBlo .add(&MeshTaskShader::lowerSetMeshPrimitiveIndices) .add(&MeshTaskShader::lowerSetMeshPrimitiveCulled) .add(&MeshTaskShader::lowerGetMeshBuiltinInput) - .add(&MeshTaskShader::lowerWriteMeshVertexOutput) - .add(&MeshTaskShader::lowerWriteMeshPrimitiveOutput) + .add(&MeshTaskShader::lowerWriteMeshOutput) .build(); visitor.visit(*this, *entryPoint); @@ -1720,7 +1973,6 @@ void MeshTaskShader::lowerMeshShaderBody(BasicBlock *apiMeshEntryBlock, BasicBlo // Export primitive (primitive connectivity data, primitive payload, and primitive attributes). void MeshTaskShader::exportPrimitive() { const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; - auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage; Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveIndices)); Value *ldsOffset = m_builder.CreateAdd(ldsStart, m_waveThreadInfo.primOrVertexIndex); @@ -1835,37 +2087,56 @@ void MeshTaskShader::exportPrimitive() { // Primitive attribute export follows vertex attribute export SmallVector primAttrExports; - unsigned startSlot = inOutUsage.mesh.vertexGenericOutputExportCount; - for (auto &vertexBuiltIn : inOutUsage.mesh.vertexBuiltInExportSlots) { - const unsigned exportSlot = vertexBuiltIn.second; - startSlot = std::max(startSlot, exportSlot + 1); - } - // Export primitive attributes (from generic outputs) ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::PrimitiveOutput)); - auto primitiveStride = 4 * inOutUsage.perPrimitiveOutputMapLocCount; - auto ldsOffsetBase = 
m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(primitiveStride)); - ldsOffsetBase = m_builder.CreateAdd(ldsStart, ldsOffsetBase); + auto primitiveOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); + + auto &primitiveOutputComponents = + m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.primitiveOutputComponents; + for (auto &primitiveOutput : primitiveOutputComponents) { + const auto location = primitiveOutput.first; + const auto &[numComponents, forBuiltIn] = primitiveOutput.second; + assert(numComponents > 0); + + if (forBuiltIn != InvalidValue) + continue; // Skip those special outputs mapped from primitive built-ins. They will be handled later on. + + auto offsetInPrimitive = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, true)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primitiveOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimitive); - for (unsigned exportSlot = 0; exportSlot < inOutUsage.mesh.primitiveGenericOutputExportCount; ++exportSlot) { - auto ldsOffset = m_builder.CreateAdd(ldsOffsetBase, m_builder.getInt32(4 * exportSlot)); - auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), 4), ldsOffset); + auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), numComponents), ldsOffset); - std::array exportValues; - for (unsigned j = 0; j < 4; ++j) - exportValues[j] = m_builder.CreateExtractElement(exportValue, j); + SmallVector exporteValues; + for (unsigned i = 0; i < numComponents; ++i) + exporteValues.push_back(m_builder.CreateExtractElement(exportValue, i)); - primAttrExports.push_back({startSlot + exportSlot, exportValues}); - ++inOutUsage.primExpCount; + // Do array padding + if (numComponents <= 4) { + while (exporteValues.size() < 4) // <4 x float> + exporteValues.push_back(nullptr); + } else { + while 
(exporteValues.size() < 8) // <8 x float> + exporteValues.push_back(nullptr); + } + + unsigned exportSlot = getOutputExportSlot(location, true); + assert(exportSlot != InvalidValue); + primAttrExports.push_back({exportSlot, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); + if (numComponents > 4) + primAttrExports.push_back( + {exportSlot + 1, exporteValues[4], exporteValues[5], exporteValues[6], exporteValues[7]}); } // Export primitive attributes (from built-ins as generic ones) if (builtInUsage.primitiveId) { - if (inOutUsage.mesh.primitiveBuiltInExportSlots.count(BuiltInPrimitiveId) > 0) { + const unsigned exportSlot = getOutputExportSlot(BuiltInPrimitiveId, true); + if (exportSlot != InvalidValue) { assert(primitiveId); - const unsigned exportSlot = inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInPrimitiveId]; - primAttrExports.push_back({startSlot + exportSlot, primitiveId}); - ++inOutUsage.primExpCount; + primAttrExports.push_back({exportSlot, primitiveId}); } } @@ -1886,11 +2157,10 @@ void MeshTaskShader::exportPrimitive() { } if (exportLayer) { - if (inOutUsage.mesh.primitiveBuiltInExportSlots.count(BuiltInLayer) > 0) { + const unsigned exportSlot = getOutputExportSlot(BuiltInLayer, true); + if (exportSlot != InvalidValue) { assert(fsLayer); - const unsigned exportSlot = inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInLayer]; - primAttrExports.push_back({startSlot + exportSlot, fsLayer}); - ++inOutUsage.primExpCount; + primAttrExports.push_back({exportSlot, fsLayer}); } } @@ -1911,11 +2181,10 @@ void MeshTaskShader::exportPrimitive() { } if (exportViewportIndex) { - if (inOutUsage.mesh.primitiveBuiltInExportSlots.count(BuiltInViewportIndex) > 0) { + const unsigned exportSlot = getOutputExportSlot(BuiltInViewportIndex, true); + if (exportSlot != InvalidValue) { assert(fsViewportIndex); - const unsigned exportSlot = inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex]; - primAttrExports.push_back({startSlot + 
exportSlot, fsViewportIndex}); - ++inOutUsage.primExpCount; + primAttrExports.push_back({exportSlot, fsViewportIndex}); } } @@ -1926,7 +2195,6 @@ void MeshTaskShader::exportPrimitive() { // Export vertex (vertex position data and vertex attributes). void MeshTaskShader::exportVertex() { const auto &builtInUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->builtInUsage.mesh; - auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage; // Export vertex position data SmallVector posExports; @@ -2002,21 +2270,46 @@ void MeshTaskShader::exportVertex() { // Export vertex attributes (from generic outputs) Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(MeshLdsRegion::VertexOutput)); - auto vertexStride = 4 * inOutUsage.outputMapLocCount; - auto ldsOffsetBase = m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(vertexStride)); - ldsOffsetBase = m_builder.CreateAdd(ldsStart, ldsOffsetBase); + auto vertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); + + auto &vertexOutputComponents = + m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh.vertexOutputComponents; + for (auto &vertexOutput : vertexOutputComponents) { + const auto location = vertexOutput.first; + const auto &[numComponents, forBuiltIn] = vertexOutput.second; + assert(numComponents > 0); + + if (forBuiltIn != InvalidValue) + continue; // Skip those special outputs mapped from vertex built-ins. They will be handled later on. 
+ + auto offsetInVertex = m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, false)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, vertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInVertex); - for (unsigned exportSlot = 0; exportSlot < inOutUsage.mesh.vertexGenericOutputExportCount; ++exportSlot) { - auto ldsOffset = m_builder.CreateAdd(ldsOffsetBase, m_builder.getInt32(4 * exportSlot)); - auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), 4), ldsOffset); + auto exportValue = readValueFromLds(FixedVectorType::get(m_builder.getFloatTy(), numComponents), ldsOffset); - std::array exportValues = {m_builder.CreateExtractElement(exportValue, static_cast(0)), - m_builder.CreateExtractElement(exportValue, 1), - m_builder.CreateExtractElement(exportValue, 2), - m_builder.CreateExtractElement(exportValue, 3)}; + SmallVector exporteValues; + for (unsigned i = 0; i < numComponents; ++i) + exporteValues.push_back(m_builder.CreateExtractElement(exportValue, i)); - vertAttrExports.push_back({exportSlot, exportValues}); - ++inOutUsage.expCount; + // Do array padding + if (numComponents <= 4) { + while (exporteValues.size() < 4) // <4 x float> + exporteValues.push_back(nullptr); + } else { + while (exporteValues.size() < 8) // <8 x float> + exporteValues.push_back(nullptr); + } + + unsigned exportSlot = getOutputExportSlot(location, false); + assert(exportSlot != InvalidValue); + vertAttrExports.push_back({exportSlot, exporteValues[0], exporteValues[1], exporteValues[2], exporteValues[3]}); + if (numComponents > 4) + vertAttrExports.push_back( + {exportSlot + 1, exporteValues[4], exporteValues[5], exporteValues[6], exporteValues[7]}); } // Export vertex attributes (from built-ins as generic ones) @@ -2057,24 +2350,20 @@ void MeshTaskShader::exportVertex() { } if (exportClipCullDistance) { - unsigned exportSlot = InvalidValue; - if 
(inOutUsage.mesh.vertexBuiltInExportSlots.count(BuiltInClipDistance) > 0) { - exportSlot = inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInClipDistance]; - } else { - assert(inOutUsage.mesh.vertexBuiltInExportSlots.count(BuiltInCullDistance) > 0); - exportSlot = inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance]; + unsigned exportSlot = getOutputExportSlot(BuiltInClipDistance, false); + if (exportSlot == InvalidValue) { + // If ClipDistance doesn't exist, check CullDistance once again + exportSlot = getOutputExportSlot(BuiltInCullDistance, false); } assert(exportSlot != InvalidValue); vertAttrExports.push_back( {exportSlot, {clipCullDistances[0], clipCullDistances[1], clipCullDistances[2], clipCullDistances[3]}}); - ++inOutUsage.expCount; if (clipCullDistances.size() > 4) { // Do the second exporting vertAttrExports.push_back( {exportSlot + 1, {clipCullDistances[4], clipCullDistances[5], clipCullDistances[6], clipCullDistances[7]}}); - ++inOutUsage.expCount; } } } @@ -2102,7 +2391,7 @@ void MeshTaskShader::collectMeshStatsInfo(Function *entryPoint, Value *numMeshPr const uint64_t numMeshThreads = meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ; Value *meshPipeStatsBufPtr = m_pipelineSysValues.get(entryPoint)->getMeshPipeStatsBufPtr(); - SyncScope::ID agentScope = m_builder.getContext().getOrInsertSyncScopeID("agent"); + SyncScope::ID agentScope = m_builder.getContext().getOrInsertSyncScopeID("agent"); // Device level // // Record numMeshThreads @@ -2196,7 +2485,8 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { exportDone = true; // Last export if (m_gfxIp.major >= 11) { - if (kind == ExportKind::Pos || kind == ExportKind::Prim) { + if (m_pipelineState->exportAttributeByExportInstruction() || kind == ExportKind::Pos || + kind == ExportKind::Prim) { m_builder.CreateIntrinsic(Intrinsic::amdgcn_exp_row, valueTy, { m_builder.getInt32(target + exports[i].slot), // tgt @@ -2210,6 +2500,7 @@ void 
MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { }); } else { assert(kind == ExportKind::VertAttr || kind == ExportKind::PrimAttr); + assert(!m_pipelineState->exportAttributeByExportInstruction()); Value *valueToStore = PoisonValue::get(FixedVectorType::get(valueTy, 4)); for (unsigned j = 0; j < 4; ++j) { @@ -2260,32 +2551,14 @@ void MeshTaskShader::doExport(ExportKind kind, ArrayRef exports) { void MeshTaskShader::prepareAttribRingAccess() { assert(m_gfxIp.major >= 11); // Must be GFX11+ - // The allocated numbers of vertex/primitive attributes are something as follow: - // 1. Generic vertex attributes - // 2. Vertex attributes mapped from vertex builtins - // 3. Generic primitive attributes - // 4. Primitive attributes mapped from primitive builtins - const auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage.mesh; - unsigned vertAttribCount = inOutUsage.vertexGenericOutputExportCount; - for (auto &vertexBuiltIn : inOutUsage.vertexBuiltInExportSlots) { - const unsigned exportSlot = vertexBuiltIn.second; - vertAttribCount = std::max(vertAttribCount, exportSlot + 1); - } - - unsigned primAttribCount = inOutUsage.primitiveGenericOutputExportCount; - for (auto &primitiveBuiltIn : inOutUsage.primitiveBuiltInExportSlots) { - const unsigned exportSlot = primitiveBuiltIn.second; - primAttribCount = std::max(primAttribCount, exportSlot + 1); - } - - unsigned attribCount = vertAttribCount + primAttribCount; + unsigned attribCount = m_outputsLayout.vertexExportCount + m_outputsLayout.primitiveExportCount; if (attribCount == 0) return; // No attribute export // NOTE: HW allocates and manages attribute ring based on the register fields: VS_EXPORT_COUNT and PRIM_EXPORT_COUNT. // When VS_EXPORT_COUNT = 0, HW assumes there is still a vertex attribute exported even though this is not what we // want. Hence, we should reserve param0 as a dummy vertex attribute. 
- if (vertAttribCount == 0) { + if (m_outputsLayout.vertexExportCount == 0) { m_hasNoVertexAttrib = true; ++attribCount; // Count in this dummy vertex attribute } @@ -2569,19 +2842,25 @@ Value *MeshTaskShader::readMeshBuiltInFromLds(BuiltInKind builtIn) { break; } - Value *ldsOffset = nullptr; + // ldsOffset = ldsStart + primOrVertexIndex * primOrVertexStride + offsetInPrimOrVertex + Value *primOrVertexOffset = nullptr; if (region == MeshLdsRegion::VertexOutput) { - auto vertexStride = 4 * inOutUsage.outputMapLocCount; - ldsOffset = m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(vertexStride)); + primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.vertexStride)); } else { assert(region == MeshLdsRegion::PrimitiveOutput); - auto primitiveStride = 4 * inOutUsage.perPrimitiveOutputMapLocCount; - ldsOffset = m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(primitiveStride)); + primOrVertexOffset = + m_builder.CreateMul(m_waveThreadInfo.primOrVertexIndex, m_builder.getInt32(m_outputsLayout.primitiveStride)); } - ldsOffset = m_builder.CreateAdd(ldsOffset, m_builder.getInt32(4 * location)); - Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart(region)); - ldsOffset = m_builder.CreateAdd(ldsStart, ldsOffset); + Value *ldsStart = m_builder.getInt32(getMeshShaderLdsRegionStart( + region == MeshLdsRegion::PrimitiveOutput ? 
MeshLdsRegion::PrimitiveOutput : MeshLdsRegion::VertexOutput)); + Value *offsetInPrimOrVertex = + m_builder.getInt32(getOutputOffsetInPrimOrVertex(location, region == MeshLdsRegion::PrimitiveOutput)); + + auto ldsOffset = ldsStart; + ldsOffset = m_builder.CreateAdd(ldsOffset, primOrVertexOffset); + ldsOffset = m_builder.CreateAdd(ldsOffset, offsetInPrimOrVertex); return readValueFromLds(readTy, ldsOffset); } @@ -2673,6 +2952,49 @@ Value *MeshTaskShader::convertToHwShadingRate(Value *primitiveShadingRate) { return hwShadingRate; } +// ===================================================================================================================== +// Update input/output usage in resource usage for mesh shader. The info will be used to build register metadata later +// on. +void MeshTaskShader::updateMeshShaderInOutUsage() { + auto &inOutUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Mesh)->inOutUsage; + + inOutUsage.expCount = m_outputsLayout.vertexExportCount; + inOutUsage.primExpCount = m_outputsLayout.primitiveExportCount; + + // For part pipeline, below info will be used to build the metadata ".preraster_output_semantic" to correctly map + // output locations specified by API mesh shader to HW export slots. The export slots will be used to fill the + // register field SPI_PS_INPUT_CNTL.OFFSET during pipeline linking. 
+ if (m_pipelineState->isUnlinked()) { + inOutUsage.outputLocInfoMap.clear(); + for (auto &genericExport : m_outputsLayout.vertexGenericExports) { + const auto &[location, exportSlot] = genericExport; + InOutLocationInfo locInfo = {}; + locInfo.setLocation(location); + InOutLocationInfo newLocInfo = {}; + newLocInfo.setLocation(exportSlot); + inOutUsage.outputLocInfoMap[locInfo] = newLocInfo; + } + + inOutUsage.builtInOutputLocMap.clear(); + for (auto &builtInExport : m_outputsLayout.vertexBuiltInExports) { + const auto &[builtIn, exportSlot] = builtInExport; + inOutUsage.builtInOutputLocMap[builtIn] = exportSlot; + } + + inOutUsage.perPrimitiveOutputLocMap.clear(); + for (auto &genericExport : m_outputsLayout.primitiveGenericExports) { + const auto &[location, exportSlot] = genericExport; + inOutUsage.perPrimitiveOutputLocMap[location] = exportSlot; + } + + inOutUsage.perPrimitiveBuiltInOutputLocMap.clear(); + for (auto &builtInExport : m_outputsLayout.primitiveBuiltInExports) { + const auto &[builtIn, exportSlot] = builtInExport; + inOutUsage.perPrimitiveBuiltInOutputLocMap[builtIn] = exportSlot; + } + } +} + // ===================================================================================================================== // Check if barrier completion flag is needed. 
Barrier completion flag is to address this case: // diff --git a/lgc/patch/MeshTaskShader.h b/lgc/patch/MeshTaskShader.h index 853a783c99..27204e5617 100644 --- a/lgc/patch/MeshTaskShader.h +++ b/lgc/patch/MeshTaskShader.h @@ -61,6 +61,26 @@ enum class MeshLdsRegion : unsigned { // Map: LDS Region -> typedef std::unordered_map> MeshLdsLayout; +// Mesh shader outputs layout +struct MeshOutputsLayout { + std::map vertexBuiltInExports; // Map from vertex built-in output ID to export slot + std::map vertexGenericExports; // Map from vertex output location to export slot + // (exported as vertex attributes) + unsigned vertexExportCount; // Vertex export count + + std::map primitiveBuiltInExports; // Map from primitive built-in output ID to export slot + // (exported as primitive attributes) + std::map primitiveGenericExports; // Map from primitive output location to export slot + unsigned primitiveExportCount; // Primitive export count + + unsigned vertexStride; // Vertex stride (in dwords) + std::map offsetsInVertex; // Map from output location to output offset within a vertex (in dwords) + + unsigned primitiveStride; // Primitive stride (in dwords) + std::map offsetsInPrimitive; // Map from output location to output offset within a primitive + // (in dwords) +}; + // ===================================================================================================================== // Represents the handler of mesh/task shader. 
class MeshTaskShader { @@ -69,7 +89,7 @@ class MeshTaskShader { ~MeshTaskShader(); static unsigned layoutMeshShaderLds(PipelineState *pipelineState, llvm::Function *entryPoint, - MeshLdsLayout *ldsLayout = nullptr); + MeshLdsLayout *ldsLayout = nullptr, MeshOutputsLayout *outputsLayout = nullptr); void process(llvm::Function *taskEntryPoint, llvm::Function *meshEntryPoint); @@ -86,8 +106,7 @@ class MeshTaskShader { void lowerSetMeshPrimitiveIndices(SetMeshPrimitiveIndicesOp &setMeshPrimitiveIndicesOp); void lowerSetMeshPrimitiveCulled(SetMeshPrimitiveCulledOp &setMeshPrimitiveCulledOp); void lowerGetMeshBuiltinInput(GetMeshBuiltinInputOp &getMeshBuiltinInputOp); - void lowerWriteMeshVertexOutput(WriteMeshVertexOutputOp &writeMeshVertexOutputOp); - void lowerWriteMeshPrimitiveOutput(WriteMeshPrimitiveOutputOp &writeMeshPrimitiveOutputOp); + void lowerWriteMeshOutput(WriteMeshOutputOp &writeMeshOutputOp); void initWaveThreadInfo(llvm::Function *entryPoint); llvm::Value *getShaderRingEntryIndex(llvm::Function *entryPoint); @@ -130,6 +149,7 @@ class MeshTaskShader { llvm::Value *readMeshBuiltInFromLds(BuiltInKind builtIn); llvm::Value *convertToHwShadingRate(llvm::Value *primitiveShadingRate); + void updateMeshShaderInOutUsage(); bool checkNeedBarrierFlag(llvm::Function *entryPoint); @@ -138,6 +158,40 @@ class MeshTaskShader { return m_ldsLayout[region].first; } + unsigned getOutputOffsetInPrimOrVertex(unsigned location, bool inPrimitive) { + if (inPrimitive) { + assert(m_outputsLayout.offsetsInPrimitive.count(location) > 0); // Must exist + return m_outputsLayout.offsetsInPrimitive[location]; + } + + assert(m_outputsLayout.offsetsInVertex.count(location) > 0); // Must exist + return m_outputsLayout.offsetsInVertex[location]; + } + + unsigned getOutputExportSlot(unsigned location, bool primitive) { + if (primitive) { + if (m_outputsLayout.primitiveGenericExports.count(location) > 0) + return m_outputsLayout.primitiveGenericExports[location]; + return InvalidValue; // 
Not exist + } + + if (m_outputsLayout.vertexGenericExports.count(location) > 0) + return m_outputsLayout.vertexGenericExports[location]; + return InvalidValue; // Not exist + } + + unsigned getOutputExportSlot(BuiltInKind builtIn, bool primitive) { + if (primitive) { + if (m_outputsLayout.primitiveBuiltInExports.count(builtIn) > 0) + return m_outputsLayout.primitiveBuiltInExports[builtIn]; + return InvalidValue; // Not exist + } + + if (m_outputsLayout.vertexBuiltInExports.count(builtIn) > 0) + return m_outputsLayout.vertexBuiltInExports[builtIn]; + return InvalidValue; // Not exist + } + llvm::Value *readValueFromLds(llvm::Type *readTy, llvm::Value *ldsOffset); void writeValueToLds(llvm::Value *writeValue, llvm::Value *ldsOffset); void atomicOpWithLds(llvm::AtomicRMWInst::BinOp atomicOp, llvm::Value *atomicValue, llvm::Value *ldsOffset); @@ -185,7 +239,8 @@ class MeshTaskShader { GfxIpVersion m_gfxIp; // Graphics IP version info - MeshLdsLayout m_ldsLayout; // Mesh shader LDS layout + MeshLdsLayout m_ldsLayout; // Mesh shader LDS layout + MeshOutputsLayout m_outputsLayout; // Mesh shader outputs layout }; } // namespace lgc diff --git a/lgc/patch/NggPrimShader.cpp b/lgc/patch/NggPrimShader.cpp index 24db846493..de17aef8ac 100644 --- a/lgc/patch/NggPrimShader.cpp +++ b/lgc/patch/NggPrimShader.cpp @@ -3117,8 +3117,10 @@ void NggPrimShader::runEs(ArrayRef args) { return; } - if (m_gfxIp.major >= 11 && !m_hasGs) // For GS, vertex attribute exports are in copy shader - processVertexAttribExport(m_esHandlers.main); + if (!m_pipelineState->exportAttributeByExportInstruction()) { + if (!m_hasGs) // For GS, ATM is done in copy shader + exportVertexAttributeThroughMemory(m_esHandlers.main); + } Value *esGsOffset = nullptr; if (m_hasGs) { @@ -3162,16 +3164,9 @@ void NggPrimShader::runEs(ArrayRef args) { SmallVector esArgs; - // Setup attribute ring base and relative vertex index in subgroup as two additional arguments to export vertex - // attributes through memory - if 
(m_gfxIp.major >= 11 && !m_hasGs) { // For GS, vertex attribute exports are in copy shader - const auto attribCount = - m_pipelineState->getShaderResourceUsage(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex) - ->inOutUsage.expCount; - if (attribCount > 0) { - esArgs.push_back(m_nggInputs.attribRingBase); - esArgs.push_back(m_nggInputs.threadIdInSubgroup); - } + if (!m_pipelineState->exportAttributeByExportInstruction()) { + if (!m_hasGs) // For GS, ATM is in copy shader + appendAttributeThroughMemoryArguments(esArgs); } // Set up user data SGPRs @@ -3361,17 +3356,8 @@ Value *NggPrimShader::runPartEs(ArrayRef args, Value *position) { SmallVector partEsArgs; - // Setup attribute ring base and relative vertex index in subgroup as two additional arguments to export vertex - // attributes through memory - if (m_gfxIp.major >= 11 && deferredVertexExport) { - const auto attribCount = - m_pipelineState->getShaderResourceUsage(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex) - ->inOutUsage.expCount; - if (attribCount > 0) { - partEsArgs.push_back(m_nggInputs.attribRingBase); - partEsArgs.push_back(m_nggInputs.threadIdInSubgroup); - } - } + if (!m_pipelineState->exportAttributeByExportInstruction() && deferredVertexExport) + appendAttributeThroughMemoryArguments(partEsArgs); if (deferredVertexExport) partEsArgs.push_back(position); // Setup vertex position data as the additional argument @@ -3421,7 +3407,8 @@ void NggPrimShader::splitEs() { if (func.isIntrinsic() && func.getIntrinsicID() == Intrinsic::amdgcn_exp) expFuncs.push_back(&func); else if (m_gfxIp.major >= 11) { - if (func.getName().starts_with(lgcName::NggAttribExport) || func.getName().starts_with(lgcName::NggXfbExport)) + if (func.getName().starts_with(lgcName::NggAttributeThroughMemory) || + func.getName().starts_with(lgcName::NggXfbExport)) expFuncs.push_back(&func); } } @@ -3592,8 +3579,8 @@ void NggPrimShader::splitEs() { } } - if (m_gfxIp.major >= 11) - 
processVertexAttribExport(esVertexExporter); + if (!m_pipelineState->exportAttributeByExportInstruction()) + exportVertexAttributeThroughMemory(esVertexExporter); // Remove original ES since it is no longer needed assert(m_esHandlers.main->use_empty()); @@ -3851,13 +3838,8 @@ void NggPrimShader::runCopyShader(ArrayRef args) { SmallVector copyShaderArgs; if (m_gfxIp.major >= 11) { - // Setup attribute ring base and relative vertex index in subgroup as two additional arguments to export vertex - // attributes through memory - const auto attribCount = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.expCount; - if (attribCount > 0) { - copyShaderArgs.push_back(m_nggInputs.attribRingBase); - copyShaderArgs.push_back(m_nggInputs.threadIdInSubgroup); - } + if (!m_pipelineState->exportAttributeByExportInstruction()) + appendAttributeThroughMemoryArguments(copyShaderArgs); // Global table auto userData = args[NumSpecialSgprInputs]; @@ -3876,8 +3858,8 @@ void NggPrimShader::runCopyShader(ArrayRef args) { // ===================================================================================================================== // Mutates copy shader to handle the reading GS outputs from GS-VS ring. void NggPrimShader::mutateCopyShader() { - if (m_gfxIp.major >= 11) - processVertexAttribExport(m_gsHandlers.copyShader); + if (!m_pipelineState->exportAttributeByExportInstruction()) + exportVertexAttributeThroughMemory(m_gsHandlers.copyShader); IRBuilder<>::InsertPointGuard guard(m_builder); @@ -6104,14 +6086,15 @@ Value *NggPrimShader::ballot(Value *value) { } // ===================================================================================================================== -// Processes vertex attribute export calls in the target function. We mutate the argument list of the target function +// Export vertex attribute through memory (ATM) by handling the calls.
We mutate the argument list of the target function // by adding two additional arguments (one is attribute ring base and the other is relative vertex index in subgroup). // Also, we expand all export calls by replacing it with real instructions that do vertex attribute exporting through // memory. // // @param [in/out] target : Target function to process vertex attribute export -void NggPrimShader::processVertexAttribExport(Function *&target) { - assert(m_gfxIp.major >= 11); // For GFX11+ +void NggPrimShader::exportVertexAttributeThroughMemory(Function *&target) { + assert(m_gfxIp.major >= 11); // For GFX11+ + assert(!m_pipelineState->exportAttributeByExportInstruction()); // ATM is allowed ShaderStageEnum shaderStage = m_hasGs ? ShaderStage::Geometry : (m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex); @@ -6156,7 +6139,7 @@ void NggPrimShader::processVertexAttribExport(Function *&target) { SmallVector removedCalls; for (auto &func : target->getParent()->functions()) { - if (func.getName().starts_with(lgcName::NggAttribExport)) { + if (func.getName().starts_with(lgcName::NggAttributeThroughMemory)) { for (auto user : func.users()) { CallInst *const call = dyn_cast(user); assert(call); @@ -6264,6 +6247,28 @@ void NggPrimShader::processVertexAttribExport(Function *&target) { } } +// ===================================================================================================================== +// Append additional arguments to the argument list for attribute-through-memory (ATM) of the specified shader stage. +// Currently, two arguments are required to do attribute-through-memory: (1) the attribute ring base; (2) relative +// vertex index in NGG subgroup. 
+// +// @param [in/out] args : The arguments that will be appended to +void NggPrimShader::appendAttributeThroughMemoryArguments(SmallVectorImpl &args) { + assert(m_gfxIp.major >= 11); // For GFX11+ + assert(!m_pipelineState->exportAttributeByExportInstruction()); // ATM is allowed + + const auto attribCount = + m_pipelineState + ->getShaderResourceUsage(m_hasGs ? ShaderStage::Geometry + : (m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex)) + ->inOutUsage.expCount; + if (attribCount == 0) + return; // No attributes + + args.push_back(m_nggInputs.attribRingBase); + args.push_back(m_nggInputs.threadIdInSubgroup); +} + // ===================================================================================================================== // Processes SW emulated transform feedback when API GS is not present. // @@ -7198,7 +7203,8 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args expFuncs.push_back(&func); } else { if ((func.isIntrinsic() && func.getIntrinsicID() == Intrinsic::amdgcn_exp) || - func.getName().starts_with(lgcName::NggAttribExport) || func.getName().starts_with(lgcName::NggXfbExport)) + func.getName().starts_with(lgcName::NggAttributeThroughMemory) || + func.getName().starts_with(lgcName::NggXfbExport)) expFuncs.push_back(&func); } } @@ -7215,7 +7221,9 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args Function *xfbFetcher = target; if (dontClone) { - processVertexAttribExport(target); + if (!m_pipelineState->exportAttributeByExportInstruction()) + exportVertexAttributeThroughMemory(target); + xfbFetcher = addFunctionArgs(target, xfbReturnTy, {}, {}, 0); // Original target function is no longer needed @@ -7425,16 +7433,9 @@ Value *NggPrimShader::fetchXfbOutput(Function *target, ArrayRef args // If we don't clone the target function, we are going to run it and handle vertex attribute through memory here. 
if (dontClone) { - // Setup attribute ring base and relative vertex index in subgroup as two additional arguments to export vertex - // attributes through memory - if (m_gfxIp.major >= 11 && !m_hasGs) { // For GS, vertex attribute exports are in copy shader - const auto attribCount = - m_pipelineState->getShaderResourceUsage(m_hasTes ? ShaderStage::TessEval : ShaderStage::Vertex) - ->inOutUsage.expCount; - if (attribCount > 0) { - xfbFetcherArgs.push_back(m_nggInputs.attribRingBase); - xfbFetcherArgs.push_back(m_nggInputs.threadIdInSubgroup); - } + if (!m_pipelineState->exportAttributeByExportInstruction()) { + if (!m_hasGs) // For GS, ATM is done in copy shader + appendAttributeThroughMemoryArguments(xfbFetcherArgs); } } diff --git a/lgc/patch/NggPrimShader.h b/lgc/patch/NggPrimShader.h index 71a8e27982..ff7afecfcf 100644 --- a/lgc/patch/NggPrimShader.h +++ b/lgc/patch/NggPrimShader.h @@ -298,7 +298,8 @@ class NggPrimShader { llvm::Value *fetchCullDistanceSignMask(llvm::Value *vertexIndex); llvm::Value *calcVertexItemOffset(unsigned streamId, llvm::Value *vertexIndex); - void processVertexAttribExport(llvm::Function *&target); + void exportVertexAttributeThroughMemory(llvm::Function *&target); + void appendAttributeThroughMemoryArguments(llvm::SmallVectorImpl &args); void processSwXfb(llvm::ArrayRef args); void processSwXfbWithGs(llvm::ArrayRef args); diff --git a/lgc/patch/PassRegistry.inc b/lgc/patch/PassRegistry.inc index f3750af730..ecf66e9967 100644 --- a/lgc/patch/PassRegistry.inc +++ b/lgc/patch/PassRegistry.inc @@ -73,6 +73,7 @@ LLPC_LOOP_PASS("lgc-patch-loop-metadata", PatchLoopMetadata) LLPC_FUNCTION_PASS("lgc-patch-buffer-op", PatchBufferOp) LLPC_MODULE_PASS("lgc-patch-workarounds", PatchWorkarounds) LLPC_FUNCTION_PASS("lgc-patch-load-scalarizer", PatchLoadScalarizer) +LLPC_FUNCTION_PASS("lgc-patch-mul-dx9-zero", PatchMulDx9Zero) LLPC_MODULE_PASS("lgc-patch-null-frag-shader", PatchNullFragShader) LLPC_MODULE_PASS("lgc-patch-tcs-passthrough-shader", 
TcsPassthroughShader) LLPC_MODULE_PASS("lgc-patch-image-op-collect", PatchImageOpCollect) @@ -84,6 +85,7 @@ LLPC_MODULE_PASS("lgc-lower-desc", LowerDesc) LLPC_FUNCTION_PASS("lgc-combine-cooperative-matrix", CombineCooperativeMatrix) LLPC_MODULE_PASS("lgc-lower-cooperative-matrix", LowerCooperativeMatrix) LLPC_MODULE_PASS("lgc-lower-gpurt", LowerGpuRt) +LLPC_MODULE_PASS("lgc-lower-rayquery", LowerRayQueryWrapper) #undef LLPC_PASS #undef LLPC_LOOP_PASS diff --git a/lgc/patch/Patch.cpp b/lgc/patch/Patch.cpp index 9e759fec1e..746bf9de2c 100644 --- a/lgc/patch/Patch.cpp +++ b/lgc/patch/Patch.cpp @@ -29,6 +29,7 @@ *********************************************************************************************************************** */ #include "lgc/patch/Patch.h" +#include "LowerRayQueryWrapper.h" #include "PatchNullFragShader.h" #include "llvmraytracing/Continuations.h" #include "lgc/LgcContext.h" @@ -55,6 +56,7 @@ #include "lgc/patch/PatchLlvmIrInclusion.h" #include "lgc/patch/PatchLoadScalarizer.h" #include "lgc/patch/PatchLoopMetadata.h" +#include "lgc/patch/PatchMulDx9Zero.h" #include "lgc/patch/PatchPeepholeOpt.h" #include "lgc/patch/PatchPreparePipelineAbi.h" #include "lgc/patch/PatchReadFirstLane.h" @@ -139,6 +141,9 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T if (patchTimer) LgcContext::createAndAddStartStopTimer(passMgr, patchTimer, true); + if (pipelineState->getOptions().useGpurt) { + passMgr.addPass(LowerRayQueryWrapper()); + } const auto indirectMode = pipelineState->getOptions().rtIndirectMode; if (indirectMode == RayTracingIndirectMode::ContinuationsContinufy || indirectMode == RayTracingIndirectMode::Continuations) { @@ -180,7 +185,6 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T } passMgr.addPass(IPSCCPPass()); - passMgr.addPass(createModuleToFunctionPassAdaptor(CombineCooperativeMatrix())); // Lower the cooperative matrix passMgr.addPass(LowerCooperativeMatrix()); @@ -429,6 
+433,7 @@ void Patch::addOptimizationPasses(lgc::PassManager &passMgr, uint32_t optLevel) scalarizerOptions.ScalarizeMinBits = 32; fpm.addPass(ScalarizerPass(scalarizerOptions)); #endif + fpm.addPass(PatchMulDx9Zero()); fpm.addPass(PatchLoadScalarizer()); fpm.addPass(InstSimplifyPass()); fpm.addPass(NewGVNPass()); @@ -508,17 +513,19 @@ Constant *Patch::getLdsVariable(PipelineState *pipelineState, Function *func, bo const unsigned ldsSize = staticLdsSize + rtLdsSize; // See if module already has LDS variable. + GlobalVariable *lds = nullptr; auto oldLds = func->getParent()->getNamedValue(ldsName); - if (oldLds) - return cast(oldLds); - - // Else create LDS variable for this function. - // LDS type: [ldsSize * i32], address space 3 const auto i32Ty = Type::getInt32Ty(*context); - const auto ldsTy = ArrayType::get(i32Ty, ldsSize); - auto lds = new GlobalVariable(*module, ldsTy, false, GlobalValue::ExternalLinkage, nullptr, Twine(ldsName), nullptr, - GlobalValue::NotThreadLocal, ADDR_SPACE_LOCAL); - lds->setAlignment(MaybeAlign(sizeof(unsigned))); + if (oldLds) { + lds = cast(oldLds); + } else { + // Else create LDS variable for this function. 
+ // LDS type: [ldsSize * i32], address space 3 + const auto ldsTy = ArrayType::get(i32Ty, ldsSize); + lds = new GlobalVariable(*module, ldsTy, false, GlobalValue::ExternalLinkage, nullptr, Twine(ldsName), nullptr, + GlobalValue::NotThreadLocal, ADDR_SPACE_LOCAL); + lds->setAlignment(MaybeAlign(sizeof(unsigned))); + } if (rtStack) { auto *offset = Constant::getIntegerValue(i32Ty, APInt(32, staticLdsSize)); diff --git a/lgc/patch/PatchBufferOp.cpp b/lgc/patch/PatchBufferOp.cpp index 4cf36678ac..6c3044f299 100644 --- a/lgc/patch/PatchBufferOp.cpp +++ b/lgc/patch/PatchBufferOp.cpp @@ -54,6 +54,7 @@ #define DEBUG_TYPE "lgc-patch-buffer-op" +using namespace CompilerUtils; using namespace llvm; using namespace lgc; @@ -199,6 +200,8 @@ void BufferOpLowering::registerVisitors(llvm_dialects::VisitorBuildergetType(); - if (op == AtomicRMWInst::FMin || op == AtomicRMWInst::FMax || op == AtomicRMWInst::FAdd) { + if (op == AtomicRMWInst::FMin || op == AtomicRMWInst::FMax) { Value *const pointer = atomicRmwInst.getPointerOperand(); m_builder.SetInsertPoint(&atomicRmwInst); Intrinsic::ID intrinsic = Intrinsic::not_intrinsic; @@ -606,9 +610,6 @@ void BufferOpLowering::visitAtomicRMWInst(AtomicRMWInst &atomicRmwInst) { case AtomicRMWInst::FMax: intrinsic = Intrinsic::amdgcn_global_atomic_fmax; break; - case AtomicRMWInst::FAdd: - intrinsic = Intrinsic::amdgcn_global_atomic_fadd; - break; default: llvm_unreachable("Should never be called!"); break; @@ -624,7 +625,7 @@ void BufferOpLowering::visitAtomicRMWInst(AtomicRMWInst &atomicRmwInst) { } else if (atomicRmwInst.getPointerAddressSpace() == ADDR_SPACE_LOCAL) { AtomicRMWInst::BinOp op = atomicRmwInst.getOperation(); Type *const storeType = atomicRmwInst.getValOperand()->getType(); - if (op == AtomicRMWInst::FMin || op == AtomicRMWInst::FMax || op == AtomicRMWInst::FAdd) { + if (op == AtomicRMWInst::FMin || op == AtomicRMWInst::FMax) { Value *const pointer = atomicRmwInst.getPointerOperand(); 
m_builder.SetInsertPoint(&atomicRmwInst); Intrinsic::ID intrinsic = Intrinsic::not_intrinsic; @@ -635,9 +636,6 @@ void BufferOpLowering::visitAtomicRMWInst(AtomicRMWInst &atomicRmwInst) { case AtomicRMWInst::FMax: intrinsic = Intrinsic::amdgcn_ds_fmax; break; - case AtomicRMWInst::FAdd: - intrinsic = Intrinsic::amdgcn_ds_fadd; - break; default: llvm_unreachable("Should never be called!"); break; @@ -653,6 +651,7 @@ void BufferOpLowering::visitAtomicRMWInst(AtomicRMWInst &atomicRmwInst) { m_typeLowering.eraseInstruction(&atomicRmwInst); atomicRmwInst.replaceAllUsesWith(atomicCall); } +#endif } } @@ -671,13 +670,12 @@ void BufferOpLowering::visitBitCastInst(BitCastInst &bitCastInst) { // ===================================================================================================================== // Lower a buffer.addr.to.ptr op, to convert an i64 address to a buffer fat pointer. void BufferOpLowering::visitBufferAddrToPtr(BufferAddrToPtrOp &op) { - BuilderImpl builder(&m_pipelineState); - builder.setShaderStage(getShaderStage(op.getFunction())); - builder.SetInsertPoint(&op); + m_builder.SetInsertPoint(&op); + + Value *address = m_builder.CreatePtrToInt(op.getAddress(), m_builder.getInt64Ty()); + address = m_builder.CreateBitCast(address, FixedVectorType::get(m_builder.getInt32Ty(), 2)); + Value *descriptor = createCompactDesc(address, nullptr); - // Extend the i64 address to a <4 x i32> descriptor. 
- Value *descriptor = builder.buildBufferCompactDesc( - builder.CreateBitCast(op.getAddr(), FixedVectorType::get(builder.getInt32Ty(), 2)), 0); m_typeLowering.replaceInstruction(&op, {descriptor, ConstantPointerNull::get(m_offsetType)}); auto &di = m_descriptors[descriptor]; @@ -744,46 +742,9 @@ void BufferOpLowering::visitStridedBufferDescToPtr(StridedBufferDescToPtrOp &des void BufferOpLowering::visitStridedBufferAddrAndStrideToPtr(StridedBufferAddrAndStrideToPtrOp &addrAndStrideToPtr) { m_builder.SetInsertPoint(&addrAndStrideToPtr); - auto *addrLo = m_builder.CreateTrunc(addrAndStrideToPtr.getAddress(), m_builder.getInt32Ty()); - - // Build normal buffer descriptor - // Dword 0 - Value *bufDesc = PoisonValue::get(FixedVectorType::get(m_builder.getInt32Ty(), 4)); - bufDesc = m_builder.CreateInsertElement(bufDesc, addrLo, uint64_t(0)); - - // Dword 1 - auto *addrHi = - m_builder.CreateTrunc(m_builder.CreateLShr(addrAndStrideToPtr.getAddress(), 32), m_builder.getInt32Ty()); - auto *stride = m_builder.CreateShl(addrAndStrideToPtr.getStride(), 16); - addrHi = m_builder.CreateOr(addrHi, stride); - bufDesc = m_builder.CreateInsertElement(bufDesc, addrHi, 1); - - // Dword 2 - SqBufRsrcWord2 sqBufRsrcWord2{}; - sqBufRsrcWord2.bits.numRecords = UINT32_MAX; - bufDesc = m_builder.CreateInsertElement(bufDesc, m_builder.getInt32(sqBufRsrcWord2.u32All), 2); - - // Dword 3 - SqBufRsrcWord3 sqBufRsrcWord3{}; - sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X; - sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; - sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; - sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; - - auto gfxIp = m_pipelineState.getTargetInfo().getGfxIpVersion(); - if (gfxIp.major == 10) { - sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; - sqBufRsrcWord3.gfx10.resourceLevel = 1; - sqBufRsrcWord3.gfx10.oobSelect = 2; - assert(sqBufRsrcWord3.u32All == 0x21014FAC); - } else if (gfxIp.major >= 11) { - sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; - sqBufRsrcWord3.gfx11.oobSelect 
= 2; - assert(sqBufRsrcWord3.u32All == 0x20014FAC); - } else { - llvm_unreachable("Not implemented!"); - } - bufDesc = m_builder.CreateInsertElement(bufDesc, m_builder.getInt32(sqBufRsrcWord3.u32All), 3); + Value *address = m_builder.CreatePtrToInt(addrAndStrideToPtr.getAddress(), m_builder.getInt64Ty()); + address = m_builder.CreateBitCast(address, FixedVectorType::get(m_builder.getInt32Ty(), 2)); + Value *bufDesc = createCompactDesc(address, addrAndStrideToPtr.getStride()); Constant *const nullPointerOff = ConstantPointerNull::get(m_offsetType); m_typeLowering.replaceInstruction(&addrAndStrideToPtr, {bufDesc, nullPointerOff, m_builder.getInt32(0)}); @@ -799,6 +760,65 @@ void BufferOpLowering::visitStridedBufferAddrAndStrideToPtr(StridedBufferAddrAnd #endif } +// ===================================================================================================================== +// Visits "buffer.load.desc.to.ptr" instruction. +// +// @param loadDescToPtr : The instruction +void BufferOpLowering::visitBufferLoadDescToPtr(BufferLoadDescToPtrOp &loadDescToPtr) { + m_builder.SetInsertPoint(&loadDescToPtr); + Value *descriptor = + createLoadDesc(loadDescToPtr.getDescPtr(), loadDescToPtr.getForceRawView(), loadDescToPtr.getIsCompact()); + + if (loadDescToPtr.getIsCompact()) + descriptor = createCompactDesc(descriptor, nullptr); + + m_typeLowering.replaceInstruction(&loadDescToPtr, {descriptor, ConstantPointerNull::get(m_offsetType)}); + + auto &di = m_descriptors[descriptor]; + + // The loadInst isn't computed by UniformityAnalysis so that we should use its source for divergent check + Value *loadSrc = loadDescToPtr.getDescPtr(); + +#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 458033 + // Old version of the code + di.divergent = m_uniformityInfo.isDivergent(*loadSrc); +#else + // New version of the code (also handles unknown version, which we treat as latest) + di.divergent = m_uniformityInfo.isDivergent(loadSrc); +#endif + LLVM_DEBUG(dbgs() << (di.divergent.value() 
? "Divergent" : "Uniform") << " descriptor: " << *descriptor << '\n'); +} + +// ===================================================================================================================== +// Visits "strided.buffer.load.desc.to.ptr" instruction. +// +// @param loadDescToPtr : The instruction +void BufferOpLowering::visitStridedBufferLoadDescToPtr(StridedBufferLoadDescToPtrOp &loadDescToPtr) { + m_builder.SetInsertPoint(&loadDescToPtr); + Value *descriptor = + createLoadDesc(loadDescToPtr.getDescPtr(), loadDescToPtr.getForceRawView(), loadDescToPtr.getIsCompact()); + + if (loadDescToPtr.getIsCompact()) + descriptor = createCompactDesc(descriptor, loadDescToPtr.getStride()); + + m_typeLowering.replaceInstruction(&loadDescToPtr, + {descriptor, ConstantPointerNull::get(m_offsetType), m_builder.getInt32(0)}); + + auto &di = m_descriptors[descriptor]; + + // The loadInst isn't computed by UniformityAnalysis so that we should use its source for divergent check + Value *loadSrc = loadDescToPtr.getDescPtr(); + +#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 458033 + // Old version of the code + di.divergent = m_uniformityInfo.isDivergent(*loadSrc); +#else + // New version of the code (also handles unknown version, which we treat as latest) + di.divergent = m_uniformityInfo.isDivergent(loadSrc); +#endif + LLVM_DEBUG(dbgs() << (di.divergent.value() ? "Divergent" : "Uniform") << " descriptor: " << *descriptor << '\n'); +} + // ===================================================================================================================== // Visits "strided.index.add" instruction. 
// @@ -1365,9 +1385,9 @@ void BufferOpLowering::postVisitLoadTfeOp(LoadTfeOp &loadTfe) { bufferLoad = m_builder.CreateIntrinsic(Intrinsic::amdgcn_raw_buffer_load, loadTfe.getType(), {bufferDesc, offset, m_builder.getInt32(0), m_builder.getInt32(0)}); } else { - bufferLoad = m_builder.CreateIntrinsic( - Intrinsic::amdgcn_struct_buffer_load, loadTfe.getType(), - {bufferDesc, m_builder.getInt32(0), offset, m_builder.getInt32(0), m_builder.getInt32(0)}); + Value *index = pointerValues[2]; + bufferLoad = m_builder.CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_load, loadTfe.getType(), + {bufferDesc, index, offset, m_builder.getInt32(0), m_builder.getInt32(0)}); } if (getDescriptorInfo(bufferDesc).divergent.value()) { BuilderImpl builderImpl(&m_pipelineState); @@ -1841,3 +1861,83 @@ Value *BufferOpLowering::createGlobalPointerAccess(Value *const bufferDesc, Valu } return nullptr; } + +// ===================================================================================================================== +// Create a normal buffer descriptor +// +// @param buffAddress : The buffer address +// @param stride : The stride for strided buffer +Value *BufferOpLowering::createCompactDesc(Value *buffAddress, Value *stride) { + // Extract compact buffer descriptor + Value *addrLo = m_builder.CreateExtractElement(buffAddress, uint64_t(0)); + Value *addrHi = m_builder.CreateExtractElement(buffAddress, 1); + + // Build normal buffer descriptor + // Dword 0 + Value *bufDesc = PoisonValue::get(FixedVectorType::get(m_builder.getInt32Ty(), 4)); + bufDesc = m_builder.CreateInsertElement(bufDesc, addrLo, uint64_t(0)); + + // Dword 1 + if (stride) + addrHi = m_builder.CreateOr(addrHi, m_builder.CreateShl(stride, 16)); + bufDesc = m_builder.CreateInsertElement(bufDesc, addrHi, 1); + + // Dword 2 + SqBufRsrcWord2 sqBufRsrcWord2 = {}; + sqBufRsrcWord2.bits.numRecords = UINT32_MAX; + bufDesc = m_builder.CreateInsertElement(bufDesc, m_builder.getInt32(sqBufRsrcWord2.u32All), 2); + + // Dword 3 
+ SqBufRsrcWord3 sqBufRsrcWord3 = {}; + sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X; + sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; + sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; + sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; + + auto gfxIp = m_pipelineState.getTargetInfo().getGfxIpVersion(); + if (gfxIp.major == 10) { + sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx10.resourceLevel = 1; + sqBufRsrcWord3.gfx10.oobSelect = 2; + assert(sqBufRsrcWord3.u32All == 0x21014FAC); + } else if (gfxIp.major >= 11) { + sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx11.oobSelect = 2; + assert(sqBufRsrcWord3.u32All == 0x20014FAC); + } else { + llvm_unreachable("Not implemented!"); + } + bufDesc = m_builder.CreateInsertElement(bufDesc, m_builder.getInt32(sqBufRsrcWord3.u32All), 3); + return bufDesc; +} + +// ===================================================================================================================== +// Create a load from the given buffer address +// +// @param buffAddress : The buffer address +// @param forceRawView : Whether to force a raw buffer view +// @param isCompact : Whether to load a compact buffer +Value *BufferOpLowering::createLoadDesc(Value *buffAddress, bool forceRawView, bool isCompact) { + Type *descTy = FixedVectorType::get(m_builder.getInt32Ty(), isCompact ? 2 : 4); + Value *descriptor = m_builder.CreateLoad(descTy, buffAddress); + { + // Force convert the buffer view to raw view. 
+ if (forceRawView) { + Value *desc1 = m_builder.CreateExtractElement(descriptor, 1); + Value *desc2 = m_builder.CreateExtractElement(descriptor, 2); + Value *desc3 = m_builder.CreateExtractElement(descriptor, 3); + // stride is 14 bits in dword1[29:16] + Value *stride = + m_builder.CreateAnd(m_builder.CreateLShr(desc1, m_builder.getInt32(16)), m_builder.getInt32(0x3fff)); + stride = m_builder.CreateBinaryIntrinsic(Intrinsic::smax, stride, m_builder.getInt32(1)); + // set srd with new stride = 0 and new num_record = stride * num_record, num_record is dword2[31:0] + descriptor = + m_builder.CreateInsertElement(descriptor, m_builder.CreateAnd(desc1, m_builder.getInt32(0xc000ffff)), 1); + descriptor = m_builder.CreateInsertElement(descriptor, m_builder.CreateMul(stride, desc2), 2); + // gfx10 and gfx11 have oob fields with 2 bits in dword3[29:28] here force to set to 3 as OOB_COMPLETE mode. + descriptor = + m_builder.CreateInsertElement(descriptor, m_builder.CreateOr(desc3, m_builder.getInt32(0x30000000)), 3); + } + } + return descriptor; +} diff --git a/lgc/patch/PatchCheckShaderCache.cpp b/lgc/patch/PatchCheckShaderCache.cpp index c48b08991c..538f02a4d0 100644 --- a/lgc/patch/PatchCheckShaderCache.cpp +++ b/lgc/patch/PatchCheckShaderCache.cpp @@ -110,10 +110,9 @@ PreservedAnalyses PatchCheckShaderCache::run(Module &module, ModuleAnalysisManag // locations of generic outputs). We have to add it to shader hash calculation. streamMapEntries(resUsage->inOutUsage.gs.builtInOutLocs, stream); } else if (stage == ShaderStage::Mesh) { - // NOTE: For mesh shader, those two special map info (from built-in IDs to export locations of vertex/primitive - // attributes) is used to export vertex/primitive attributes. - streamMapEntries(resUsage->inOutUsage.mesh.vertexBuiltInExportSlots, stream); - streamMapEntries(resUsage->inOutUsage.mesh.primitiveBuiltInExportSlots, stream); + // NOTE: For mesh shader, those two special maps are used to export vertex/primitive attributes. 
+ streamMapEntries(resUsage->inOutUsage.mesh.vertexOutputComponents, stream); + streamMapEntries(resUsage->inOutUsage.mesh.primitiveOutputComponents, stream); } // Store the result of the hash for this shader stage. diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index 3deff7bf30..c5e357af0e 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -165,6 +165,8 @@ PreservedAnalyses PatchEntryPointMutate::run(Module &module, ModuleAnalysisManag processGroupMemcpy(module); processDriverTableLoad(module); + processPops(module); + return PreservedAnalyses::none(); } @@ -507,6 +509,204 @@ void PatchEntryPointMutate::lowerGroupMemcpy(GroupMemcpyOp &groupMemcpyOp) { } } +// ===================================================================================================================== +// Process PopsBeginInterlockOp and PopsEndInterlockOp. +// +// @param module : LLVM module +void PatchEntryPointMutate::processPops(llvm::Module &module) { + SmallVector callsToRemove; + + struct Payload { + SmallVectorImpl &callsToRemove; + PatchEntryPointMutate *self; + }; + + Payload payload = {callsToRemove, this}; + static auto visitor = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](auto &payload, auto &op) { + payload.self->lowerPopsBeginInterlock(op); + payload.callsToRemove.push_back(&op); + }) + .add([](auto &payload, auto &op) { + payload.self->lowerPopsEndInterlock(op); + payload.callsToRemove.push_back(&op); + }) + .build(); + visitor.visit(payload, module); + + for (auto call : payload.callsToRemove) + call->eraseFromParent(); +} + +// ===================================================================================================================== +// Lower PopsBeginInterlockOp. 
+// +// @param popsBeginInterlockOp : Call instruction op to begin a POPS critical section +void PatchEntryPointMutate::lowerPopsBeginInterlock(PopsBeginInterlockOp &popsBeginInterlockOp) { + Function *entryPoint = popsBeginInterlockOp.getFunction(); + assert(getShaderStage(entryPoint) == ShaderStage::Fragment); // Must be FS + + BuilderBase builder(&popsBeginInterlockOp); + + // + // The processing is something like this: + // + // Pre-GFX11: + // The layout of collision wave ID is as follows: + // + // +------------+-----------+---------------------------+-----------------+ + // | Overlapped | Packer ID | Newest Overlapped Wave ID | Current Wave ID | + // | [31] | [29:28] | [25:16] | [9:0] | + // +------------+-----------+---------------------------+-----------------+ + // + // POPS_BEGIN_INTERLOCK() { + // isOverlapped = collisionWaveId[31] + // if (isOverlapped) { + // packerId = collisionWaveId[29:28] + // s_setreg(HW_REG_POPS_PACKER, (packerId << 1) | 0x1) + // + // currentWaveId = collisionWaveId[9:0] + // waveIdRemapOffset = -(currentWaveId + 1) = ~currentWaveId + // + // newestOverlappedWaveId = collisionWaveId[25:16] + // newestOverlappedWaveId += waveIdRemapOffset + // + // Load srcPopsExitingWaveId + // srcPopsExitingWaveId += waveIdRemapOffset + // while (srcPopsExitingWaveId <= newestOverlappedWaveId) { + // s_sleep(0xFFFF) + // Reload srcPopsExitingWaveId + // srcPopsExitingWaveId += waveIdRemapOffset + // } + // } + // } + // + // GFX11+: + // POPS_BEGIN_INTERLOCK() { + // s_wait_event(EXPORT_READY) + // } + // + auto gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion(); + if (gfxIp.major >= 11) { + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_s_wait_event_export_ready, {}); + return; + } + + auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Fragment)->entryArgIdxs.fs; + auto collisionWaveId = getFunctionArgument(entryPoint, entryArgIdxs.collisionWaveId); + + auto checkOverlapBlock = 
builder.GetInsertBlock(); + auto processOverlapBlock = checkOverlapBlock->splitBasicBlock(&popsBeginInterlockOp, ".processOverlap"); + auto waveWaitingHeaderBlock = processOverlapBlock->splitBasicBlock(&popsBeginInterlockOp, ".waveWaitingHeader"); + auto waveWaitingBodyBlock = waveWaitingHeaderBlock->splitBasicBlock(&popsBeginInterlockOp, ".waveWaitingBody"); + auto endProcessOverlapBlock = waveWaitingBodyBlock->splitBasicBlock(&popsBeginInterlockOp, ".endProcessOverlap"); + + // Modify ".checkOverlap" block + { + builder.SetInsertPoint(checkOverlapBlock->getTerminator()); + + auto isOverlapped = builder.CreateAnd(builder.CreateLShr(collisionWaveId, 31), 0x1); + isOverlapped = builder.CreateTrunc(isOverlapped, builder.getInt1Ty()); + builder.CreateCondBr(isOverlapped, processOverlapBlock, endProcessOverlapBlock); + + checkOverlapBlock->getTerminator()->eraseFromParent(); // Remove old terminator + } + + // Construct ".processOverlap" block + Value *waveIdRemapOffset = nullptr; + Value *newestOverlappedWaveId = nullptr; + { + builder.SetInsertPoint(processOverlapBlock->getTerminator()); + + auto packerId = builder.CreateAnd(builder.CreateLShr(collisionWaveId, 28), 0x3); + // POPS_PACKER: [0] Enable; [2:1] Packer ID + auto hwReg = [=](unsigned hwRegId, unsigned offset, unsigned size) { + // The HW register of s_setreg has this layout: + // [5:0] ID of HW register; [10:6] Offset; [15:11] Size + return ((hwRegId) | (offset << 6) | ((size - 1) << 11)); + }; + static const unsigned HwRegPopsPacker = 25; + auto popsPacker = builder.CreateOr(builder.CreateShl(packerId, 1), 0x1); + builder.CreateIntrinsic(builder.getVoidTy(), Intrinsic::amdgcn_s_setreg, + {builder.getInt32(hwReg(HwRegPopsPacker, 0, 3)), popsPacker}); + + // waveIdRemapOffset = -(currentWaveId + 1) = ~currentWaveId + auto currentWaveId = builder.CreateAnd(collisionWaveId, 0x3FF); + waveIdRemapOffset = builder.CreateNot(currentWaveId); + + // newestOverlappedWaveId += waveIdRemapOffset + 
newestOverlappedWaveId = builder.CreateAnd(builder.CreateLShr(collisionWaveId, 16), 0x3FF); + newestOverlappedWaveId = builder.CreateAdd(newestOverlappedWaveId, waveIdRemapOffset); + } + + // Construct ".waveWaitingHeader" block + { + builder.SetInsertPoint(waveWaitingHeaderBlock->getTerminator()); + + Value *popsExitingWaveId = + builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_pops_exiting_wave_id, {}); + popsExitingWaveId = builder.CreateAdd(popsExitingWaveId, waveIdRemapOffset); + + Value *needToWait = builder.CreateICmpULE(popsExitingWaveId, newestOverlappedWaveId); + builder.CreateCondBr(needToWait, waveWaitingBodyBlock, endProcessOverlapBlock); + + waveWaitingHeaderBlock->getTerminator()->eraseFromParent(); // Remove old terminator + } + + // Construct ".waveWaitingBody" block + { + builder.SetInsertPoint(waveWaitingBodyBlock->getTerminator()); + + static const unsigned WaitTime = 0xFFFF; + builder.CreateIntrinsic(Intrinsic::amdgcn_s_sleep, {}, builder.getInt32(WaitTime)); + + builder.CreateBr(waveWaitingHeaderBlock); + + waveWaitingBodyBlock->getTerminator()->eraseFromParent(); // Remove old terminator + } + + // Currently, nothing to do to construct ".endProcessOverlap" block +} + +// ===================================================================================================================== +// Lower PopsEndInterlockOp. 
+// +// @param popsEndInterlockOp : Call instruction op to end a POPS critical section +void PatchEntryPointMutate::lowerPopsEndInterlock(PopsEndInterlockOp &popsEndInterlockOp) { + Function *entryPoint = popsEndInterlockOp.getFunction(); + assert(getShaderStage(entryPoint) == ShaderStage::Fragment); // Must be FS + + BuilderBase builder(&popsEndInterlockOp); + + // + // The processing is something like this: + // + // Pre-GFX11: + // POPS_END_INTERLOCK() { + // s_wait_vscnt null, 0x0 + // s_sendmsg(MSG_ORDERED_PS_DONE) + // } + // + // GFX11+: + // POPS_END_INTERLOCK() { + // s_wait_vscnt null, 0x0 + // } + // + + // Add s_wait_vscnt null, 0x0 to make sure that all writes have completed + SyncScope::ID syncScope = builder.getContext().getOrInsertSyncScopeID("agent"); + builder.CreateFence(AtomicOrdering::Release, syncScope); + + auto gfxIp = m_pipelineState->getTargetInfo().getGfxIpVersion(); + if (gfxIp.major < 11) { + auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Fragment)->entryArgIdxs.fs; + auto primMask = getFunctionArgument(entryPoint, entryArgIdxs.primMask); + + builder.CreateIntrinsic(Intrinsic::amdgcn_s_sendmsg, {}, {builder.getInt32(OrderedPsDone), primMask}); + } +} + // ===================================================================================================================== // Lower as.continuation.reference call. 
// @@ -1482,6 +1682,8 @@ void PatchEntryPointMutate::setFuncAttrs(Function *entryPoint) { spiPsInputAddr.bits.frontFaceEna = builtInUsage.frontFacing; spiPsInputAddr.bits.ancillaryEna = builtInUsage.sampleId; spiPsInputAddr.bits.ancillaryEna |= builtInUsage.shadingRate; + spiPsInputAddr.bits.ancillaryEna |= builtInUsage.primType; + spiPsInputAddr.bits.lineStippleTexEna |= builtInUsage.lineStipple; spiPsInputAddr.bits.sampleCoverageEna = builtInUsage.sampleMaskIn; builder.addAttribute("InitialPSInputAddr", std::to_string(spiPsInputAddr.u32All)); @@ -2011,6 +2213,13 @@ void PatchEntryPointMutate::finalizeUserDataArgs(SmallVectorImpl &u userDataEnd += userDataArg.argDwordSize; assert(userDataEnd < userDataAvailable && "too many system value user data args"); + if (m_pipelineState->getOptions().forceUserDataSpill) { + // Force all user data to be spilled; should only be used by indirect RT. + assert(m_pipelineState->getOptions().rtIndirectMode != RayTracingIndirectMode::NotIndirect); + spill = true; + userDataAvailable = userDataEnd; + } + if (m_computeWithCalls) { // In compute with calls, the user data layout must be the same across all shaders and therefore cannot depend // on an individual shader's usage pattern. 
diff --git a/lgc/patch/PatchInOutImportExport.cpp b/lgc/patch/PatchInOutImportExport.cpp index 09027bf9fb..f49e2b0a22 100644 --- a/lgc/patch/PatchInOutImportExport.cpp +++ b/lgc/patch/PatchInOutImportExport.cpp @@ -1640,27 +1640,24 @@ Value *PatchInOutImportExport::patchGsGenericInputImport(Type *inputTy, unsigned Value *PatchInOutImportExport::performFsFloatInterpolation(BuilderBase &builder, Value *attr, Value *channel, Value *coordI, Value *coordJ, Value *primMask) { Value *result = nullptr; - Attribute::AttrKind attribs[] = {Attribute::ReadNone}; if (m_gfxIp.major >= 11) { // llvm.amdgcn.lds.param.load(attr_channel, attr, m0) Value *param = - builder.CreateNamedCall("llvm.amdgcn.lds.param.load", builder.getFloatTy(), {channel, attr, primMask}, attribs); + builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_lds_param_load, {channel, attr, primMask}); // tmp = llvm.amdgcn.interp.inreg.p10(p10, coordI, p0) - result = - builder.CreateNamedCall("llvm.amdgcn.interp.inreg.p10", builder.getFloatTy(), {param, coordI, param}, attribs); + result = builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_interp_inreg_p10, {param, coordI, param}); // llvm.amdgcn.interp.inreg.p2(p20, coordJ, tmp) - result = - builder.CreateNamedCall("llvm.amdgcn.interp.inreg.p2", builder.getFloatTy(), {param, coordJ, result}, attribs); + result = builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_interp_inreg_p2, {param, coordJ, result}); } else { // llvm.amdgcn.interp.p1(coordI, attr_channel, attr, m0) - result = builder.CreateNamedCall("llvm.amdgcn.interp.p1", builder.getFloatTy(), {coordI, channel, attr, primMask}, - attribs); + result = + builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_interp_p1, {coordI, channel, attr, primMask}); // llvm.amdgcn.interp.p2(p1, coordJ, attr_channel, attr, m0) - result = builder.CreateNamedCall("llvm.amdgcn.interp.p2", builder.getFloatTy(), - {result, coordJ, channel, attr, primMask}, attribs); + result = 
builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_interp_p2, + {result, coordJ, channel, attr, primMask}); } return result; } @@ -1679,11 +1676,10 @@ Value *PatchInOutImportExport::performFsHalfInterpolation(BuilderBase &builder, Value *coordI, Value *coordJ, Value *primMask, Value *highHalf) { Value *result = nullptr; - Attribute::AttrKind attribs[] = {Attribute::ReadNone}; if (m_gfxIp.major >= 11) { // llvm.amdgcn.lds.param.load(attr_channel, attr, m0) Value *param = - builder.CreateNamedCall("llvm.amdgcn.lds.param.load", builder.getFloatTy(), {channel, attr, primMask}, attribs); + builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_lds_param_load, {channel, attr, primMask}); #if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 494282 // Old version of code @@ -1702,12 +1698,12 @@ Value *PatchInOutImportExport::performFsHalfInterpolation(BuilderBase &builder, result = builder.CreateIntrinsic(builder.getHalfTy(), interpP2Intrinsic, {param, coordJ, result, highHalf}); } else { // llvm.amdgcn.interp.p1.f16(coordI, attr_channel, attr, highhalf, m0) - result = builder.CreateNamedCall("llvm.amdgcn.interp.p1.f16", builder.getFloatTy(), - {coordI, channel, attr, highHalf, primMask}, attribs); + result = builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_interp_p1_f16, + {coordI, channel, attr, highHalf, primMask}); // llvm.amdgcn.interp.p2.f16(p1, coordJ, attr_channel, attr, highhalf, m0) - result = builder.CreateNamedCall("llvm.amdgcn.interp.p2.f16", builder.getHalfTy(), - {result, coordJ, channel, attr, highHalf, primMask}, attribs); + result = builder.CreateIntrinsic(builder.getHalfTy(), Intrinsic::amdgcn_interp_p2_f16, + {result, coordJ, channel, attr, highHalf, primMask}); } return result; } @@ -1729,8 +1725,8 @@ Value *PatchInOutImportExport::performFsParameterLoad(BuilderBase &builder, Valu if (m_gfxIp.major >= 11) { // llvm.amdgcn.lds.param.load(attr_channel, attr, m0) - compValue = 
builder.CreateNamedCall("llvm.amdgcn.lds.param.load", builder.getFloatTy(), {channel, attr, primMask}, - {Attribute::ReadNone}); + compValue = + builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_lds_param_load, {channel, attr, primMask}); DppCtrl dppCtrl; if (interpParam == INTERP_PARAM_P0) dppCtrl = DppCtrl::DppQuadPerm0000; @@ -1755,7 +1751,7 @@ Value *PatchInOutImportExport::performFsParameterLoad(BuilderBase &builder, Valu attr, // attr primMask // m0 }; - compValue = builder.CreateNamedCall("llvm.amdgcn.interp.mov", builder.getFloatTy(), args, {Attribute::ReadNone}); + compValue = builder.CreateIntrinsic(builder.getFloatTy(), Intrinsic::amdgcn_interp_mov, args); } // Two int8s are also packed like 16-bit in a 32-bit channel in previous export stage if (bitWidth == 8 || bitWidth == 16) { @@ -2086,21 +2082,10 @@ void PatchInOutImportExport::patchGsGenericOutputExport(Value *output, unsigned void PatchInOutImportExport::patchMeshGenericOutputExport(Value *output, unsigned location, Value *locOffset, Value *compIdx, Value *vertexOrPrimitiveIdx, bool isPerPrimitive, BuilderBase &builder) { - // outputOffset = (location + locOffset) * 4 + compIdx * (bitWidth == 64 ? 
2 : 1) - Value *outputOffset = builder.CreateAdd(builder.getInt32(location), locOffset); - outputOffset = builder.CreateShl(outputOffset, 2); - - auto outputTy = output->getType(); - if (outputTy->getScalarSizeInBits() == 64) { + if (output->getType()->getScalarSizeInBits() == 64) compIdx = builder.CreateShl(compIdx, 1); - } - - outputOffset = builder.CreateAdd(outputOffset, compIdx); - if (isPerPrimitive) - builder.create(outputOffset, vertexOrPrimitiveIdx, output); - else - builder.create(outputOffset, vertexOrPrimitiveIdx, output); + builder.create(isPerPrimitive, location, locOffset, compIdx, vertexOrPrimitiveIdx, output); } // ===================================================================================================================== @@ -2669,6 +2654,30 @@ Value *PatchInOutImportExport::patchFsBuiltInInputImport(Type *inputTy, unsigned input = getShadingRate(builder); break; } + case BuiltInPrimType: { + input = getPrimType(builder); + break; + } + case BuiltInLineStipple: { + input = getLineStipple(builder); + break; + } + case BuiltInPrimCoord: { + assert(inOutUsage.builtInInputLocMap.find(BuiltInPrimCoord) != inOutUsage.builtInInputLocMap.end()); + const unsigned loc = inOutUsage.builtInInputLocMap[BuiltInPrimCoord]; + + // Emulation for primCoord vGpr, specially, its value comes from z/w (ST) value, hence should be vec4 when interp. + const unsigned builtInId = + m_pipelineState->getRasterizerState().perSampleShading ? 
BuiltInInterpPerspSample : BuiltInInterpPerspCenter; + Value *interpValue = + patchFsBuiltInInputImport(FixedVectorType::get(builder.getFloatTy(), 4), builtInId, nullptr, builder); + Value *result = patchFsGenericInputImport(FixedVectorType::get(builder.getFloatTy(), 4), loc, nullptr, nullptr, + false, InOutInfo::InterpModeSmooth, interpValue, false, builder); + input = PoisonValue::get(FixedVectorType::get(builder.getFloatTy(), 2)); + input = builder.CreateInsertElement(input, builder.CreateExtractElement(result, 2), builder.getInt32(0)); + input = builder.CreateInsertElement(input, builder.CreateExtractElement(result, 3), builder.getInt32(1)); + break; + } // Handle internal-use built-ins for sample position emulation case BuiltInNumSamples: { if (m_pipelineState->isUnlinked() || m_pipelineState->getRasterizerState().dynamicSampleInfo) { @@ -3426,15 +3435,10 @@ void PatchInOutImportExport::patchMeshBuiltInOutputExport(Value *output, unsigne (void(builtInUsage)); // Unused - // outputOffset = location * 4 + elemIdx - Value *outputOffset = builder.getInt32(4 * loc); - if (elemIdx) - outputOffset = builder.CreateAdd(builder.getInt32(4 * loc), elemIdx); + if (!elemIdx) + elemIdx = builder.getInt32(0); - if (isPerPrimitive) - builder.create(outputOffset, vertexOrPrimitiveIdx, output); - else - builder.create(outputOffset, vertexOrPrimitiveIdx, output); + builder.create(isPerPrimitive, loc, builder.getInt32(0), elemIdx, vertexOrPrimitiveIdx, output); } // ===================================================================================================================== @@ -5402,6 +5406,29 @@ void PatchInOutImportExport::exportShadingRate(Value *shadingRate, BuilderBase & builder.getFalse()}); // src0 } +// ===================================================================================================================== +// Gets HW primitive type from ancillary bits. 
+Value *PatchInOutImportExport::getPrimType(BuilderBase &builder) { + assert(m_shaderStage == ShaderStage::Fragment); + auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Fragment)->entryArgIdxs.fs; + auto ancillary = getFunctionArgument(m_entryPoint, entryArgIdxs.ancillary); + + // Prim Type = Ancillary[1:0] + return builder.CreateAnd(ancillary, 0x3); +} + +// ===================================================================================================================== +// Gets HW line stipple value from lineStipple value. +// +// @param builder : the builder to use +Value *PatchInOutImportExport::getLineStipple(BuilderBase &builder) { + assert(m_shaderStage == ShaderStage::Fragment); + auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Fragment)->entryArgIdxs.fs; + auto line_stipple = getFunctionArgument(m_entryPoint, entryArgIdxs.lineStipple); + + return builder.CreateBitCast(line_stipple, builder.getFloatTy()); +} + // ===================================================================================================================== // Gets HW shading rate and converts them to LGC definitions. // @@ -5510,7 +5537,7 @@ void PatchInOutImportExport::exportVertexAttribs(BuilderBase &builder) { } for (auto &attribExport : m_attribExports) { - if (m_gfxIp.major <= 10) { + if (m_pipelineState->exportAttributeByExportInstruction()) { unsigned channelMask = 0; for (unsigned i = 0; i < 4; ++i) { assert(attribExport.second[i]); @@ -5531,10 +5558,10 @@ void PatchInOutImportExport::exportVertexAttribs(BuilderBase &builder) { Value *attribValue = PoisonValue::get(FixedVectorType::get(builder.getFloatTy(), 4)); // Always be <4 x float> for (unsigned i = 0; i < 4; ++i) attribValue = builder.CreateInsertElement(attribValue, attribExport.second[i], i); - // NOTE: For GFX11+, vertex attributes are exported through memory. 
This call will be expanded when NGG primitive + // NOTE: Create a call if we export vertex attribute through memory. This call will be expanded when NGG primitive // shader is generated. The arguments are: buffer descriptor of attribute ring, attribute location, and attribute // export value. - builder.CreateNamedCall(lgcName::NggAttribExport, builder.getVoidTy(), + builder.CreateNamedCall(lgcName::NggAttributeThroughMemory, builder.getVoidTy(), {m_pipelineSysValues.get(m_entryPoint)->getAttribRingBufDesc(), builder.getInt32(attribExport.first), attribValue}, {}); diff --git a/lgc/patch/PatchInvariantLoads.cpp b/lgc/patch/PatchInvariantLoads.cpp index 2890046d90..651d6a5ec7 100644 --- a/lgc/patch/PatchInvariantLoads.cpp +++ b/lgc/patch/PatchInvariantLoads.cpp @@ -162,6 +162,7 @@ PreservedAnalyses PatchInvariantLoads::run(Function &function, FunctionAnalysisM case Intrinsic::amdgcn_init_exec_from_input: case Intrinsic::invariant_start: case Intrinsic::invariant_end: + case Intrinsic::assume: continue; default: break; diff --git a/lgc/patch/PatchMulDx9Zero.cpp b/lgc/patch/PatchMulDx9Zero.cpp new file mode 100644 index 0000000000..b9cdb6f537 --- /dev/null +++ b/lgc/patch/PatchMulDx9Zero.cpp @@ -0,0 +1,173 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file PatchMulDx9Zero.cpp + * @brief LLPC source file: contains implementation of class lgc::PatchMulDx9Zero. 
+ *********************************************************************************************************************** + */ +#include "lgc/patch/PatchMulDx9Zero.h" +#include "lgc/state/PipelineShaders.h" +#include "lgc/state/PipelineState.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "lgc-patch-mul-dx9-zero" + +using namespace lgc; +using namespace llvm; +using namespace PatternMatch; + +namespace lgc { +// ===================================================================================================================== +PatchMulDx9Zero::PatchMulDx9Zero() : m_changed(false) { +} + +// ===================================================================================================================== +// Executes this LLVM pass on the specified LLVM function. +// +// @param [in/out] function : LLVM function to be run on, following patterns will be detected in the function +// ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) or +// ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) or +// fma((b==0.0 ? 0.0 : a), (a==0.0 ? 0.0 : b), c) +// @param [in/out] analysisManager : Analysis manager to use for this transformation +// @returns : The preserved analyses (The analyses that are still valid after this pass) +PreservedAnalyses PatchMulDx9Zero::run(Function &function, FunctionAnalysisManager &analysisManager) { + LLVM_DEBUG(dbgs() << "Run the pass Patch-Mul-Dx9Zero-Opt\n"); + + m_builder = std::make_unique>(function.getContext()); + + visit(function); + + return m_changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +// ===================================================================================================================== +// Visits call instruction. 
+// +// @param callInst : Call instruction +void PatchMulDx9Zero::visitCallInst(CallInst &callInst) { + auto callee = callInst.getCalledFunction(); + if (!callee) + return; + + // Replace fma with amdgcn_fma_legacy intrinsic when detect patterns like: + // fma((b==0.0 ? 0.0 : a), (a==0.0 ? 0.0 : b), c) + if (callee->isIntrinsic() && callee->getIntrinsicID() == Intrinsic::fma) { + Value *src1 = callInst.getArgOperand(0); + Value *src2 = callInst.getArgOperand(1); + auto matchValue = isMulDx9Zero(src1, src2); + if (matchValue != std::nullopt) { + m_builder->SetInsertPoint(&callInst); + m_builder->setFastMathFlags(callInst.getFastMathFlags()); + Value *transformSrc1 = matchValue->first; + Value *transformSrc2 = matchValue->second; + Value *src3 = callInst.getArgOperand(2); + Value *ffmazResult = + m_builder->CreateIntrinsic(Intrinsic::amdgcn_fma_legacy, {}, {transformSrc1, transformSrc2, src3}); + m_changed = true; + callInst.replaceAllUsesWith(ffmazResult); + callInst.dropAllReferences(); + callInst.eraseFromParent(); + } + } +} + +// ===================================================================================================================== +// Visits binary operator instruction. +// +// @param binaryOp : Binary operator instruction +void PatchMulDx9Zero::visitBinaryOperator(BinaryOperator &binaryOp) { + Instruction::BinaryOps opCode = binaryOp.getOpcode(); + + // Replace mul with amdgcn_fmul_legacy intrinsic when detect patterns like: + // ((b==0.0 ? 0.0 : a) * (a==0.0 ? 
0.0 : b)) + if (opCode == Instruction::FMul) { + auto src1 = binaryOp.getOperand(0); + auto src2 = binaryOp.getOperand(1); + auto matchValue = isMulDx9Zero(src1, src2); + if (matchValue != std::nullopt) { + m_builder->SetInsertPoint(&binaryOp); + m_builder->setFastMathFlags(binaryOp.getFastMathFlags()); + Value *transformSrc1 = matchValue->first; + Value *transformSrc2 = matchValue->second; + Value *fmulzResult = + m_builder->CreateIntrinsic(Intrinsic::amdgcn_fmul_legacy, {}, {transformSrc1, transformSrc2}); + m_changed = true; + binaryOp.replaceAllUsesWith(fmulzResult); + binaryOp.dropAllReferences(); + binaryOp.eraseFromParent(); + } + } +} + +// ===================================================================================================================== +// Checks whether a multiply of lhs with rhs using the given fast-math flags can be transformed into a multiply +// with DX9 zero semantics. If so, returns a pair of operands for the new multiply. +// @param lhs : left operand for the operation +// @param rhs: right operand for the operation +std::optional> PatchMulDx9Zero::isMulDx9Zero(Value *lhs, Value *rhs) { + Value *lhsCmpValue = nullptr; + Value *lhsFalseValue = nullptr; + Value *rhsCmpValue = nullptr; + Value *rhsFalseValue = nullptr; + FCmpInst::Predicate pred = FCmpInst::FCMP_OEQ; + + // Only transform for float32. + if (!(lhs->getType()->isFloatTy() && rhs->getType()->isFloatTy())) + return std::nullopt; + + // Detect whether A = (b==0.0 ? 0.0 : a) and parse out b and a + bool lhsMatch = + match(lhs, m_Select(m_FCmp(pred, m_Value(lhsCmpValue), m_AnyZeroFP()), m_Zero(), m_Value(lhsFalseValue))); + // Detect whether B = (a'==0.0 ? 
0.0 : b') and output a' and b' + bool rhsMatch = + match(rhs, m_Select(m_FCmp(pred, m_Value(rhsCmpValue), m_AnyZeroFP()), m_Zero(), m_Value(rhsFalseValue))); + + // If b == b' && a == a' then use fmul_legacy(a,b) instead of fmul(A,B) + if (lhsMatch && rhsMatch && (lhsCmpValue == rhsFalseValue) && (rhsCmpValue == lhsFalseValue)) { + return std::make_pair(lhsFalseValue, rhsFalseValue); + } + if (lhsMatch && (lhsCmpValue == rhs)) { + if (auto *constLhsFalseValue = dyn_cast(lhsFalseValue); + constLhsFalseValue && !constLhsFalseValue->isZero()) { + // Detect pattern: ((b==0.0 ? 0.0 : a) * b) when a is constant but not zero. + return std::make_pair(lhsFalseValue, rhs); + } + } + if (rhsMatch && (lhs == rhsCmpValue)) { + if (auto *constRhsFalseValue = dyn_cast(rhsFalseValue); + constRhsFalseValue && !constRhsFalseValue->isZero()) { + // Detect pattern: (a * (a==0.0 ? 0.0 : b)) when b is constant but not zero. + return std::make_pair(lhs, rhsFalseValue); + } + } + return std::nullopt; +} +} // namespace lgc diff --git a/lgc/patch/PatchReadFirstLane.cpp b/lgc/patch/PatchReadFirstLane.cpp index 301e0c46a0..5b40e1fdbc 100644 --- a/lgc/patch/PatchReadFirstLane.cpp +++ b/lgc/patch/PatchReadFirstLane.cpp @@ -536,7 +536,7 @@ void ReadFirstLaneOptimizer::applyReadFirstLane(Instruction *inst, BuilderBase & if (isFloat) newInst = builder.CreateBitCast(inst, builder.getInt32Ty()); - Value *readFirstLane = builder.CreateIntrinsic(Intrinsic::amdgcn_readfirstlane, {}, newInst); + Value *readFirstLane = builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, newInst); Value *replaceInst = nullptr; if (isFloat) { diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index 4d8f42f2b6..b2c79d1f78 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -1009,7 +1009,8 @@ void PatchResourceCollect::processShader() { if (m_shaderStage == ShaderStage::Fragment) { if 
(m_pipelineState->getRasterizerState().perSampleShading) { if (m_resUsage->builtInUsage.fs.fragCoord || m_resUsage->builtInUsage.fs.pointCoord || - m_resUsage->builtInUsage.fs.sampleMaskIn || m_resUsage->resourceWrite) + m_resUsage->builtInUsage.fs.primCoord || m_resUsage->builtInUsage.fs.sampleMaskIn || + m_resUsage->resourceWrite) m_resUsage->builtInUsage.fs.runAtSampleRate = true; } @@ -1435,6 +1436,9 @@ void PatchResourceCollect::clearInactiveBuiltInInput() { if (builtInUsage.fs.baryCoordPullModel && m_activeInputBuiltIns.find(BuiltInBaryCoordPullModel) == m_activeInputBuiltIns.end()) builtInUsage.fs.baryCoordPullModel = false; + + if (builtInUsage.fs.primCoord && m_activeInputBuiltIns.find(BuiltInPrimCoord) == m_activeInputBuiltIns.end()) + builtInUsage.fs.primCoord = false; } } @@ -2323,118 +2327,62 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { unsigned availPerPrimitiveOutMapLoc = inOutUsage.perPrimitiveOutputMapLocCount; // Map per-vertex built-in outputs to generic ones - if (builtInUsage.mesh.position) - inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc++; + if (builtInUsage.mesh.position) { + inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {4, BuiltInPosition}; // vec4 + ++availOutMapLoc; + } - if (builtInUsage.mesh.pointSize) - inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc++; + if (builtInUsage.mesh.pointSize) { + inOutUsage.builtInOutputLocMap[BuiltInPointSize] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {1, BuiltInPointSize}; // float + ++availOutMapLoc; + } if (builtInUsage.mesh.clipDistance > 0) { - inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc++; + inOutUsage.builtInOutputLocMap[BuiltInClipDistance] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {static_cast(builtInUsage.mesh.clipDistance), + BuiltInClipDistance}; // float[] + 
++availOutMapLoc; + if (builtInUsage.mesh.clipDistance > 4) ++availOutMapLoc; } if (builtInUsage.mesh.cullDistance > 0) { - inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc++; + inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc; + inOutUsage.mesh.vertexOutputComponents[availOutMapLoc] = {static_cast(builtInUsage.mesh.cullDistance), + BuiltInCullDistance}; // float[] + ++availOutMapLoc; + if (builtInUsage.mesh.cullDistance > 4) ++availOutMapLoc; } // Map per-primitive built-in outputs to generic ones - if (builtInUsage.mesh.primitiveId) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveId] = availPerPrimitiveOutMapLoc++; - - if (builtInUsage.mesh.viewportIndex) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInViewportIndex] = availPerPrimitiveOutMapLoc++; - - if (builtInUsage.mesh.layer) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInLayer] = availPerPrimitiveOutMapLoc++; - - if (builtInUsage.mesh.primitiveShadingRate) - inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveShadingRate] = availPerPrimitiveOutMapLoc++; - - // Map per-vertex built-in outputs to exported locations - if (nextStage == ShaderStage::Fragment) { - // Mesh shader ==> FS - const auto &nextBuiltInUsage = nextResUsage->builtInUsage.fs; - auto &nextInOutUsage = nextResUsage->inOutUsage; - - if (nextBuiltInUsage.clipDistance > 0) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInClipDistance) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInClipDistance]; - inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInClipDistance] = mapLoc; - } - - if (nextBuiltInUsage.cullDistance > 0) { - assert(nextInOutUsage.builtInInputLocMap.find(BuiltInCullDistance) != nextInOutUsage.builtInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInCullDistance]; - inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance] = mapLoc; - } - } else if 
(!nextStage) { - // Mesh shader only - unsigned availExportLoc = inOutUsage.outputMapLocCount; - - if (builtInUsage.mesh.clipDistance > 0 || builtInUsage.mesh.cullDistance > 0) { - unsigned exportLoc = availExportLoc++; - if (builtInUsage.mesh.clipDistance + builtInUsage.mesh.cullDistance > 4) { - assert(builtInUsage.mesh.clipDistance + builtInUsage.mesh.cullDistance <= MaxClipCullDistanceCount); - ++availExportLoc; // Occupy two locations - } - - if (builtInUsage.mesh.clipDistance > 0) - inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInClipDistance] = exportLoc; - - if (builtInUsage.mesh.cullDistance > 0) { - if (builtInUsage.mesh.clipDistance >= 4) - ++exportLoc; - inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance] = exportLoc; - } - } + if (builtInUsage.mesh.primitiveId) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveId] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInPrimitiveId}; // int + ++availPerPrimitiveOutMapLoc; } - // Map per-primitive built-in outputs to exported locations - if (nextStage == ShaderStage::Fragment) { - // Mesh shader ==> FS - const auto &nextBuiltInUsage = nextResUsage->builtInUsage.fs; - auto &nextInOutUsage = nextResUsage->inOutUsage; - - if (nextBuiltInUsage.primitiveId) { - assert(nextInOutUsage.perPrimitiveBuiltInInputLocMap.find(BuiltInPrimitiveId) != - nextInOutUsage.perPrimitiveBuiltInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInPrimitiveId]; - inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInPrimitiveId] = mapLoc; - } - - if (nextBuiltInUsage.layer) { - assert(nextInOutUsage.perPrimitiveBuiltInInputLocMap.find(BuiltInLayer) != - nextInOutUsage.perPrimitiveBuiltInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInLayer]; - inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInLayer] = mapLoc; - } - - if (nextBuiltInUsage.viewportIndex) { 
- assert(nextInOutUsage.perPrimitiveBuiltInInputLocMap.find(BuiltInViewportIndex) != - nextInOutUsage.perPrimitiveBuiltInInputLocMap.end()); - const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInViewportIndex]; - inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex] = mapLoc; - } - } else if (!nextStage) { - // Mesh shader only - unsigned availPerPrimitiveExportLoc = inOutUsage.perPrimitiveOutputMapLocCount; - - if (builtInUsage.mesh.primitiveId) - inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInPrimitiveId] = availPerPrimitiveExportLoc++; - - if (builtInUsage.mesh.layer) - inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInLayer] = availPerPrimitiveExportLoc++; + if (builtInUsage.mesh.viewportIndex) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInViewportIndex] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInViewportIndex}; // int + ++availPerPrimitiveOutMapLoc; + } - if (builtInUsage.mesh.viewportIndex) - inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex] = availPerPrimitiveExportLoc++; + if (builtInUsage.mesh.layer) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInLayer] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInLayer}; // int + ++availPerPrimitiveOutMapLoc; } - inOutUsage.mesh.vertexGenericOutputExportCount = inOutUsage.outputMapLocCount; - inOutUsage.mesh.primitiveGenericOutputExportCount = inOutUsage.perPrimitiveOutputMapLocCount; + if (builtInUsage.mesh.primitiveShadingRate) { + inOutUsage.perPrimitiveBuiltInOutputLocMap[BuiltInPrimitiveShadingRate] = availPerPrimitiveOutMapLoc; + inOutUsage.mesh.primitiveOutputComponents[availPerPrimitiveOutMapLoc] = {1, BuiltInPrimitiveShadingRate}; // int + ++availPerPrimitiveOutMapLoc; + } inOutUsage.outputMapLocCount = std::max(inOutUsage.outputMapLocCount, availOutMapLoc); inOutUsage.perPrimitiveOutputMapLocCount = 
@@ -2486,6 +2434,10 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { } } + // Would be the last interpolated attributes' Z/W value. + if (builtInUsage.fs.primCoord) + inOutUsage.builtInInputLocMap[BuiltInPrimCoord] = availInMapLoc++; + inOutUsage.inputMapLocCount = std::max(inOutUsage.inputMapLocCount, availInMapLoc); inOutUsage.perPrimitiveInputMapLocCount = std::max(inOutUsage.perPrimitiveInputMapLocCount, availPerPrimitiveInMapLoc); @@ -2995,6 +2947,25 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { if (m_shaderStage == ShaderStage::Geometry) inOutUsage.gs.outLocCount[streamId] = std::max(inOutUsage.gs.outLocCount[streamId], newLocMappedTo + 1); } + + // After location mapping is done, we update the location/components map of mesh shader vertex outputs with new + // locations. + if (m_shaderStage == ShaderStage::Mesh) { + // Make a copy and clear the old map + auto vertexOutputComponents = inOutUsage.mesh.vertexOutputComponents; + inOutUsage.mesh.vertexOutputComponents.clear(); + + // Setup a new map with new locations + for (auto &locInfoPair : outputLocInfoMap) { + const unsigned location = locInfoPair.first.getLocation(); + const unsigned newLocation = locInfoPair.second.getLocation(); + + if (vertexOutputComponents.count(location) == 0) + continue; // Skip if not found + + inOutUsage.mesh.vertexOutputComponents[newLocation] = vertexOutputComponents[location]; + } + } } // @@ -3087,6 +3058,25 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { assert(newLocMappedTo != InvalidValue); locPair.second = newLocMappedTo; } + + // After location mapping is done, we update the location/components map of mesh shader primitive outputs with + // new locations. 
+ if (m_shaderStage == ShaderStage::Mesh) { + // Make a copy and clear the old map + auto primitiveOutputComponents = inOutUsage.mesh.primitiveOutputComponents; + inOutUsage.mesh.primitiveOutputComponents.clear(); + + // Setup a new map with new locations + for (auto &locPair : perPrimitiveOutputLocMap) { + const unsigned location = locPair.first; + const unsigned newLocation = locPair.second; + + if (primitiveOutputComponents.count(location) == 0) + continue; // Skip if not found + + inOutUsage.mesh.primitiveOutputComponents[newLocation] = primitiveOutputComponents[location]; + } + } } m_outputCalls.clear(); @@ -3372,14 +3362,16 @@ void PatchResourceCollect::reassembleOutputExportCalls() { for (unsigned vectorComp = 0, elemIdx = baseElementIdx; vectorComp < compCount; vectorComp += 1, elemIdx += 2) { assert(elemIdx < MaxNumElems); Value *component = elementsInfo.elements[elemIdx]; - assert(component); - if (Value *highElem = elementsInfo.elements[elemIdx + 1]) { - // Two 16 - bit elements packed as a 32 - bit scalar - highElem = builder.CreateShl(highElem, 16); - component = builder.CreateOr(component, highElem); + // A component is not exported but next stage may import it, just use poison. 
+ if (component) { + if (Value *highElem = elementsInfo.elements[elemIdx + 1]) { + // Two 16 - bit elements packed as a 32 - bit scalar + highElem = builder.CreateShl(highElem, 16); + component = builder.CreateOr(component, highElem); + } + component = builder.CreateBitCast(component, builder.getFloatTy()); + outValue = builder.CreateInsertElement(outValue, component, vectorComp); } - component = builder.CreateBitCast(component, builder.getFloatTy()); - outValue = builder.CreateInsertElement(outValue, component, vectorComp); } } assert(outValue); diff --git a/lgc/patch/PatchSetupTargetFeatures.cpp b/lgc/patch/PatchSetupTargetFeatures.cpp index 01eca8701e..fbcdcda5f8 100644 --- a/lgc/patch/PatchSetupTargetFeatures.cpp +++ b/lgc/patch/PatchSetupTargetFeatures.cpp @@ -131,7 +131,7 @@ void PatchSetupTargetFeatures::setupTargetFeatures(Module *module) { // sgpr needs to be preloaded for COMPUTE_PGM_RSRC2.tg_size_en (Work-Group Info). // This is needed for LDS spilling. for (unsigned i = 0, e = func->arg_size(); i != e; ++i) { - if (func->getArg(i)->getName().equals("MultiDispatchInfo")) { + if (func->getArg(i)->getName() == "MultiDispatchInfo") { builder.addAttribute("amdgpu-work-group-info-arg-no", std::to_string(i)); } } diff --git a/lgc/patch/RegisterMetadataBuilder.cpp b/lgc/patch/RegisterMetadataBuilder.cpp index 8e452560e8..27f9662d5f 100644 --- a/lgc/patch/RegisterMetadataBuilder.cpp +++ b/lgc/patch/RegisterMetadataBuilder.cpp @@ -133,13 +133,18 @@ void RegisterMetadataBuilder::buildPalMetadata() { // Fill ".preraster_output_semantic" auto resUsage = m_pipelineState->getShaderResourceUsage(lastVertexProcessingStage.value()); auto &outputLocInfoMap = resUsage->inOutUsage.outputLocInfoMap; + auto &perPrimitiveOutputLocMap = resUsage->inOutUsage.perPrimitiveOutputLocMap; auto &builtInOutputLocMap = resUsage->inOutUsage.builtInOutputLocMap; - // Collect semantic info for generic input and builtIns {gl_ClipDistance, gl_CulDistance, gl_Layer, - // gl_ViewportIndex} 
that exports via generic output as well. - if (!outputLocInfoMap.empty() || !builtInOutputLocMap.empty()) { + auto &perPrimitiveBuiltInOutputLocMap = resUsage->inOutUsage.perPrimitiveBuiltInOutputLocMap; + + // Collect semantic info for generic input and builtIns {ClipDistance, CullDistance, Layer, + // ViewportIndex, PrimitiveId} that exports via generic output as well. + if (!outputLocInfoMap.empty() || !perPrimitiveOutputLocMap.empty() || !builtInOutputLocMap.empty() || + !perPrimitiveBuiltInOutputLocMap.empty()) { auto preRasterOutputSemanticNode = getPipelineNode()[Util::Abi::PipelineMetadataKey::PrerasterOutputSemantic].getArray(true); unsigned elemIdx = 0; + for (auto locInfoPair : outputLocInfoMap) { auto preRasterOutputSemanticElem = preRasterOutputSemanticNode[elemIdx].getMap(true); preRasterOutputSemanticElem[Util::Abi::PrerasterOutputSemanticMetadataKey::Semantic] = @@ -149,9 +154,29 @@ void RegisterMetadataBuilder::buildPalMetadata() { ++elemIdx; } + for (auto locInfoPair : perPrimitiveOutputLocMap) { + auto preRasterOutputSemanticElem = preRasterOutputSemanticNode[elemIdx].getMap(true); + preRasterOutputSemanticElem[Util::Abi::PrerasterOutputSemanticMetadataKey::Semantic] = + MaxBuiltInSemantic + locInfoPair.first; + preRasterOutputSemanticElem[Util::Abi::PrerasterOutputSemanticMetadataKey::Index] = locInfoPair.second; + ++elemIdx; + } + for (auto locPair : builtInOutputLocMap) { if (locPair.first == BuiltInClipDistance || locPair.first == BuiltInCullDistance || - locPair.first == BuiltInLayer || locPair.first == BuiltInViewportIndex) { + locPair.first == BuiltInLayer || locPair.first == BuiltInViewportIndex || + locPair.first == BuiltInPrimitiveId) { + assert(locPair.first < MaxBuiltInSemantic); + auto preRasterOutputSemanticElem = preRasterOutputSemanticNode[elemIdx].getMap(true); + preRasterOutputSemanticElem[Util::Abi::PrerasterOutputSemanticMetadataKey::Semantic] = locPair.first; + 
preRasterOutputSemanticElem[Util::Abi::PrerasterOutputSemanticMetadataKey::Index] = locPair.second; + ++elemIdx; + } + } + + for (auto locPair : perPrimitiveBuiltInOutputLocMap) { + if (locPair.first == BuiltInLayer || locPair.first == BuiltInViewportIndex || + locPair.first == BuiltInPrimitiveId) { assert(locPair.first < MaxBuiltInSemantic); auto preRasterOutputSemanticElem = preRasterOutputSemanticNode[elemIdx].getMap(true); preRasterOutputSemanticElem[Util::Abi::PrerasterOutputSemanticMetadataKey::Semantic] = locPair.first; @@ -750,10 +775,18 @@ void RegisterMetadataBuilder::buildPsRegisters() { getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::PsLoadProvokingVtx] = true; } + // PA_SC_SHADER_CONTROL + if (m_gfxIp.major < 11 && m_pipelineState->getShaderModes()->getFragmentShaderMode().enablePops) { + auto paScShaderControl = + getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::PaScShaderControl].getMap(true); + paScShaderControl[Util::Abi::PaScShaderControlMetadataKey::LoadCollisionWaveid] = true; + } + // PA_SC_MODE_CNTL_1 getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::PsIterSample] = m_pipelineState->getShaderResourceUsage(shaderStage)->builtInUsage.fs.runAtSampleRate > 0; + bool allowRez = shaderOptions.allowReZ; // DB_SHADER_CONTROL ZOrder zOrder = LATE_Z; bool execOnHeirFail = false; @@ -764,7 +797,7 @@ void RegisterMetadataBuilder::buildPsRegisters() { else if (resUsage->resourceWrite) { zOrder = LATE_Z; execOnHeirFail = true; - } else if (shaderOptions.allowReZ) + } else if (allowRez) zOrder = EARLY_Z_THEN_RE_Z; else zOrder = EARLY_Z_THEN_LATE_Z; @@ -788,6 +821,8 @@ void RegisterMetadataBuilder::buildPsRegisters() { fragmentMode.earlyFragmentTests && resUsage->resourceWrite; dbShaderControl[Util::Abi::DbShaderControlMetadataKey::ExecOnHierFail] = execOnHeirFail; dbShaderControl[Util::Abi::DbShaderControlMetadataKey::ConservativeZExport] = conservativeZExport; + 
dbShaderControl[Util::Abi::DbShaderControlMetadataKey::PrimitiveOrderedPixelShader] = + static_cast(fragmentMode.enablePops); dbShaderControl[Util::Abi::DbShaderControlMetadataKey::PreShaderDepthCoverageEnable] = fragmentMode.postDepthCoverage; // SPI_PS_INPUT_CNTL_0..31 @@ -806,12 +841,18 @@ void RegisterMetadataBuilder::buildPsRegisters() { constexpr unsigned PassThroughMode = (1 << 5); unsigned pointCoordLoc = InvalidValue; + unsigned primCoordLoc = InvalidValue; auto builtInInputLocMapIt = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInPointCoord); if (builtInInputLocMapIt != resUsage->inOutUsage.builtInInputLocMap.end()) { // Get generic input corresponding to gl_PointCoord (to set the field PT_SPRITE_TEX) pointCoordLoc = builtInInputLocMapIt->second; } + auto builtInInputLocMapIte = resUsage->inOutUsage.builtInInputLocMap.find(BuiltInPrimCoord); + if (builtInInputLocMapIte != resUsage->inOutUsage.builtInInputLocMap.end()) { + primCoordLoc = builtInInputLocMapIte->second; + } + msgpack::ArrayDocNode spiPsInputCnt = getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::SpiPsInputCntl].getArray(true); const std::vector dummyInterpInfo{{0, false, false, false, false, false, false}}; @@ -862,6 +903,9 @@ void RegisterMetadataBuilder::buildPsRegisters() { spiPsInputCntlInfo.offset = UseDefaultVal; } + if (primCoordLoc == i) { + spiPsInputCntlInfo.offset = UseDefaultVal; + } // NOTE: Set SPI_PS_INPUT_CNTL_* here, but the register can still be changed later, // when it becomes known that gl_ViewportIndex is not used and fields OFFSET and FLAT_SHADE // can be amended. 
@@ -890,6 +934,12 @@ void RegisterMetadataBuilder::buildPsRegisters() { spiPsInControl[Util::Abi::SpiPsInControlMetadataKey::NumPrimInterp] = numPrimInterp; const auto waveSize = m_pipelineState->getShaderWaveSize(shaderStage); spiPsInControl[Util::Abi::SpiPsInControlMetadataKey::PsW32En] = (waveSize == 32); + // .param_gen + if (primCoordLoc != InvalidValue) { + // Not included in num_interps, but won't influence other defined inputs. + spiPsInControl[Util::Abi::SpiPsInControlMetadataKey::ParamGen] = true; + spiPsInControl[Util::Abi::SpiPsInControlMetadataKey::NumInterps] = numInterp - 1; + } // SPI_INTERP_CONTROL_0 if (pointCoordLoc != InvalidValue) { @@ -918,10 +968,11 @@ void RegisterMetadataBuilder::buildPsRegisters() { // Fill .ps_input_semantic for partial pipeline if (m_pipelineState->isUnlinked()) { - // Collect semantic info for generic input and builtIns {gl_ClipDistance, gl_CulDistance, gl_Layer, - // gl_ViewportIndex} that exports via generic output as well. + // Collect semantic info for generic input and builtIns {ClipDistance, CulDistance, Layer, + // ViewportIndex, PrimitiveId} that exports via generic output as well. 
auto &inputLocInfoMap = resUsage->inOutUsage.inputLocInfoMap; auto &builtInInputLocMap = resUsage->inOutUsage.builtInInputLocMap; + if (!inputLocInfoMap.empty() || !builtInInputLocMap.empty()) { auto psInputSemanticNode = getPipelineNode()[Util::Abi::PipelineMetadataKey::PsInputSemantic].getArray(true); unsigned elemIdx = 0; @@ -934,7 +985,8 @@ void RegisterMetadataBuilder::buildPsRegisters() { for (auto locPair : builtInInputLocMap) { if (locPair.first == BuiltInClipDistance || locPair.first == BuiltInCullDistance || - locPair.first == BuiltInLayer || locPair.first == BuiltInViewportIndex) { + locPair.first == BuiltInLayer || locPair.first == BuiltInViewportIndex || + locPair.first == BuiltInPrimitiveId) { assert(locPair.first < MaxBuiltInSemantic); auto psInputSemanticElem = psInputSemanticNode[elemIdx].getMap(true); psInputSemanticElem[Util::Abi::PsInputSemanticMetadataKey::Semantic] = locPair.first; diff --git a/lgc/patch/ShaderInputs.cpp b/lgc/patch/ShaderInputs.cpp index 5895a4af38..a79d7e975f 100644 --- a/lgc/patch/ShaderInputs.cpp +++ b/lgc/patch/ShaderInputs.cpp @@ -210,6 +210,10 @@ const char *ShaderInputs::getInputName(ShaderInput inputKind) { return "MultiDispatchInfo"; case ShaderInput::PrimMask: return "PrimMask"; + case ShaderInput::CollisionWaveId: + return "CollisionWaveId"; + case ShaderInput::ProvokingVtxInfo: + return "ProvokingVtxInfo"; case ShaderInput::OffChipLdsBase: return "OffChipLdsBase"; case ShaderInput::StreamOutInfo: @@ -232,8 +236,6 @@ const char *ShaderInputs::getInputName(ShaderInput inputKind) { return "EsGsOffset"; case ShaderInput::TfBufferBase: return "TfBufferBase"; - case ShaderInput::ProvokingVtxInfo: - return "ProvokingVtxInfo"; case ShaderInput::VertexId: return "VertexId"; case ShaderInput::RelVertexId: @@ -497,7 +499,8 @@ static const ShaderInputDesc GsSgprInputs[] = { // SGPRs: FS static const ShaderInputDesc FsSgprInputs[] = { {ShaderInput::PrimMask, offsetof(InterfaceData, entryArgIdxs.fs.primMask), true}, - 
{ShaderInput::ProvokingVtxInfo, offsetof(InterfaceData, entryArgIdxs.fs.provokingVtxInfo), false}, + {ShaderInput::CollisionWaveId, offsetof(InterfaceData, entryArgIdxs.fs.collisionWaveId)}, + {ShaderInput::ProvokingVtxInfo, offsetof(InterfaceData, entryArgIdxs.fs.provokingVtxInfo)}, }; // SGPRs: CS @@ -556,7 +559,7 @@ static const ShaderInputDesc FsVgprInputs[] = { {ShaderInput::LinearInterpSample, offsetof(InterfaceData, entryArgIdxs.fs.linearInterp.sample), true}, {ShaderInput::LinearInterpCenter, offsetof(InterfaceData, entryArgIdxs.fs.linearInterp.center), true}, {ShaderInput::LinearInterpCentroid, offsetof(InterfaceData, entryArgIdxs.fs.linearInterp.centroid), true}, - {ShaderInput::LineStipple, 0, true}, + {ShaderInput::LineStipple, offsetof(InterfaceData, entryArgIdxs.fs.lineStipple), true}, {ShaderInput::FragCoordX, offsetof(InterfaceData, entryArgIdxs.fs.fragCoord.x), true}, {ShaderInput::FragCoordY, offsetof(InterfaceData, entryArgIdxs.fs.fragCoord.y), true}, {ShaderInput::FragCoordZ, offsetof(InterfaceData, entryArgIdxs.fs.fragCoord.z), true}, @@ -630,6 +633,11 @@ uint64_t ShaderInputs::getShaderArgTys(PipelineState *pipelineState, ShaderStage } } break; + case ShaderStage::Fragment: + if (pipelineState->getTargetInfo().getGfxIpVersion().major < 11 && + pipelineState->getShaderModes()->getFragmentShaderMode().enablePops) + getShaderInputUsage(shaderStage, ShaderInput::CollisionWaveId)->enable(); + break; default: break; } diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index 70c618aeff..87706b3fc8 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -641,7 +641,7 @@ PreservedAnalyses LowerVertexFetch::run(Module &module, ModuleAnalysisManager &a builder.setShaderStage(ShaderStage::Vertex); builder.SetInsertPointPastAllocas(vertexFetches[0]->getFunction()); auto desc = builder.CreateBufferDesc(InternalDescriptorSetId, FetchShaderInternalBufferBinding, builder.getInt32(0), - Builder::BufferFlagAddress); + 
Builder::BufferFlagAddress, false); auto descPtr = builder.CreateIntToPtr(desc, builder.getPtrTy(ADDR_SPACE_CONST)); @@ -1460,15 +1460,15 @@ Value *VertexFetchImpl::loadVertexBufferDescriptor(unsigned binding, BuilderImpl IRBuilder<>::InsertPointGuard guard(builder); builder.SetInsertPointPastAllocas(builder.GetInsertBlock()->getParent()); auto descPtr = builderImpl.CreateBufferDesc(InternalDescriptorSetId, CurrentAttributeBufferBinding, - builderImpl.getInt32(0), lgc::Builder::BufferFlagAddress); + builderImpl.getInt32(0), lgc::Builder::BufferFlagAddress, false); // Create descriptor by a 64-bits pointer - m_curAttribBufferDescr = builderImpl.buildInlineBufferDesc(descPtr, 0); + m_curAttribBufferDescr = builderImpl.buildBufferCompactDesc(descPtr, 0); } vtxDesc = m_curAttribBufferDescr; } else { // Create descriptor for vertex buffer vtxDesc = builderImpl.CreateBufferDesc(InternalDescriptorSetId, GenericVertexFetchShaderBinding, - builderImpl.getInt32(binding), lgc::Builder::BufferFlagNonConst); + builderImpl.getInt32(binding), lgc::Builder::BufferFlagNonConst, false); } return vtxDesc; diff --git a/lgc/state/PalMetadata.cpp b/lgc/state/PalMetadata.cpp index 72d3e6d82b..91f900830c 100644 --- a/lgc/state/PalMetadata.cpp +++ b/lgc/state/PalMetadata.cpp @@ -242,6 +242,7 @@ void PalMetadata::mergeFromBlob(llvm::StringRef blob, bool isGlueCode) { mapKey.getString() == Util::Abi::SpiPsInputAddrMetadataKey::PosZFloatEna || mapKey.getString() == Util::Abi::SpiPsInputAddrMetadataKey::SampleCoverageEna || mapKey.getString() == Util::Abi::SpiPsInControlMetadataKey::NumInterps || + mapKey.getString() == Util::Abi::SpiPsInControlMetadataKey::ParamGen || mapKey.getString() == Util::Abi::SpiPsInControlMetadataKey::NumPrimInterp || mapKey.getString() == Util::Abi::SpiPsInControlMetadataKey::PsW32En || mapKey.getString() == Util::Abi::VgtShaderStagesEnMetadataKey::DynamicHs || @@ -484,6 +485,12 @@ void PalMetadata::finalizePipeline(bool isWholePipeline) { if (options.resourceHash 
!= 0) m_pipelineNode[Util::Abi::PipelineMetadataKey::ResourceHash] = options.resourceHash; + // Set usesCps if applicable. + bool usesCps = options.rtIndirectMode == RayTracingIndirectMode::ContinuationsContinufy || + options.rtIndirectMode == RayTracingIndirectMode::Continuations; + if (usesCps) + m_pipelineNode[Util::Abi::PipelineMetadataKey::UsesCps] = true; + // The rest of this function is used only for whole pipeline PAL metadata or an ELF link. if (!isWholePipeline) return; diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index a4379a9c8d..770167b544 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -498,7 +498,8 @@ void PipelineState::readShaderStageMask(Module *module) { // ===================================================================================================================== // Get the last vertex processing shader stage in this pipeline, or ShaderStage::Invalid if none. std::optional PipelineState::getLastVertexProcessingStage() const { - for (auto stage : {ShaderStage::CopyShader, ShaderStage::Geometry, ShaderStage::TessEval, ShaderStage::Vertex}) { + for (auto stage : {ShaderStage::Mesh, ShaderStage::CopyShader, ShaderStage::Geometry, ShaderStage::TessEval, + ShaderStage::Vertex}) { if (m_stageMask.contains(stage)) return stage; } @@ -837,17 +838,17 @@ void PipelineState::readUserDataNodes(Module *module) { nextNode->concreteType = getResourceTypeFromName(cast(metadataNode->getOperand(0))); // Operand 1: matchType nextNode->abstractType = - static_cast(mdconst::dyn_extract(metadataNode->getOperand(1))->getZExtValue()); + static_cast(mdconst::extract(metadataNode->getOperand(1))->getZExtValue()); // Operand 2: visibility - nextNode->visibility = mdconst::dyn_extract(metadataNode->getOperand(2))->getZExtValue(); + nextNode->visibility = mdconst::extract(metadataNode->getOperand(2))->getZExtValue(); // Operand 3: offsetInDwords - nextNode->offsetInDwords = 
mdconst::dyn_extract(metadataNode->getOperand(3))->getZExtValue(); + nextNode->offsetInDwords = mdconst::extract(metadataNode->getOperand(3))->getZExtValue(); // Operand 4: sizeInDwords - nextNode->sizeInDwords = mdconst::dyn_extract(metadataNode->getOperand(4))->getZExtValue(); + nextNode->sizeInDwords = mdconst::extract(metadataNode->getOperand(4))->getZExtValue(); if (nextNode->concreteType == ResourceNodeType::DescriptorTableVaPtr) { // Operand 5: number of nodes in inner table - unsigned innerNodeCount = mdconst::dyn_extract(metadataNode->getOperand(5))->getZExtValue(); + unsigned innerNodeCount = mdconst::extract(metadataNode->getOperand(5))->getZExtValue(); // Go into inner table. assert(!endThisInnerTable); endThisInnerTable = endNextInnerTable; @@ -859,14 +860,14 @@ void PipelineState::readUserDataNodes(Module *module) { if (nextNode->concreteType == ResourceNodeType::IndirectUserDataVaPtr || nextNode->concreteType == ResourceNodeType::StreamOutTableVaPtr) { // Operand 5: Size of the indirect data in dwords - nextNode->indirectSizeInDwords = mdconst::dyn_extract(metadataNode->getOperand(5))->getZExtValue(); + nextNode->indirectSizeInDwords = mdconst::extract(metadataNode->getOperand(5))->getZExtValue(); } else { // Operand 5: set - nextNode->set = mdconst::dyn_extract(metadataNode->getOperand(5))->getZExtValue(); + nextNode->set = mdconst::extract(metadataNode->getOperand(5))->getZExtValue(); // Operand 6: binding - nextNode->binding = mdconst::dyn_extract(metadataNode->getOperand(6))->getZExtValue(); + nextNode->binding = mdconst::extract(metadataNode->getOperand(6))->getZExtValue(); // Operand 7: stride - nextNode->stride = mdconst::dyn_extract(metadataNode->getOperand(7))->getZExtValue(); + nextNode->stride = mdconst::extract(metadataNode->getOperand(7))->getZExtValue(); nextNode->immutableValue = nullptr; // Operand 8 onward: immutable descriptor constants constexpr unsigned ImmutableStartOperand = 8; @@ -877,7 +878,7 @@ void 
PipelineState::readUserDataNodes(Module *module) { nextNode->immutableValue = m_immutableValueAllocs.back().get(); for (unsigned i = 0; i != immutableSizeInDwords; ++i) m_immutableValueAllocs.back()[i] = - mdconst::dyn_extract(metadataNode->getOperand(ImmutableStartOperand + i))->getZExtValue(); + mdconst::extract(metadataNode->getOperand(ImmutableStartOperand + i))->getZExtValue(); } } // Move on to next node to write in table. @@ -1704,6 +1705,22 @@ bool PipelineState::enableSwXfb() { return enableXfb(); } +// ===================================================================================================================== +// Checks if we export vertex/primitive attributes by parameter export instruction. +bool PipelineState::exportAttributeByExportInstruction() const { + const auto gfxIp = getTargetInfo().getGfxIpVersion(); + switch (gfxIp.major) { + case 10: + return true; // Always use parameter export instruction + case 11: + return false; // Always use attribute-through-memory (ATM) + default: + llvm_unreachable("Unexpected GFX generation!"); + } + + return false; +} + // ===================================================================================================================== // Gets resource usage of the specified shader stage // diff --git a/lgc/state/ShaderStage.cpp b/lgc/state/ShaderStage.cpp index 78b3868fea..3f956c06db 100644 --- a/lgc/state/ShaderStage.cpp +++ b/lgc/state/ShaderStage.cpp @@ -97,7 +97,7 @@ std::optional lgc::getShaderStage(const GlobalObject *func) { // Check for the metadata that is added by PipelineState::link. 
MDNode *stageMetaNode = func->getMetadata(ShaderStageMetadata); if (stageMetaNode) - return ShaderStageEnum(mdconst::dyn_extract(stageMetaNode->getOperand(0))->getZExtValue()); + return ShaderStageEnum(mdconst::extract(stageMetaNode->getOperand(0))->getZExtValue()); return std::nullopt; } diff --git a/lgc/test/CsLowerDebugPrintf.lgc b/lgc/test/CsLowerDebugPrintf.lgc index d380f37da8..766b907742 100644 --- a/lgc/test/CsLowerDebugPrintf.lgc +++ b/lgc/test/CsLowerDebugPrintf.lgc @@ -78,58 +78,58 @@ attributes #2 = { nounwind willreturn memory(none) } ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP5]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP5]], i32 -1) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP6]], align 16 -; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP7]]) -; CHECK-NEXT: [[TMP9:%.*]] = call <3 x i32> @lgc.shader.input.WorkgroupId(i32 0) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = mul <3 x i32> [[TMP9]], -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @lgc.shader.input.LocalInvocationId(i32 49) #[[ATTR2]] -; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 1023 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x i32> poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = lshr i32 [[TMP11]], 10 -; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 1023 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <3 x i32> [[TMP13]], i32 [[TMP15]], i64 1 -; CHECK-NEXT: [[TMP17:%.*]] = lshr i32 [[TMP14]], 10 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <3 x i32> [[TMP16]], i32 [[TMP17]], i64 2 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x i32> [[TMP18]], i32 0, i64 1 -; CHECK-NEXT: 
[[TMP20:%.*]] = insertelement <3 x i32> [[TMP19]], i32 0, i64 2 -; CHECK-NEXT: [[TMP21:%.*]] = call <3 x i32> @lgc.reconfigure.local.invocation.id(<3 x i32> [[TMP20]], i32 0) #[[ATTR2]] -; CHECK-NEXT: [[TMP22:%.*]] = add <3 x i32> [[TMP10]], [[TMP21]] -; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP22]], i64 0 -; CHECK-NEXT: [[TMP23:%.*]] = atomicrmw add ptr addrspace(7) [[TMP8]], i64 3 monotonic, align 8 -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP23]], i64 536870912) -; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32 -; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 4 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP26]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP27]], align 4 -; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP28]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP29]], align 4 -; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], 1 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP30]] -; CHECK-NEXT: store i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]], ptr addrspace(7) [[TMP31]], align 4 -; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP33:%.*]] = atomicrmw add ptr addrspace(7) [[TMP8]], i64 6 monotonic, align 8 -; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP33]], i64 536870912) -; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[TMP34]] to i32 -; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 4 -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP36]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP37]], align 4 -; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP36]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP38]] -; CHECK-NEXT: store i32 
{{-?[0-9]+}}, ptr addrspace(7) [[TMP39]], align 4 -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP38]], 1 -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP40]] -; CHECK-NEXT: store i32 0, ptr addrspace(7) [[TMP41]], align 4 -; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP40]], 1 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP42]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP43]], align 4 -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP42]], 1 -; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP44]] -; CHECK-NEXT: store i32 0, ptr addrspace(7) [[TMP45]], align 4 -; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP44]], 1 -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP46]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP47]], align 4 -; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP46]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(7) @lgc.buffer.load.desc.to.ptr(ptr addrspace(4) [[TMP6]], i1 false, i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = call <3 x i32> @lgc.shader.input.WorkgroupId(i32 0) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = mul <3 x i32> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @lgc.shader.input.LocalInvocationId(i32 49) #[[ATTR2]] +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 1023 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x i32> poison, i32 [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP10]], 10 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 1023 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <3 x i32> [[TMP12]], i32 [[TMP14]], i64 1 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP13]], 10 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <3 x i32> [[TMP15]], i32 [[TMP16]], i64 2 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <3 x i32> [[TMP17]], i32 0, i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x i32> [[TMP18]], i32 0, i64 2 +; CHECK-NEXT: [[TMP20:%.*]] = call 
<3 x i32> @lgc.reconfigure.local.invocation.id(<3 x i32> [[TMP19]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP21:%.*]] = add <3 x i32> [[TMP9]], [[TMP20]] +; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = atomicrmw add ptr addrspace(7) [[TMP7]], i64 3 monotonic, align 8 +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP22]], i64 536870912) +; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 4 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP25]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP26]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[TMP25]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP27]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP28]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[TMP27]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP29]] +; CHECK-NEXT: store i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]], ptr addrspace(7) [[TMP30]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP29]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = atomicrmw add ptr addrspace(7) [[TMP7]], i64 6 monotonic, align 8 +; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP32]], i64 536870912) +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], 4 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP35]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP36]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP35]], 1 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP37]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP38]], align 4 +; CHECK-NEXT: [[TMP39:%.*]] = add i32 [[TMP37]], 1 +; 
CHECK-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP39]] +; CHECK-NEXT: store i32 0, ptr addrspace(7) [[TMP40]], align 4 +; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP39]], 1 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP41]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP42]], align 4 +; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP41]], 1 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP43]] +; CHECK-NEXT: store i32 0, ptr addrspace(7) [[TMP44]], align 4 +; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[TMP43]], 1 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP7]], i32 [[TMP45]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP46]], align 4 +; CHECK-NEXT: [[TMP47:%.*]] = add i32 [[TMP45]], 1 ; CHECK-NEXT: ret void ; diff --git a/lgc/test/MulDx9Zero.lgc b/lgc/test/MulDx9Zero.lgc new file mode 100644 index 0000000000..0e9e6b2bda --- /dev/null +++ b/lgc/test/MulDx9Zero.lgc @@ -0,0 +1,144 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc +; RUN: lgc -mcpu=gfx1030 --emit-llvm -v -o=- - <%s | FileCheck --check-prefixes=CHECK %s + +@r0 = addrspace(1) global <4 x float> zeroinitializer +@r1 = addrspace(1) global <4 x float> zeroinitializer +@r2 = addrspace(1) global <4 x float> zeroinitializer +define dllexport spir_func void @main() #0 !spirv.ExecutionModel !3 !lgc.shaderstage !4 { +.entry: + %2257 = load <4 x float>, ptr addrspace(1) @r0, align 16 + %2258 = shufflevector <4 x float> %2257, <4 x float> %2257, <3 x i32> + %2259 = load <4 x float>, ptr addrspace(1) @r1, align 16 + %2260 = shufflevector <4 x float> %2259, <4 x float> %2259, <3 x i32> + %2261 = extractelement <3 x float> %2258, i64 0 + %2262 = fcmp oeq float %2261, 0.000000e+00 + %2263 = insertelement <3 x i1> poison, i1 %2262, i64 0 + %2264 = extractelement <3 x float> %2258, i64 1 + %2265 = fcmp oeq float %2264, 
0.000000e+00 + %2266 = insertelement <3 x i1> %2263, i1 %2265, i64 1 + %2267 = extractelement <3 x float> %2258, i64 2 + %2268 = fcmp oeq float %2267, 0.000000e+00 + %2269 = insertelement <3 x i1> %2266, i1 %2268, i64 2 + %2270 = extractelement <3 x i1> %2269, i64 0 + %2271 = extractelement <3 x float> %2260, i64 0 + %2272 = select reassoc nnan nsz arcp contract afn i1 %2270, float 0.000000e+00, float %2271 + %2273 = insertelement <3 x float> poison, float %2272, i64 0 + %2274 = extractelement <3 x i1> %2269, i64 1 + %2275 = extractelement <3 x float> %2260, i64 1 + %2276 = select reassoc nnan nsz arcp contract afn i1 %2274, float 0.000000e+00, float %2275 + %2277 = insertelement <3 x float> %2273, float %2276, i64 1 + %2278 = extractelement <3 x i1> %2269, i64 2 + %2279 = extractelement <3 x float> %2260, i64 2 + %2280 = select reassoc nnan nsz arcp contract afn i1 %2278, float 0.000000e+00, float %2279 + %2281 = insertelement <3 x float> %2277, float %2280, i64 2 + %2282 = extractelement <3 x float> %2260, i64 0 + %2283 = fcmp oeq float %2282, 0.000000e+00 + %2284 = insertelement <3 x i1> poison, i1 %2283, i64 0 + %2285 = extractelement <3 x float> %2260, i64 1 + %2286 = fcmp oeq float %2285, 0.000000e+00 + %2287 = insertelement <3 x i1> %2284, i1 %2286, i64 1 + %2288 = extractelement <3 x float> %2260, i64 2 + %2289 = fcmp oeq float %2288, 0.000000e+00 + %2290 = insertelement <3 x i1> %2287, i1 %2289, i64 2 + %2291 = extractelement <3 x i1> %2290, i64 0 + %2292 = extractelement <3 x float> %2258, i64 0 + %2293 = select reassoc nnan nsz arcp contract afn i1 %2291, float 0.000000e+00, float %2292 + %2294 = insertelement <3 x float> poison, float %2293, i64 0 + %2295 = extractelement <3 x i1> %2290, i64 1 + %2296 = extractelement <3 x float> %2258, i64 1 + %2297 = select reassoc nnan nsz arcp contract afn i1 %2295, float 0.000000e+00, float %2296 + %2298 = insertelement <3 x float> %2294, float %2297, i64 1 + %2299 = extractelement <3 x i1> %2290, i64 2 + %2300 = 
extractelement <3 x float> %2258, i64 2 + %2301 = select reassoc nnan nsz arcp contract afn i1 %2299, float 0.000000e+00, float %2300 + %2302 = insertelement <3 x float> %2298, float %2301, i64 2 + %2303 = fmul reassoc nnan nsz arcp contract afn <3 x float> %2281, %2302 + %2304 = load <4 x float>, ptr addrspace(1) @r2, align 16 + %2305 = shufflevector <3 x float> %2303, <3 x float> poison, <4 x i32> + %2306 = shufflevector <4 x float> %2304, <4 x float> %2305, <4 x i32> + store <4 x float> %2306, ptr addrspace(1) @r2, align 16 + ret void +} + +attributes #0 = { nounwind } + +!0 = !{{ i64, i64 } { i64 16908289, i64 34359738368 }} +!1 = !{{ i64, i64 } { i64 16908288, i64 34359738368 }} +!2 = !{{ i64, i64 } { i64 16908288, i64 137438953472 }} +!3 = !{i32 4} +!4 = !{i32 6} +; CHECK-LABEL: @main( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) @r0, align 16 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP0]], <3 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr addrspace(1) @r1, align 16 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP2]], <3 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x float> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = fcmp oeq float [[TMP4]], 0.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <3 x i1> poison, i1 [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <3 x float> [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = fcmp oeq float [[TMP7]], 0.000000e+00 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i1> [[TMP6]], i1 [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <3 x float> [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = fcmp oeq float [[TMP10]], 0.000000e+00 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <3 x i1> [[TMP9]], i1 [[TMP11]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <3 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x float> [[TMP3]], i64 0 
+; CHECK-NEXT: [[TMP15:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP13]], float 0.000000e+00, float [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <3 x float> poison, float [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <3 x i1> [[TMP12]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <3 x float> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP17]], float 0.000000e+00, float [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP19]], i64 1 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <3 x i1> [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <3 x float> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP23:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP21]], float 0.000000e+00, float [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <3 x float> [[TMP20]], float [[TMP23]], i64 2 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <3 x float> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = fcmp oeq float [[TMP25]], 0.000000e+00 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <3 x i1> poison, i1 [[TMP26]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <3 x float> [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP29:%.*]] = fcmp oeq float [[TMP28]], 0.000000e+00 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <3 x i1> [[TMP27]], i1 [[TMP29]], i64 1 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <3 x float> [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP32:%.*]] = fcmp oeq float [[TMP31]], 0.000000e+00 +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <3 x i1> [[TMP30]], i1 [[TMP32]], i64 2 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <3 x i1> [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <3 x float> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP34]], float 0.000000e+00, float [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = insertelement <3 x float> poison, float [[TMP36]], i64 0 +; CHECK-NEXT: 
[[TMP38:%.*]] = extractelement <3 x i1> [[TMP33]], i64 1 +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <3 x float> [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP40:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP38]], float 0.000000e+00, float [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <3 x float> [[TMP37]], float [[TMP40]], i64 1 +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <3 x i1> [[TMP33]], i64 2 +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <3 x float> [[TMP1]], i64 2 +; CHECK-NEXT: [[TMP44:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP42]], float 0.000000e+00, float [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = insertelement <3 x float> [[TMP41]], float [[TMP44]], i64 2 +; CHECK-NEXT: [[TMP46:%.*]] = fmul reassoc nnan nsz arcp contract afn <3 x float> [[TMP24]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = load <4 x float>, ptr addrspace(1) @r2, align 16 +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <3 x float> [[TMP46]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x float> [[TMP47]], <4 x float> [[TMP48]], <4 x i32> +; CHECK-NEXT: store <4 x float> [[TMP49]], ptr addrspace(1) @r2, align 16 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @_amdgpu_ps_main( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr addrspace(1) @r0, align 16 +; CHECK-NEXT: [[DOTI2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(1) @r1, align 16 +; CHECK-NEXT: [[DOTI21:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 +; CHECK-NEXT: [[DOTI1:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 +; CHECK-NEXT: [[DOTI0:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmul.legacy(float [[DOTI2]], float [[DOTI0]]) +; CHECK-NEXT: [[TMP3:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmul.legacy(float [[DOTI2]], float [[DOTI1]]) +; CHECK-NEXT: 
[[TMP4:%.*]] = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmul.legacy(float [[DOTI2]], float [[DOTI21]]) +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, ptr addrspace(1) @r2, align 16 +; CHECK-NEXT: [[DOTUPTO019:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTUPTO120:%.*]] = insertelement <4 x float> [[DOTUPTO019]], float [[TMP3]], i64 1 +; CHECK-NEXT: [[DOTUPTO221:%.*]] = insertelement <4 x float> [[DOTUPTO120]], float [[TMP4]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[DOTUPTO221]], <4 x float> [[TMP5]], <4 x i32> +; CHECK-NEXT: store <4 x float> [[TMP6]], ptr addrspace(1) @r2, align 16 +; CHECK-NEXT: ret void +; diff --git a/lgc/test/SubgroupClusteredReduction.lgc b/lgc/test/SubgroupClusteredReduction.lgc index 40b53f9584..6ccb6e960b 100644 --- a/lgc/test/SubgroupClusteredReduction.lgc +++ b/lgc/test/SubgroupClusteredReduction.lgc @@ -28,11 +28,11 @@ declare i32 @lgc.create.subgroup.clustered.reduction.i32(...) ; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP6]], i32 320, i32 15, i32 15, i1 true) ; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[TMP8]], i32 -1, i32 -1, i1 true, i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 undef, i32 [[TMP8]], i32 -1, i32 -1, i1 true, i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.permlane64(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.permlane64{{(.i32)?}}(i32 [[TMP10]]) ; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[TMP12]]) ; CHECK-NEXT: [[R1:%.*]] = call i32 
@llvm.amdgcn.wwm.i32(i32 [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[VALUE2:%.*]], i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP14]], i32 177, i32 15, i32 15, i1 true) @@ -43,7 +43,7 @@ declare i32 @lgc.create.subgroup.clustered.reduction.i32(...) ; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP20]], i32 320, i32 15, i32 15, i1 true) ; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[TMP22]], i32 -1, i32 -1, i1 true, i1 false) +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 undef, i32 [[TMP22]], i32 -1, i32 -1, i1 true, i1 false) ; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP24]]) ; CHECK-NEXT: [[R:%.*]] = add i32 [[R2]], [[R1]] diff --git a/lgc/test/TestWaterfallLoopForStruct.lgc b/lgc/test/TestWaterfallLoopForStruct.lgc index c43691bddf..915a969554 100644 --- a/lgc/test/TestWaterfallLoopForStruct.lgc +++ b/lgc/test/TestWaterfallLoopForStruct.lgc @@ -82,6 +82,7 @@ attributes #2 = { nounwind willreturn memory(read) } ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP6]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP6]], i32 -1) ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, i32 32, 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP8]], i32 32, 2 @@ -89,7 +90,7 @@ attributes #2 = { nounwind 
willreturn memory(read) } ; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP2]], 32 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP13]], align 32, !invariant.load !12 +; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP13]], align 4, !invariant.load !12 ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP11]]) ; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP15]], <8 x i32> [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 1, i32 1, <8 x i32> [[TMP16]], i32 1, i32 0) @@ -103,6 +104,6 @@ attributes #2 = { nounwind willreturn memory(read) } ; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <4 x float>, i32 } [[TMP23]], 0 ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], 0 ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], <4 x float> [[TMP25]], <4 x float> zeroinitializer -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP27]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP27]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; diff --git a/lgc/test/TextureRange.lgc b/lgc/test/TextureRange.lgc index 3cda8fc4f8..46893b37a5 100644 --- a/lgc/test/TextureRange.lgc +++ b/lgc/test/TextureRange.lgc @@ -2,8 +2,8 @@ ; RUN: lgc %s -print-after=lgc-lower-desc -o /dev/null 2>&1 - <%s | FileCheck --check-prefixes=CHECK %s ; CHECK: call <2 x i32> @lgc.load.user.data__v2i32(i32 24) -; CHECK: call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> -; CHECK: [[varindex0:%[0-9]+]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> +; CHECK: call ptr addrspace(7) @lgc.buffer.addr.to.ptr(i64 +; CHECK: [[varindex0:%[0-9]+]] = call ptr 
addrspace(7) @lgc.buffer.addr.to.ptr(i64 ; CHECK: [[varindex1:%[0-9]+]] = load i32, ptr addrspace(7) [[varindex0]], align 4 ; CHECK-NEXT: [[varindex2:%[0-9]+]] = sext i32 [[varindex1]] to i64 ; CHECK-NEXT: getelementptr <{ [4294967295 x float] }>, ptr addrspace(7) %{{.*}}, i64 0, i32 0, i64 [[varindex2]] @@ -11,10 +11,10 @@ ; CHECK-NEXT: [[desc1vec:%[0-9]+]] = insertelement <2 x i32> %{{[^,]+}}, i32 [[desc1lo]], i64 0 ; CHECK-NEXT: [[desc1lohi:%[0-9]+]] = bitcast <2 x i32> [[desc1vec]] to i64 ; CHECK-NEXT: [[desc1:%[0-9]+]] = inttoptr i64 [[desc1lohi]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[desc1]], i32 4), "dereferenceable"(ptr addrspace(4) [[desc1]], i32 -1) ] ; CHECK-NEXT: %{{.*}} = getelementptr i8, ptr addrspace(4) [[desc1]], i32 32 ; RUN: lgc -mcpu=gfx1030 -o - - <%s | FileCheck --check-prefixes=SHADER_TEST %s -; SHADER_TEST: s_and_b32 {{.*}}, s4, 0xffff ; SHADER_TEST: s_mov_b32 {{.*}}, 0x21014fac ; SHADER_TEST: s_mov_b32 {{.*}}, -1 ; SHADER_TEST: s_load_dwordx8 [[desc:.*]], {{.*}}, 0x20 diff --git a/lgc/test/Transforms/CpsLowering/continuation-basic.lgc b/lgc/test/Transforms/CpsLowering/continuation-basic.lgc index fa8b13b879..cb41cd1279 100644 --- a/lgc/test/Transforms/CpsLowering/continuation-basic.lgc +++ b/lgc/test/Transforms/CpsLowering/continuation-basic.lgc @@ -53,7 +53,7 @@ define void @test({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.shadersta ; CHECK-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 ; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP31]], i32 [[TMP29]] ; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP33]], i1 true) -; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP18]], i32 [[TMP34]]) +; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP18]], i32 [[TMP34]]) ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[TMP18]], [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP36]]) ; 
CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP35]]) diff --git a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc index 84fd25e3ba..8aecb2319d 100644 --- a/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-entry-point.lgc @@ -47,7 +47,7 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc ; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP21]]) ; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP22]], i1 true) -; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP20]], i32 [[TMP23]]) +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP20]], i32 [[TMP23]]) ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP20]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP25]]) ; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP24]], -64 diff --git a/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc b/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc index 0d97e2cb53..dc9f6d1f1e 100644 --- a/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-from-continufy.lgc @@ -106,7 +106,7 @@ define spir_func void @_rgen_1({} %state, i32 %rcr) #0 !spirv.ExecutionModel !15 ; CHECK-NEXT: [[TMP88:%.*]] = icmp ne i32 [[TMP87]], 0 ; CHECK-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], i32 [[TMP87]], i32 [[TMP85]] ; CHECK-NEXT: [[TMP90:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP89]], i1 true) -; CHECK-NEXT: [[TMP91:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP74]], i32 [[TMP90]]) +; CHECK-NEXT: [[TMP91:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP74]], i32 [[TMP90]]) ; CHECK-NEXT: [[TMP92:%.*]] = icmp eq i32 [[TMP74]], [[TMP91]] ; CHECK-NEXT: [[TMP93:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP92]]) ; CHECK-NEXT: 
[[TMP94:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP91]]) @@ -266,7 +266,7 @@ define void @_rgen_1.resume.0({} %0, i32 %1, [1 x i32] %2) !spirv.ExecutionModel ; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 ; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[TMP45]], i32 [[TMP43]] ; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP47]], i1 true) -; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP32]], i32 [[TMP48]]) +; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP32]], i32 [[TMP48]]) ; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i32 [[TMP32]], [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP50]]) ; CHECK-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP49]]) diff --git a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc index 170c39683d..793f4bbdad 100644 --- a/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-stack-lowering.lgc @@ -69,7 +69,7 @@ define void @test.0({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { ; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 ; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 [[TMP40]] ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP44]], i1 true) -; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP29]], i32 [[TMP45]]) +; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP29]], i32 [[TMP45]]) ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i32 [[TMP29]], [[TMP46]] ; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP47]]) ; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP46]]) @@ -171,7 +171,7 @@ define void @test.1({} %no_state, ptr addrspace(32) %p2, i32 %q1) !lgc.cps !{i32 ; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 ; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 
[[TMP33]], i32 [[TMP31]] ; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP35]], i1 true) -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP20]], i32 [[TMP36]]) +; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP20]], i32 [[TMP36]]) ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP20]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP38]]) ; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP37]]) @@ -269,7 +269,7 @@ define void @test.2({ ptr addrspace(32) } %state) !lgc.cps !{i32 1} !lgc.shaders ; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 ; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP34]] ; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP38]], i1 true) -; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP23]], i32 [[TMP39]]) +; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP23]], i32 [[TMP39]]) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP23]], [[TMP40]] ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP41]]) ; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP40]]) @@ -392,7 +392,7 @@ define void @test.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i32 7} { ; CHECK-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 ; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], i32 [[TMP51]], i32 [[TMP49]] ; CHECK-NEXT: [[TMP54:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP53]], i1 true) -; CHECK-NEXT: [[TMP55:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP38]], i32 [[TMP54]]) +; CHECK-NEXT: [[TMP55:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP38]], i32 [[TMP54]]) ; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i32 [[TMP38]], [[TMP55]] ; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP56]]) ; CHECK-NEXT: [[TMP58:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP55]]) @@ -509,7 +509,7 @@ define void 
@test.nested.gep({} %unused) !lgc.cps !{i32 1} !lgc.shaderstage !{i3 ; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 ; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[TMP39]], i32 [[TMP37]] ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP41]], i1 true) -; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 [[TMP42]]) +; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP26]], i32 [[TMP42]]) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP26]], [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP44]]) ; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP43]]) @@ -607,7 +607,7 @@ define void @test.i64.reference({} %no_state, ptr addrspace(32) %p2, i32 %q1) !l ; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 ; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP33]], i32 [[TMP31]] ; CHECK-NEXT: [[TMP36:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP35]], i1 true) -; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP20]], i32 [[TMP36]]) +; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP20]], i32 [[TMP36]]) ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[TMP20]], [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP38]]) ; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP37]]) diff --git a/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc b/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc index 9e3e27882d..a2c79432d8 100644 --- a/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc +++ b/lgc/test/Transforms/CpsLowering/cps-unify-exits.lgc @@ -69,7 +69,7 @@ define void @unify_jumps({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc.sh ; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 ; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[TMP39]], i32 [[TMP37]] ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP41]], i1 true) 
-; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 [[TMP42]]) +; CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP26]], i32 [[TMP42]]) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[TMP26]], [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP44]]) ; CHECK-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP43]]) @@ -188,7 +188,7 @@ define void @unify_jump_ret({i32} %state, i32 %arg, ptr %table) !lgc.cps !0 !lgc ; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 [[TMP32]] ; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP36]], i1 true) -; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP21]], i32 [[TMP37]]) +; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[TMP21]], i32 [[TMP37]]) ; CHECK-NEXT: [[TMP39:%.*]] = icmp eq i32 [[TMP21]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[TMP39]]) ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP38]]) diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc index 8e0650f523..146fa69acf 100644 --- a/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc +++ b/lgc/test/Transforms/LowerCooperativeMatrix/convert.lgc @@ -28,28 +28,28 @@ define <8 x float> @convert_f16_to_factor(<8 x float> %accum) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP7]], i32 [[TMP8]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP7]], i32 [[TMP8]], i32 1985229328, i32 
-19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP10]], i32 [[TMP11]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP10]], i32 [[TMP11]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP6]], i64 2 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP13]], i32 [[TMP14]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP13]], i32 [[TMP14]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP6]], i64 3 -; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP16]], i32 [[TMP17]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP16]], i32 [[TMP17]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP6]], i64 4 -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP19]], i32 [[TMP20]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP19]], i32 [[TMP20]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP6]], i64 5 -; CHECK-NEXT: [[TMP24:%.*]] = call i32 
@llvm.amdgcn.permlanex16(i32 [[TMP22]], i32 [[TMP23]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP22]], i32 [[TMP23]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP6]], i64 6 -; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP25]], i32 [[TMP26]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP25]], i32 [[TMP26]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP6]], i64 7 -; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 [[TMP28]], i32 [[TMP29]], i32 1985229328, i32 -19088744, i1 false, i1 false) +; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.permlanex16{{(.i32)?}}(i32 [[TMP28]], i32 [[TMP29]], i32 1985229328, i32 -19088744, i1 false, i1 false) ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> poison, i32 [[TMP9]], i64 0 ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP12]], i64 1 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP15]], i64 2 diff --git a/lgc/test/Transforms/PatchBufferOp/strided-buffer-ops.lgc b/lgc/test/Transforms/PatchBufferOp/strided-buffer-ops.lgc index cae2e83a00..6fc5998cb8 100644 --- a/lgc/test/Transforms/PatchBufferOp/strided-buffer-ops.lgc +++ b/lgc/test/Transforms/PatchBufferOp/strided-buffer-ops.lgc @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool lgc --version 2 -; RUN: lgc --mcpu=gfx1100 -o - -passes='require,module(lgc-lower-desc),module(lgc-patch-entry-point-mutate),function(lgc-patch-buffer-op)' %s | FileCheck --check-prefixes=GFX11 
%s +; RUN: lgc --mcpu=gfx1100 -o - -passes="require,module(lgc-lower-desc),module(lgc-patch-entry-point-mutate),function(lgc-patch-buffer-op)" %s | FileCheck --check-prefixes=GFX11 %s define amdgpu_kernel void @strided_buffer_desc_to_ptr(<4 x i32> inreg %desc, ptr %out) { ; GFX11-LABEL: define amdgpu_gfx void @strided_buffer_desc_to_ptr @@ -166,13 +166,13 @@ define float @addr_and_stride_to_ptr(i64 %addr, i32 %stride) { ; GFX11-LABEL: define amdgpu_gfx float @addr_and_stride_to_ptr ; GFX11-SAME: (i64 [[ADDR:%.*]], i32 [[STRIDE:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = trunc i64 [[ADDR]] to i32 -; GFX11-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; GFX11-NEXT: [[TMP2:%.*]] = lshr i64 [[ADDR]], 32 -; GFX11-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; GFX11-NEXT: [[TMP0:%.*]] = bitcast i64 [[ADDR]] to <2 x i32> +; GFX11-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 +; GFX11-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 +; GFX11-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; GFX11-NEXT: [[TMP4:%.*]] = shl i32 [[STRIDE]], 16 -; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP4]] -; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP5]], i64 1 +; GFX11-NEXT: 
[[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]] +; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP5]], i64 1 ; GFX11-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 -1, i64 2 ; GFX11-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 536956844, i64 3 ; GFX11-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[TMP8]], i32 0, i32 0, i32 0, i32 0) @@ -189,13 +189,13 @@ define float @addr_and_stride_to_ptr_index(i64 %addr, i32 %index, i32 %stride) { ; GFX11-LABEL: define amdgpu_gfx float @addr_and_stride_to_ptr_index ; GFX11-SAME: (i64 [[ADDR:%.*]], i32 [[INDEX:%.*]], i32 [[STRIDE:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = trunc i64 [[ADDR]] to i32 -; GFX11-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; GFX11-NEXT: [[TMP2:%.*]] = lshr i64 [[ADDR]], 32 -; GFX11-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; GFX11-NEXT: [[TMP0:%.*]] = bitcast i64 [[ADDR]] to <2 x i32> +; GFX11-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 +; GFX11-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 +; GFX11-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; GFX11-NEXT: [[TMP4:%.*]] = shl i32 [[STRIDE]], 16 -; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], 
[[TMP4]] -; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP5]], i64 1 +; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]] +; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP5]], i64 1 ; GFX11-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 -1, i64 2 ; GFX11-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 536956844, i64 3 ; GFX11-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[TMP8]], i32 [[INDEX]], i32 0, i32 0, i32 0) @@ -213,13 +213,13 @@ define float @addr_and_stride_to_ptr_index_offset(i64 %addr, i32 %index, i32 %st ; GFX11-LABEL: define amdgpu_gfx float @addr_and_stride_to_ptr_index_offset ; GFX11-SAME: (i64 [[ADDR:%.*]], i32 [[INDEX:%.*]], i32 [[STRIDE:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = trunc i64 [[ADDR]] to i32 -; GFX11-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; GFX11-NEXT: [[TMP2:%.*]] = lshr i64 [[ADDR]], 32 -; GFX11-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; GFX11-NEXT: [[TMP0:%.*]] = bitcast i64 [[ADDR]] to <2 x i32> +; GFX11-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 +; GFX11-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 +; GFX11-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> 
poison, i32 [[TMP1]], i64 0 ; GFX11-NEXT: [[TMP4:%.*]] = shl i32 [[STRIDE]], 16 -; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP4]] -; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP5]], i64 1 +; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]] +; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP5]], i64 1 ; GFX11-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 -1, i64 2 ; GFX11-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 536956844, i64 3 ; GFX11-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[TMP8]], i32 [[INDEX]], i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 0, i32 0) @@ -238,13 +238,13 @@ define float @addr_and_stride_to_ptr_offset_index(i64 %addr, i32 %index, i32 %st ; GFX11-LABEL: define amdgpu_gfx float @addr_and_stride_to_ptr_offset_index ; GFX11-SAME: (i64 [[ADDR:%.*]], i32 [[INDEX:%.*]], i32 [[STRIDE:%.*]], i32 inreg noundef [[GLOBALTABLE:%.*]], ptr addrspace(4) inreg noundef [[NUMWORKGROUPSPTR:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[USERDATA1:%.*]], i32 inreg noundef [[USERDATA2:%.*]], i32 inreg noundef [[USERDATA3:%.*]], i32 inreg noundef [[USERDATA4:%.*]], i32 inreg noundef [[USERDATA5:%.*]], i32 inreg noundef [[PAD6:%.*]], i32 inreg noundef [[PAD7:%.*]], i32 inreg noundef [[PAD8:%.*]], i32 inreg noundef [[PAD9:%.*]], i32 inreg noundef [[PAD10:%.*]], i32 inreg noundef [[PAD11:%.*]], i32 inreg noundef [[SPILLTABLE:%.*]], <3 x i32> inreg noundef [[WORKGROUPID:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], i32 noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0]] { ; GFX11-NEXT: entry: -; GFX11-NEXT: [[TMP0:%.*]] = trunc i64 [[ADDR]] to i32 -; GFX11-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; GFX11-NEXT: [[TMP2:%.*]] = lshr i64 [[ADDR]], 32 -; GFX11-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; GFX11-NEXT: [[TMP0:%.*]] = bitcast i64 
[[ADDR]] to <2 x i32> +; GFX11-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 +; GFX11-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1 +; GFX11-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 ; GFX11-NEXT: [[TMP4:%.*]] = shl i32 [[STRIDE]], 16 -; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP4]] -; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP5]], i64 1 +; GFX11-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP4]] +; GFX11-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP5]], i64 1 ; GFX11-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 -1, i64 2 ; GFX11-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 536956844, i64 3 ; GFX11-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[TMP8]], i32 [[INDEX]], i32 ptrtoint (ptr addrspace(6) getelementptr inbounds (i8, ptr addrspace(6) null, i32 8) to i32), i32 0, i32 0) @@ -270,22 +270,23 @@ define amdgpu_kernel void @constant_strided_buffer_desc_to_ptr_index(<4 x i32> i ; GFX11-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) ; GFX11-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[USERDATA3]], i64 0 ; GFX11-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[USERDATA4]], i64 1 -; GFX11-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i64 0 -; GFX11-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i64 1 -; GFX11-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 -; GFX11-NEXT: [[TMP10:%.*]] = and i32 [[TMP8]], 65535 -; GFX11-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], 1048576 -; GFX11-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i64 1 -; GFX11-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 -1, i64 2 -; GFX11-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 805392300, i64 3 -; GFX11-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 1 -; GFX11-NEXT: [[TMP16:%.*]] 
= lshr i32 [[TMP15]], 16 -; GFX11-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 16383 -; GFX11-NEXT: [[TMP18:%.*]] = mul i32 24, [[TMP17]] -; GFX11-NEXT: [[TMP19:%.*]] = add i32 0, [[TMP18]] -; GFX11-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP14]], i32 [[TMP19]], i32 0), !invariant.load [[META7:![0-9]+]] -; GFX11-NEXT: [[TMP21:%.*]] = bitcast i32 [[TMP20]] to float -; GFX11-NEXT: store float [[TMP21]], ptr [[OUT]], align 4 +; GFX11-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; GFX11-NEXT: [[TMP8:%.*]] = bitcast i64 [[TMP7]] to <2 x i32> +; GFX11-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i64 0 +; GFX11-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i64 1 +; GFX11-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 +; GFX11-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], 1048576 +; GFX11-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 1 +; GFX11-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 -1, i64 2 +; GFX11-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 536956844, i64 3 +; GFX11-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP15]], i64 1 +; GFX11-NEXT: [[TMP17:%.*]] = lshr i32 [[TMP16]], 16 +; GFX11-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 16383 +; GFX11-NEXT: [[TMP19:%.*]] = mul i32 24, [[TMP18]] +; GFX11-NEXT: [[TMP20:%.*]] = add i32 0, [[TMP19]] +; GFX11-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP15]], i32 [[TMP20]], i32 0), !invariant.load [[META7:![0-9]+]] +; GFX11-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float +; GFX11-NEXT: store float [[TMP22]], ptr [[OUT]], align 4 ; GFX11-NEXT: ret void ; entry: diff --git a/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc b/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc index 4278762b19..5bd599ad09 100644 --- a/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc +++ b/lgc/test/Transforms/ReadFirstLane/PatchReadLane.lgc @@ -14,7 +14,7 @@ 
define dllexport amdgpu_cs void @icmp_eq_true(i32 inreg %0, i32 inreg %1, <3 x i ; CHECK-LABEL: @icmp_eq_true( ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[LOCALINVOCATIONID_I0:%.*]] = extractelement <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 0 -; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[LOCALINVOCATIONID_I0]], i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[LOCALINVOCATIONID_I0]], i32 0) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOCALINVOCATIONID_I0]], [[SCALAR]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: BB1: @@ -49,7 +49,7 @@ define dllexport amdgpu_cs void @icmp_ne_false(i32 inreg %0, i32 inreg %1, <3 x ; CHECK-LABEL: @icmp_ne_false( ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[LOCALINVOCATIONID_I0:%.*]] = extractelement <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 0 -; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[LOCALINVOCATIONID_I0]], i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[LOCALINVOCATIONID_I0]], i32 0) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LOCALINVOCATIONID_I0]], [[SCALAR]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB1:%.*]] ; CHECK: BB1: @@ -84,7 +84,7 @@ define dllexport amdgpu_cs void @icmp_eq_false(i32 inreg %0, i32 inreg %1, <3 x ; CHECK-LABEL: @icmp_eq_false( ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[LOCALINVOCATIONID_I0:%.*]] = extractelement <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 0 -; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[LOCALINVOCATIONID_I0]], i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[LOCALINVOCATIONID_I0]], i32 0) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOCALINVOCATIONID_I0]], [[SCALAR]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB2:%.*]], label [[BB1:%.*]] ; CHECK: BB1: @@ -119,7 +119,7 @@ define dllexport amdgpu_cs void @icmp_ne_true(i32 inreg %0, i32 inreg %1, <3 x i ; CHECK-LABEL: @icmp_ne_true( ; CHECK-NEXT: 
.entry: ; CHECK-NEXT: [[LOCALINVOCATIONID_I0:%.*]] = extractelement <3 x i32> [[LOCALINVOCATIONID:%.*]], i32 0 -; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[LOCALINVOCATIONID_I0]], i32 0) +; CHECK-NEXT: [[SCALAR:%.*]] = call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 [[LOCALINVOCATIONID_I0]], i32 0) ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LOCALINVOCATIONID_I0]], [[SCALAR]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: BB1: diff --git a/lgc/test/Transforms/ReadFirstLane/issue2746.lgc b/lgc/test/Transforms/ReadFirstLane/issue2746.lgc index dfb3de35e2..14c07cd6d3 100644 --- a/lgc/test/Transforms/ReadFirstLane/issue2746.lgc +++ b/lgc/test/Transforms/ReadFirstLane/issue2746.lgc @@ -4,7 +4,7 @@ define i32 @f() { ; CHECK-LABEL: define i32 @f() { ; CHECK-NEXT: [[MBC:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) -; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[MBC]]) +; CHECK-NEXT: [[RFL:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[MBC]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MBC]], [[RFL]] ; CHECK-NEXT: [[BAL:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[CMP]]) ; CHECK-NEXT: ret i32 [[BAL]] diff --git a/lgc/test/Transforms/ReadFirstLane/simple.lgc b/lgc/test/Transforms/ReadFirstLane/simple.lgc index af7f8e2804..a1669d741a 100644 --- a/lgc/test/Transforms/ReadFirstLane/simple.lgc +++ b/lgc/test/Transforms/ReadFirstLane/simple.lgc @@ -4,7 +4,7 @@ define i32 @simple(i32 %x) { ; CHECK-LABEL: @simple( ; CHECK-NEXT: [[Y:%.*]] = mul i32 [[X:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[Y]]) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[Y]]) ; CHECK-NEXT: ret i32 [[R]] ; %y = mul i32 %x, 2 @@ -15,9 +15,9 @@ define i32 @simple(i32 %x) { define i32 @simple2(i32 %x) { ; CHECK-LABEL: @simple2( ; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[A]]) +; 
CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[A]]) ; CHECK-NEXT: [[B:%.*]] = add i32 [[TMP1]], 2 -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[B]]) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[B]]) ; CHECK-NEXT: ret i32 [[R]] ; %a = mul i32 %x, 5 @@ -32,8 +32,8 @@ define <2 x i32> @vec2(ptr addrspace(4) %ptr) { ; CHECK-NEXT: [[DESC:%.*]] = load <2 x i32>, ptr addrspace(4) [[PTR_OFS]], align 16 ; CHECK-NEXT: [[DESC_0:%.*]] = extractelement <2 x i32> [[DESC]], i32 0 ; CHECK-NEXT: [[DESC_1:%.*]] = extractelement <2 x i32> [[DESC]], i32 1 -; CHECK-NEXT: [[RF_0:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[DESC_0]]) -; CHECK-NEXT: [[RF_1:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[DESC_1]]) +; CHECK-NEXT: [[RF_0:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[DESC_0]]) +; CHECK-NEXT: [[RF_1:%.*]] = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 [[DESC_1]]) ; CHECK-NEXT: [[OUT_0:%.*]] = insertelement <2 x i32> poison, i32 [[RF_0]], i32 0 ; CHECK-NEXT: [[OUT_1:%.*]] = insertelement <2 x i32> [[OUT_0]], i32 [[RF_1]], i32 1 ; CHECK-NEXT: ret <2 x i32> [[OUT_1]] diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc index 186f904c28..4b3903cef3 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc @@ -17,20 +17,20 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 4, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: 
[[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP9]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 0, <8 x i32> [[TMP13]], i32 0, i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP9]], <4 x float> [[TMP14]]) -; CHECK-NEXT: [[TMP22:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP22:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP16]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP19]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP19]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP21:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP16]], <8 x i32> [[TMP20]]) ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP15]], i32 15, i32 1, <8 x i32> [[TMP21]], i32 0, i32 0) ; CHECK-NEXT: ret void diff --git 
a/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc index 8d8609b8ea..acd93c3ad6 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc @@ -21,11 +21,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -44,19 +46,19 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], [[PHI]] ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 4, !invariant.load [[META10:![0-9]+]] +; 
CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 ; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]] @@ -108,10 +110,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) 
local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc index 68797e0c4e..e6fc0b8e0a 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc @@ -21,11 +21,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -44,15 +46,15 @@ define dllexport spir_func void 
@lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: br label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(4) [ [[I5]], [[BB1]] ], [ [[I8]], [[BB2]] ] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[PHI]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[PHI]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[PHI]], align 4, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[PHI]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v8i32(i32 0, <8 x i32> [[I10]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v4i32(i32 [[TMP12]], <4 x i32> [[I9]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP13]], <8 x i32> [[I10]]) ; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.readfirstlane.v4i32.v4i32(i32 [[TMP13]], <4 x i32> [[I9]]) ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP14]], <4 x i32> [[TMP15]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP13]], <4 x float> [[TMP16]]) -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 ; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]] @@ -104,10 +106,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) 
local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc index e5909c6acb..300fa99474 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc @@ -22,11 +22,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -38,16 +40,16 @@ define dllexport spir_func void 
@lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 4, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) ; 
CHECK-NEXT: [[I12:%.*]] = fadd <4 x float> [[PHI_IMG]], @@ -55,7 +57,7 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I12]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I12]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; .entry: @@ -94,10 +96,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) 
local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc index 9cb2e24f8f..40cd05470a 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc @@ -22,11 +22,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: [[I3:%.*]] = mul i32 [[I]], 48 ; CHECK-NEXT: [[I4:%.*]] = sext i32 [[I3]] to i64 @@ -38,18 +40,18 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[PHI_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[PHI_LOAD1]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[PHI_LOAD:%.*]] = load <8 
x i32>, ptr addrspace(4) [[PHI_LOAD1]], align 4, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v8i32(i32 0, <8 x i32> [[PHI_LOAD]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 [[TMP12]], i32 [[I6]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP13]], <8 x i32> [[PHI_LOAD]]) ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP13]], i32 [[I6]]) ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[TMP15]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP17]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP17]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP19:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP14]], <4 x i32> [[TMP18]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP13]], <4 x float> [[TMP19]]) -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -91,10 +93,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) 
local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc index 34edce02c5..77614a9a16 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc @@ -23,11 +23,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -39,16 +41,16 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: 
[[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[TMP25:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP25:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 4, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) ; CHECK-NEXT: [[I12:%.*]] = fadd <4 x float> [[PHI_IMG]], @@ -56,12 +58,12 @@ define dllexport spir_func void 
@lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[TMP27:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP27:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I6]]) ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP21]], i32 [[I6]]) ; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP28:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP24]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP28:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP24]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP29:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP21]], <8 x i32> [[TMP28]]) ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[I12]], i32 15, i32 1, <8 x i32> [[TMP29]], i32 0, i32 0) ; CHECK-NEXT: ret void @@ -102,10 +104,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) 
local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc index ab426db63d..536bfbdf55 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc @@ -22,11 +22,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -38,25 +40,25 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[TMP25:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP25:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 4, 
!invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) ; CHECK-NEXT: [[I12:%.*]] = fadd <4 x float> [[PHI_IMG]], -; CHECK-NEXT: [[TMP27:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP27:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I6]]) ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP21]], i32 [[I6]]) ; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 ; 
CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP28:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP24]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP28:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP24]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP29:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP21]], <8 x i32> [[TMP28]]) ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[I12]], i32 15, i32 1, <8 x i32> [[TMP29]], i32 0, i32 0) ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 @@ -101,10 +103,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) 
local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc index 39f2ed7d68..959f5fe58f 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc @@ -22,11 +22,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -39,12 +41,12 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] ; CHECK-NEXT: [[I10:%.*]] = load <4 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[I10]] to <4 x float> -; CHECK-NEXT: [[TMP17:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP17:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: 
[[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I6]]) ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP13]], i32 [[I6]]) ; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP16]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP18:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP16]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP13]], <8 x i32> [[TMP18]]) ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP12]], i32 15, i32 1, <8 x i32> [[TMP19]], i32 0, i32 0) ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 @@ -87,10 +89,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) 
local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc index c3ed6f1215..21c05a91d3 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc @@ -17,12 +17,12 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP2]] ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float> -; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP8]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP11]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP12:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP11]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP8]], <8 x i32> [[TMP12]]) ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP7]], i32 15, i32 1, <8 x i32> [[TMP13]], i32 0, i32 0) ; CHECK-NEXT: ret void diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc index 4616e701a2..748eb7bb27 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc @@ -18,12 +18,12 @@ 
define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP9]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP13]], <4 x i32> , i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP9]], <4 x float> [[TMP14]]) ; CHECK-NEXT: ret void diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc index 1fbc044bd6..29fe33a9e3 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc @@ -21,14 +21,14 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = call 
ptr addrspace(4) @foo1(i32 [[TMP0]]) -; CHECK-NEXT: [[TMP19:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP8]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP19:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 4, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP8]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v4i32(i32 [[TMP10]], <4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP11]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.readfirstlane.v4i32.v4i32(i32 [[TMP11]], <4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP15]], <4 x i32> [[TMP16]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP18:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP11]], <4 x float> [[TMP17]]) diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc index 085be16a66..845b764733 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc @@ -20,20 +20,20 @@ define dllexport spir_func void 
@lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 4, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP9]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 0, <8 x i32> [[TMP13]], i32 0, i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP9]], <4 x float> [[TMP14]]) -; CHECK-NEXT: [[TMP22:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP22:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP16]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr 
addrspace(4) [[TMP19]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP19]], align 4, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP21:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP16]], <8 x i32> [[TMP20]]) ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP15]], i32 15, i32 1, <8 x i32> [[TMP21]], i32 0, i32 0) ; CHECK-NEXT: br label [[RET]] diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc index 42398f1ce4..c934bb95e1 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc @@ -34,14 +34,16 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP14]], i64 0 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP15]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP17]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP17]], i32 -1) ] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP18]], align 32, !invariant.load [[META24:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META24:![0-9]+]] ; CHECK-NEXT: [[TMP20:%.*]] = call <4 x i32> @llvm.amdgcn.image.load.1d.v4i32.i32(i32 15, i32 [[DOT0]], <8 x i32> [[TMP19]], i32 0, i32 0), !invariant.load [[META24]] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i64 0 ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @lgc.load.user.data__i32(i32 36) ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP22]], i64 0 ; CHECK-NEXT: [[TMP24:%.*]] = bitcast <2 x i32> [[TMP23]] to i64 ; CHECK-NEXT: [[TMP25:%.*]] 
= inttoptr i64 [[TMP24]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP25]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP25]], i32 -1) ] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP25]], i32 16 ; CHECK-NEXT: [[TMP27:%.*]] = mul i32 [[TMP21]], 32 ; CHECK-NEXT: [[TMP28:%.*]] = sext i32 [[TMP27]] to i64 @@ -50,35 +52,36 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP31]], i64 0 ; CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i32> [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP34]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP34]], i32 -1) ] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP34]], i32 0 -; CHECK-NEXT: [[TMP52:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP29]], align 32, !invariant.load [[META24]] -; CHECK-NEXT: [[TMP36:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 16, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP52:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP29]], align 4, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP36:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 4, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP27]]) ; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP37]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP39:%.*]] = sext i32 [[TMP38]] to i64 ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP40]], align 32, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP41:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP40]], align 4, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP42:%.*]] = call 
reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP41]], <4 x i32> [[TMP36]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP43:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP37]], <4 x float> [[TMP42]]) ; CHECK-NEXT: [[TMP44:%.*]] = mul i32 [[TMP7]], 32 ; CHECK-NEXT: [[TMP45:%.*]] = sext i32 [[TMP44]] to i64 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP46]], align 32, !invariant.load [[META24]] -; CHECK-NEXT: [[TMP59:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 16, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP47:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP46]], align 4, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP59:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 4, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP44]]) ; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP48]], i32 [[TMP44]]) ; CHECK-NEXT: [[TMP50:%.*]] = sext i32 [[TMP49]] to i64 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP50]] -; CHECK-NEXT: [[TMP67:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP51]], align 32, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP67:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP51]], align 4, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP53:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP67]], <4 x i32> [[TMP59]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP54:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP48]], <4 x float> [[TMP53]]) -; CHECK-NEXT: [[TMP68:%.*]] = load <8 
x i32>, ptr addrspace(4) [[TMP29]], align 32, !invariant.load [[META24]] -; CHECK-NEXT: [[TMP69:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 16, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP68:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP29]], align 4, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP69:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 4, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP55:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP27]]) ; CHECK-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP55]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP57:%.*]] = sext i32 [[TMP56]] to i64 ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP57]] -; CHECK-NEXT: [[TMP70:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP58]], align 32, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP70:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP58]], align 4, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP60:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP70]], <4 x i32> [[TMP69]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP61:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP55]], <4 x float> [[TMP60]]) ; CHECK-NEXT: [[TMP62]] = fadd reassoc nnan nsz arcp contract afn <4 x float> [[DOT09]], [[TMP61]] diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc index 4b1edb4ae3..37f97ccd90 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc @@ -21,11 +21,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: 
[[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: @@ -39,16 +41,16 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I10:%.*]] = getelementptr i8, ptr addrspace(4) [[I3]], i64 [[I9]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[I7]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[I3]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[I7]], align 4, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[I3]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I5]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I5]]) ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP18]], align 32, !invariant.load [[META10]] +; 
CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I13:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I13]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I13]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; .entry: @@ -85,10 +87,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) 
local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc index 4958e7432e..cd823cdc5b 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc @@ -21,11 +21,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] @@ -41,19 +43,19 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[I]], [[PHI]] ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = 
load <8 x i32>, ptr addrspace(4) [[I5]], align 4, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; .entry: @@ -92,10 +94,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) 
local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) local_unnamed_addr #2 diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc index c188e6376b..dbe9969ddc 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc @@ -20,11 +20,13 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP7]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP7]], i32 -1) ] ; CHECK-NEXT: [[I1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP8]], i64 0 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP11]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP11]], i32 -1) ] ; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP11]], i32 32 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -37,19 +39,19 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], 
i64 [[I7]] ; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] ; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] -; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] -; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 4, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 4, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) -; CHECK-NEXT: 
call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I11]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -92,10 +94,6 @@ declare ptr addrspace(4) @lgc.create.get.desc.ptr.p4(...) local_unnamed_addr #1 ; Function Attrs: nounwind memory(none) declare i32 @lgc.create.get.desc.stride__i32(...) local_unnamed_addr #1 -declare spir_func void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } }) local_unnamed_addr - -declare spir_func void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } }) local_unnamed_addr - ; Function Attrs: nounwind willreturn memory(read) declare <4 x float> @lgc.create.image.sample.v4f32(...) 
local_unnamed_addr #2 diff --git a/lgc/util/Internal.cpp b/lgc/util/Internal.cpp index 4aca887f26..e7a94d8bdb 100644 --- a/lgc/util/Internal.cpp +++ b/lgc/util/Internal.cpp @@ -29,6 +29,7 @@ *********************************************************************************************************************** */ #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_os_ostream.h" #if !defined(_WIN32) diff --git a/lgc/util/MbStandardInstrumentations.cpp b/lgc/util/MbStandardInstrumentations.cpp index 8c5de79205..af7ddcc8ee 100644 --- a/lgc/util/MbStandardInstrumentations.cpp +++ b/lgc/util/MbStandardInstrumentations.cpp @@ -31,6 +31,7 @@ #include "lgc/MbStandardInstrumentations.h" #include "llvm/IR/PrintPasses.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/FormatVariadic.h" using namespace llvm; @@ -215,7 +216,7 @@ void MbPrintIRInstrumentation::pushModuleDesc(StringRef PassID, Any IR) { MbPrintIRInstrumentation::PrintModuleDesc MbPrintIRInstrumentation::popModuleDesc(StringRef PassID) { assert(!ModuleDescStack.empty() && "empty ModuleDescStack"); PrintModuleDesc ModuleDesc = ModuleDescStack.pop_back_val(); - assert(std::get<2>(ModuleDesc).equals(PassID) && "malformed ModuleDescStack"); + assert(std::get<2>(ModuleDesc) == PassID && "malformed ModuleDescStack"); return ModuleDesc; } diff --git a/lgc/util/ModuleBunch.cpp b/lgc/util/ModuleBunch.cpp index 24f71f94a9..7a2f747f4a 100644 --- a/lgc/util/ModuleBunch.cpp +++ b/lgc/util/ModuleBunch.cpp @@ -29,6 +29,7 @@ #include "lgc/ModuleBunch.h" #include "llvm/IR/PassManagerImpl.h" #include "llvm/IR/PrintPasses.h" +#include "llvm/Support/FormatVariadic.h" namespace llvm { @@ -45,6 +46,16 @@ void ModuleBunch::addModule(std::unique_ptr module) { Modules.push_back(std::move(module)); } +// Remove Module from ModuleBunch, returning ownership to the caller. +// Returns empty unique_ptr if Module not found. 
+std::unique_ptr ModuleBunch::removeModule(const Module *moduleToRemove) { + for (std::unique_ptr &module : Modules) { + if (module && &*module == moduleToRemove) + return std::move(module); + } + return nullptr; +} + // Renormalize ModuleBunch's array of Modules after manipulation by user. // Invalidates modules() iterator. void ModuleBunch::renormalize() { @@ -295,16 +306,6 @@ std::optional> MbPassBuilder::parsePip return {std::move(ResultPipeline)}; } -// Copied from PassBuilder.cpp. -static std::optional parseRepeatPassName(StringRef Name) { - if (!Name.consume_front("repeat<") || !Name.consume_back(">")) - return std::nullopt; - int Count; - if (Name.getAsInteger(0, Count) || Count <= 0) - return std::nullopt; - return Count; -} - // Copied from PassBuilder.cpp. /// Tests whether registered callbacks will accept a given pass name. /// @@ -339,9 +340,6 @@ template static bool isModuleBunchPassName(StringRef Name, if (Name == "coro-cond") return true; - // Explicitly handle custom-parsed pass names. - if (parseRepeatPassName(Name)) - return true; return callbacksAcceptPassName(Name, Callbacks); } @@ -394,13 +392,6 @@ Error MbPassBuilder::parseModuleBunchPass(ModuleBunchPassManager &MBPM, const Pi MBPM.addPass(std::move(NestedMBPM)); return Error::success(); } - if (auto Count = parseRepeatPassName(Name)) { - ModuleBunchPassManager NestedMBPM; - if (auto Err = parseModuleBunchPassPipeline(NestedMBPM, InnerPipeline)) - return Err; - MBPM.addPass(createRepeatedPass(*Count, std::move(NestedMBPM))); - return Error::success(); - } // TODO: // For any other nested pass manager ("module", "function" etc) we want to invoke // parseModulePassPipeline etc, but we can't as it is private in PassBuilder. 
So diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 277bde621a..41330609a9 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -63,9 +63,10 @@ if(ICD_BUILD_LLPC) set(LLVM_ENABLE_TERMINFO OFF CACHE BOOL Force) if (NOT WIN32) # Build optimized version of llvm-tblgen even in debug builds, for faster build times. - # - # Don't turn this on on Windows, because the required "cross compile" setup doesn't work in the internal CMake - # setup on Windows. +#if _WIN32 + # Don't enable this on Windows, because the required "cross compile" setup doesn't work + # in the internal CMake setup on Windows. +#endif set(LLVM_OPTIMIZED_TABLEGEN ON CACHE BOOL Force) endif() @@ -168,6 +169,7 @@ if(ICD_BUILD_LLPC) endif() endif() +#if _WIN32 if(WIN32) target_compile_definitions(llpcinternal PRIVATE NOMINMAX # windows.h defines min/max which conflicts with the use of std::min / max @@ -175,6 +177,7 @@ if(WIN32) _UNICODE ) endif() +#endif target_include_directories(llpcinternal PUBLIC @@ -192,9 +195,11 @@ target_include_directories(llpcinternal ${LLVM_INCLUDE_DIRS} ) +#if _WIN32 if(WIN32) target_compile_definitions(llpcinternal PRIVATE VK_USE_PLATFORM_WIN32_KHR) endif() +#endif if(ICD_BUILD_LLPC) # llpc/context @@ -217,7 +222,6 @@ if(ICD_BUILD_LLPC) lower/llpcSpirvLowerInstMetaRemove.cpp lower/llpcSpirvLowerMath.cpp lower/llpcSpirvLowerMemoryOp.cpp - lower/llpcSpirvLowerRayQuery.cpp lower/LowerPostInline.cpp lower/llpcSpirvLowerRayTracing.cpp lower/llpcSpirvLowerTerminator.cpp @@ -279,8 +283,10 @@ if(ICD_BUILD_LLPC) if(UNIX) set(BUILD_OS lnx) +#if _WIN32 elseif(WIN32) set(BUILD_OS win) +#endif endif() endif() diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index bc85bd630c..b4603a1c8b 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -61,6 +61,7 @@ #include "lgc/Builder.h" #include "lgc/ElfLinker.h" #include "lgc/EnumIterator.h" +#include "lgc/GpurtDialect.h" #include "lgc/LgcCpsDialect.h" #include 
"lgc/LgcRtDialect.h" #include "lgc/PassManager.h" @@ -786,12 +787,12 @@ static bool getSymbolInfoFromSpvVariable(const SPIRVVariable *spvVar, ResourceNo } if (varElemTy->getOpCode() == OpTypeMatrix) { symbolInfo->columnCount = varElemTy->getMatrixColumnCount(); - if (varElemTy->getMatrixColumnType()->getOpCode() == OpTypeVector) - symbolInfo->componentCount = varElemTy->getMatrixColumnType()->getVectorComponentCount(); varElemTy = varElemTy->getMatrixColumnType(); } - if (varElemTy->getOpCode() == OpTypeVector) + if (varElemTy->getOpCode() == OpTypeVector) { + symbolInfo->componentCount = varElemTy->getVectorComponentCount(); varElemTy = varElemTy->getVectorComponentType(); + } switch (varElemTy->getOpCode()) { case OpTypeInt: { @@ -1020,8 +1021,8 @@ void Compiler::buildShaderModuleResourceUsage( if (func) { if (auto em = func->getExecutionMode(ExecutionModeLocalSize)) { shaderModuleUsage.localSizeX = em->getLiterals()[0]; - shaderModuleUsage.localSizeX = em->getLiterals()[1]; - shaderModuleUsage.localSizeX = em->getLiterals()[2]; + shaderModuleUsage.localSizeY = em->getLiterals()[1]; + shaderModuleUsage.localSizeZ = em->getLiterals()[2]; } } @@ -1110,6 +1111,7 @@ void Compiler::buildShaderModuleResourceUsage( ResourceNodeData textureSymbol = {}; textureSymbol.binding = defaultUniformSymbol.binding; textureSymbol.location = defaultUniformSymbol.location; + textureSymbol.spvId = defaultUniformSymbol.spvId; textureSymbol.arraySize = getSamplerArraySizeInSpvStruct(varElemTy) * defaultUniformSymbol.arraySize; textureSymbol.isDefaultUniformSampler = true; if (textureSymbol.arraySize > 0) @@ -1790,7 +1792,6 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRef> modules(shaderInfo.size()); unsigned stageSkipMask = 0; - unsigned numStagesWithRayQuery = 0; bool enableAdvancedBlend = false; const GraphicsPipelineBuildInfo *pipelineInfo = nullptr; @@ -1865,8 +1866,6 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRefusage.enableRayQuery) { 
assert(!moduleData->usage.rayQueryLibrary); context->ensureGpurtLibrary(); - lowerPassMgr->addPass(SpirvLowerRayQuery(false)); - ++numStagesWithRayQuery; } if (shaderIndex == ShaderStageFragment && enableAdvancedBlend) { @@ -1905,28 +1904,6 @@ Result Compiler::buildPipelineInternal(Context *context, ArrayRef(shaderInfoEntry->pModuleData); - if (!moduleData || !moduleData->usage.enableRayQuery) - continue; - - // Modules are consumed by linking, so clone as needed. - Linker linker(*modules[shaderIndex]); - if (linker.linkInModule(CloneModule(*gpurtContext.theModule))) - result = Result::ErrorInvalidShader; - } - } - SmallVector, ShaderStageGfxCount> modulesToLink; for (unsigned shaderIndex = 0; shaderIndex < shaderInfo.size() && result == Result::Success; ++shaderIndex) { // Per-shader SPIR-V lowering passes. @@ -2675,13 +2652,7 @@ Result Compiler::BuildRayTracingPipeline(const RayTracingPipelineBuildInfo *pipe const auto &shaderInfo = pipelineInfo->pShaders[i]; const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo.pModuleData); if (moduleData->usage.hasTraceRay) { - pipelineOut->hasTraceRay = true; - summary.usesTraceRay = true; - - // TODO: Leverage static analysis (could be moved to *after* the compilation of shaders?) - summary.knownSetRayFlags = 0; - summary.knownUnsetRayFlags = 0; break; } } @@ -2692,6 +2663,7 @@ Result Compiler::BuildRayTracingPipeline(const RayTracingPipelineBuildInfo *pipe rayTracingShaderInfo.push_back(&pipelineInfo->pShaders[i]); auto &shaderInfo = rayTracingShaderInfo[i]; const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo->pModuleData); + if (shaderInfo->entryStage == ShaderStageRayTracingAnyHit || shaderInfo->entryStage == ShaderStageRayTracingIntersect) { if (moduleData->usage.enableRayQuery) { @@ -2700,9 +2672,12 @@ Result Compiler::BuildRayTracingPipeline(const RayTracingPipelineBuildInfo *pipe } } - // For continuations mode, it must be indirect mode. 
- if (rayTracingContext.isContinuationsMode()) - rayTracingContext.setIndirectPipeline(); + if (summary.knownSetRayFlags != ~0u || summary.knownUnsetRayFlags != ~0u) { + auto knownBits = KnownBits(32); + knownBits.One = APInt(32, summary.knownSetRayFlags); + knownBits.Zero = APInt(32, summary.knownUnsetRayFlags); + rayTracingContext.updateRayFlagsKnownBits(knownBits); + } // Add entry module PipelineShaderInfo raygenMainShaderInfo = pipelineInfo->pShaders[0]; @@ -2714,6 +2689,12 @@ Result Compiler::BuildRayTracingPipeline(const RayTracingPipelineBuildInfo *pipe helperThreadProvider); if (result == Result::Success) { + auto knownFlags = rayTracingContext.getRayFlagsKnownBits(); + summary.knownSetRayFlags &= knownFlags.One.getZExtValue(); + summary.knownUnsetRayFlags &= knownFlags.Zero.getZExtValue(); + + pipelineOut->hasTraceRay = summary.hasTraceRayModule; + std::string summaryMsgpack = summary.encodeMsgpack(); void *allocBuf = nullptr; size_t shaderGroupHandleSize = pipelineInfo->shaderGroupCount * sizeof(RayTracingShaderIdentifier); @@ -2797,6 +2778,9 @@ Result Compiler::BuildRayTracingPipeline(const RayTracingPipelineBuildInfo *pipe pipelineOut->librarySummary.pCode = summaryOut; pipelineOut->librarySummary.codeSize = summaryMsgpack.size(); memcpy(summaryOut, summaryMsgpack.data(), summaryMsgpack.size()); + + pipelineOut->isCps = rayTracingContext.isContinuationsMode(); + pipelineOut->isCps |= rayTracingContext.getRaytracingMode() == Vkgc::LlpcRaytracingMode::Continufy; } return result; @@ -2848,6 +2832,9 @@ Result Compiler::buildRayTracingPipelineElf(Context *context, std::unique_ptrgetIndirectStageMask() == 0) { options.rtIndirectMode = lgc::RayTracingIndirectMode::NotIndirect; } else if (rtContext->isContinuationsMode() && !LgcContext::getEmitLgc()) { + // Assure indirect mode setting here, indirect stage mask may change after SPIRVReader. 
+ options.rtIndirectMode = lgc::RayTracingIndirectMode::Continuations; + // For continuations mode, we need to run LowerRaytracingPipelinePass here first separately because we need to // collect metadata added by the pass std::unique_ptr passMgr(lgc::PassManager::Create(context->getLgcContext())); @@ -3092,8 +3079,6 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, std::vector> modules(shaderInfo.size()); mainContext->setBuilder(builderContext->createBuilder(&*pipeline)); - const bool continuationsMode = (pipelineInfo->mode == Vkgc::LlpcRaytracingMode::Continuations); - bool needTraversal = false; mainContext->ensureGpurtLibrary(); @@ -3115,10 +3100,6 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, if (!shaderInfoEntry->pModuleData) continue; - const ShaderModuleData *moduleData = reinterpret_cast(shaderInfoEntry->pModuleData); - if (moduleData->usage.enableRayQuery || moduleData->usage.hasTraceRay) - needTraversal = true; - std::unique_ptr lowerPassMgr(lgc::PassManager::Create(builderContext)); lowerPassMgr->setPassIndex(&passIndex); SpirvLower::registerTranslationPasses(*lowerPassMgr); @@ -3127,8 +3108,6 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, lowerPassMgr->addPass(SpirvLowerTranslator(shaderInfoEntry->entryStage, shaderInfoEntry)); lowerPassMgr->addPass(SpirvLowerCfgMerges()); lowerPassMgr->addPass(AlwaysInlinerPass()); - if (moduleData->usage.enableRayQuery) - lowerPassMgr->addPass(SpirvLowerRayQuery()); // Run the passes. 
bool success = runPasses(&*lowerPassMgr, modules[shaderIndex].get()); @@ -3138,9 +3117,8 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, } } - // Step 2: Link rayquery modules + // Step 2: Set up traversal module and kernel entry std::vector> newModules; - std::vector moduleUsesRayQuery; // Record which module calls TraceRay(), except the first one (For indirect mode, it is the entry function which will // never call TraceRay(). For inlined mode, we don't need to care). std::vector moduleCallsTraceRay; @@ -3153,31 +3131,30 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, auto indirectStageMask = rtContext.getIndirectStageMask() & ShaderStageAllRayTracingBit; assert(indirectStageMask == 0 || indirectStageMask == ShaderStageAllRayTracingBit); + const bool isContinuationsMode = rtContext.isContinuationsMode(); + + // TODO: Do not build launch kernel for library. std::unique_ptr entry = std::move(modules.back()); modules.pop_back(); shaderInfo = shaderInfo.drop_back(); newModules.push_back(std::move(entry)); - moduleUsesRayQuery.push_back(false); for (unsigned shaderIndex = 0; shaderIndex < pipelineInfo->shaderCount; ++shaderIndex) { const auto *shaderInfoEntry = shaderInfo[shaderIndex]; const ShaderModuleData *moduleData = reinterpret_cast(shaderInfoEntry->pModuleData); auto shaderModule = std::move(modules[shaderIndex]); - if (moduleData->usage.enableRayQuery) { - Linker linker(*shaderModule); - if (linker.linkInModule(CloneModule(*gpurtContext.theModule))) - return Result::ErrorInvalidShader; - } - newModules.push_back(std::move(shaderModule)); moduleCallsTraceRay.push_back(moduleData->usage.hasTraceRay); - moduleUsesRayQuery.push_back(moduleData->usage.enableRayQuery); } - // TODO: For continuations, we only need to compile the GpuRt module separately if there are TraceRay usages - // to compile the Traversal shader. For callable shaders, it is not required. 
+ bool needTraversal = rtContext.getRayTracingLibrarySummary().usesTraceRay; + + // When compiling library, we do not build traversal module, as we will always use the one from complete pipeline. + if (rtContext.getRayTracingPipelineBuildInfo()->libraryMode == LibraryMode::Library) + needTraversal = false; + if (needTraversal) { auto fetchRayTracingFuncName = [&](Vkgc::RAYTRACING_ENTRY_FUNC attribute) -> StringRef { return mainContext->getPipelineContext()->getRayTracingFunctionName(attribute); @@ -3185,12 +3162,12 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, StringRef traceRayFuncName = fetchRayTracingFuncName(Vkgc::RT_ENTRY_TRACE_RAY); // For continuations, the entry is _cont_Traversal. constexpr char ContTraceRayFuncName[] = "_cont_Traversal"; - if (continuationsMode) + if (isContinuationsMode) traceRayFuncName = ContTraceRayFuncName; std::unique_ptr traversal = CloneModule(*gpurtContext.theModule); - // Prepare GpuRt module to be compiled separately + // Prepare traversal module to be compiled separately for (auto funcIt = traversal->begin(), funcEnd = traversal->end(); funcIt != funcEnd;) { Function *func = &*funcIt++; if (func->getName().starts_with(traceRayFuncName)) { @@ -3207,22 +3184,20 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, newModules.push_back(std::move(traversal)); moduleCallsTraceRay.push_back(false); - moduleUsesRayQuery.push_back(false); } assert(moduleCallsTraceRay.size() == (newModules.size() - 1)); - assert(moduleUsesRayQuery.size() == newModules.size()); + // Step 3: Run lower passes on all modules for (unsigned i = 0; i < newModules.size(); i++) { auto module = (newModules[i].get()); std::unique_ptr passMgr(lgc::PassManager::Create(builderContext)); SpirvLower::registerLoweringPasses(*passMgr); LowerFlag flag = {}; flag.isRayTracing = true; - flag.isRayQuery = moduleUsesRayQuery[i]; flag.isInternalRtShader = false; SpirvLower::addPasses(mainContext, 
ShaderStageCompute, *passMgr, timerProfiler.getTimer(TimerLower), flag); - if (continuationsMode) { + if (isContinuationsMode) { passMgr->addPass(PrepareContinuations()); } bool success = runPasses(&*passMgr, module); @@ -3232,6 +3207,7 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, } } + // Step 4: Link module if necessary if (indirectStageMask == 0) { auto &mainModule = newModules[0]; Linker linker(*mainModule); @@ -3257,15 +3233,17 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, entry = std::move(newModules[0]); std::unique_ptr traversalModule; - if (indirectStageMask != 0) { + if (indirectStageMask != 0 && needTraversal) { traversalModule = std::move(newModules.back()); newModules.pop_back(); + rtContext.getRayTracingLibrarySummary().hasTraceRayModule = true; } InternalHelperThreadProvider ourHelperThreadProvider; if (cl::AddRtHelpers && !helperThreadProvider) helperThreadProvider = &ourHelperThreadProvider; + // Step 5: Generate ELFs if (helperThreadProvider) { std::vector results(newModules.size(), Result::Success); std::vector modulePointers; @@ -3332,10 +3310,14 @@ Result Compiler::buildRayTracingPipelineInternal(RayTracingContext &rtContext, // Build traversal at last after we gather all needed information. 
if (traversalModule) { - if (rtContext.isContinuationsMode()) + if (isContinuationsMode) ContHelper::setPreservedPayloadRegisterCount(*traversalModule, rtContext.getRayTracingLibrarySummary().maxUsedPayloadRegisterCount); + auto rayFlagsKnownBits = rtContext.getRayFlagsKnownBits(); + lgc::gpurt::setKnownSetRayFlags(*traversalModule, rayFlagsKnownBits.One.getZExtValue()); + lgc::gpurt::setKnownUnsetRayFlags(*traversalModule, rayFlagsKnownBits.Zero.getZExtValue()); + Result result = buildRayTracingPipelineElf(mainContext, std::move(traversalModule), pipelineElfs[newModules.size()], shaderProps, moduleCallsTraceRay, newModules.size(), pipeline, timerProfiler); @@ -3389,24 +3371,8 @@ void Compiler::adjustRayTracingElf(ElfPackage *pipelineElf, RayTracingContext *r // 1. Add raytracing pipeline indirect pipeline metadata // The metadata is needed for RGP to correctly show different subtype of shaders. // Determine the shader subtype by name - auto subtype = "Unknown"; - if (auto shaderStage = tryGetLgcRtShaderStageFromName(shaderFunctionName)) { - auto stage = shaderStage.value(); - if (stage == lgc::rt::RayTracingShaderStage::RayGeneration) - subtype = "RayGeneration"; - else if (stage == lgc::rt::RayTracingShaderStage::Miss) - subtype = "Miss"; - else if (stage == lgc::rt::RayTracingShaderStage::AnyHit) - subtype = "AnyHit"; - else if (stage == lgc::rt::RayTracingShaderStage::ClosestHit) - subtype = "ClosestHit"; - else if (stage == lgc::rt::RayTracingShaderStage::Intersection) - subtype = "Intersection"; - else if (stage == lgc::rt::RayTracingShaderStage::Callable) - subtype = "Callable"; - else if (stage == lgc::rt::RayTracingShaderStage::Traversal) - subtype = "Traversal"; - } + auto subtype = lgc::rt::getShaderSubtypeForRtShaderStage( + tryGetLgcRtShaderStageFromName(shaderFunctionName).value_or(lgc::rt::RayTracingShaderStage::Count)); shaderFunction[".shader_subtype"] = subtype; // 2. 
Apply the .internal_pipeline_hash to .api_shader_hash in .shader_functions section diff --git a/llpc/context/llpcContext.cpp b/llpc/context/llpcContext.cpp index b148ed4ef1..6793151f55 100644 --- a/llpc/context/llpcContext.cpp +++ b/llpc/context/llpcContext.cpp @@ -40,7 +40,6 @@ #include "llpcSpirvLowerAccessChain.h" #include "llpcSpirvLowerCfgMerges.h" #include "llpcSpirvLowerGlobal.h" -#include "llpcSpirvLowerRayQuery.h" #include "llpcSpirvLowerTranslator.h" #include "llpcSpirvProcessGpuRtLibrary.h" #include "llpcTimerProfiler.h" @@ -53,6 +52,7 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" #include "lgc/LgcRtDialect.h" +#include "lgc/LgcRtqDialect.h" #include "lgc/PassManager.h" #include "lgc/RuntimeContext.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -81,6 +81,7 @@ using namespace lgc; using namespace lgc::rt; +using namespace lgc::rtq; using namespace llvm; using namespace lgc::cps; @@ -90,8 +91,8 @@ namespace Llpc { // // @param gfxIp : Graphics IP version info Context::Context(GfxIpVersion gfxIp) : LLVMContext(), m_gfxIp(gfxIp) { - m_dialectContext = llvm_dialects::DialectContext::make(*this); + m_dialectContext = llvm_dialects::DialectContext::make(*this); reset(); } @@ -279,7 +280,6 @@ void Context::ensureGpurtLibrary() { lowerPassMgr->addPass(SpirvLowerCfgMerges()); lowerPassMgr->addPass(SpirvProcessGpuRtLibrary()); - lowerPassMgr->addPass(SpirvLowerRayQuery(true)); lowerPassMgr->addPass(AlwaysInlinerPass()); lowerPassMgr->addPass(SpirvLowerAccessChain()); lowerPassMgr->addPass(SpirvLowerGlobal()); diff --git a/llpc/context/llpcContext.h b/llpc/context/llpcContext.h index 0bb2595e40..ebc730a825 100644 --- a/llpc/context/llpcContext.h +++ b/llpc/context/llpcContext.h @@ -123,7 +123,6 @@ class Context : public llvm::LLVMContext { void setModuleTargetMachine(llvm::Module *module); void ensureGpurtLibrary(); - void ensureGfxRuntimeLibrary(); private: diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index 
5338cffcd0..21ba35235f 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -578,6 +578,7 @@ ShaderOptions PipelineContext::computeShaderOptions(const PipelineShaderInfo &sh shaderOptions.debugMode = shaderInfo.options.debugMode; shaderOptions.allowReZ = shaderInfo.options.allowReZ; shaderOptions.forceLateZ = shaderInfo.options.forceLateZ; + shaderOptions.imageSampleDrefReturnsRgba = shaderInfo.options.imageSampleDrefReturnsRgba; shaderOptions.vgprLimit = shaderInfo.options.vgprLimit; diff --git a/llpc/context/llpcRayTracingContext.cpp b/llpc/context/llpcRayTracingContext.cpp index bc552ccf20..acd4d69d4e 100644 --- a/llpc/context/llpcRayTracingContext.cpp +++ b/llpc/context/llpcRayTracingContext.cpp @@ -49,7 +49,8 @@ RayTracingContext::RayTracingContext(GfxIpVersion gfxIP, const RayTracingPipelin const PipelineShaderInfo *representativeShaderInfo, MetroHash::Hash *pipelineHash, MetroHash::Hash *cacheHash, unsigned indirectStageMask) : PipelineContext(gfxIP, pipelineHash, cacheHash), m_pipelineInfo(pipelineInfo), m_representativeShaderInfo(), - m_linked(false), m_indirectStageMask(indirectStageMask), m_entryName(""), m_callableDataMaxSize(0) { + m_linked(false), m_indirectStageMask(indirectStageMask), m_entryName(""), m_callableDataMaxSize(0), + m_rayFlagsKnownBits(std::nullopt) { const Vkgc::BinaryData *gpurtShaderLibrary = nullptr; #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 62 gpurtShaderLibrary = &pipelineInfo->shaderTraceRay; @@ -149,6 +150,29 @@ unsigned RayTracingContext::getAttributeDataSize() { return divideCeil(m_rtLibSummary.maxHitAttributeSize, 4); } +// ===================================================================================================================== +// Check whether the pipeline is compiled in continuations mode +bool RayTracingContext::isContinuationsMode() const { + bool isContinuations = false; + +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 74 + if (getRaytracingMode() == 
Vkgc::LlpcRaytracingMode::Continuations) { + // Client require continuations mode explicitly. + isContinuations = true; + } +#else + // Continuations mode is only enabled for indirect mode. + if (getIndirectStageMask() != 0) { + if (getRaytracingMode() == Vkgc::LlpcRaytracingMode::Auto) { + } else if (getRaytracingMode() == Vkgc::LlpcRaytracingMode::Continuations) { + // Client require continuations mode explicitly. + isContinuations = true; + } + } +#endif + return isContinuations; +} + // ===================================================================================================================== // If the builtIn is builtIn used in raytracing // @@ -278,7 +302,7 @@ lgc::Options RayTracingContext::computePipelineOptions() const { #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION > 68 if (m_pipelineInfo->mode == Vkgc::LlpcRaytracingMode::Continufy) options.rtIndirectMode = lgc::RayTracingIndirectMode::ContinuationsContinufy; - else if (m_pipelineInfo->mode == Vkgc::LlpcRaytracingMode::Continuations) + else if (isContinuationsMode()) options.rtIndirectMode = lgc::RayTracingIndirectMode::Continuations; options.cpsFlags = m_pipelineInfo->cpsFlags; diff --git a/llpc/context/llpcRayTracingContext.h b/llpc/context/llpcRayTracingContext.h index c601a3a1bb..c520c81854 100644 --- a/llpc/context/llpcRayTracingContext.h +++ b/llpc/context/llpcRayTracingContext.h @@ -32,6 +32,7 @@ #include "llpcPipelineContext.h" #include "lgc/RayTracingLibrarySummary.h" +#include "llvm/Support/KnownBits.h" #include namespace lgc { @@ -116,9 +117,18 @@ class RayTracingContext : public PipelineContext { bool hasPipelineLibrary() { return m_pipelineInfo->hasPipelineLibrary; } unsigned hasLibraryStage(unsigned stageMask) { return m_pipelineInfo->pipelineLibStageMask & stageMask; } bool isReplay() { return m_pipelineInfo->isReplay; } - Vkgc::LlpcRaytracingMode getRaytracingMode() { return m_pipelineInfo->mode; } - bool isContinuationsMode() { return getRaytracingMode() == 
Vkgc::LlpcRaytracingMode::Continuations; } + Vkgc::LlpcRaytracingMode getRaytracingMode() const { return m_pipelineInfo->mode; } + bool isContinuationsMode() const; unsigned getCpsFlag() { return m_pipelineInfo->cpsFlags; } + void updateRayFlagsKnownBits(const llvm::KnownBits &knownBits) { + if (m_rayFlagsKnownBits.has_value()) { + m_rayFlagsKnownBits = m_rayFlagsKnownBits->intersectWith(knownBits); + } else { + m_rayFlagsKnownBits = knownBits; + } + } + + llvm::KnownBits getRayFlagsKnownBits() const { return m_rayFlagsKnownBits.value_or(llvm::KnownBits()); } protected: // Give the pipeline options to the middle-end, and/or hash them. @@ -142,6 +152,7 @@ class RayTracingContext : public PipelineContext { unsigned m_callableDataMaxSize; // Callable maximum size std::set> m_builtIns; // Collected raytracing lgc::RayTracingLibrarySummary m_rtLibSummary = {}; + std::optional m_rayFlagsKnownBits; }; } // namespace Llpc diff --git a/llpc/include/llpc.h b/llpc/include/llpc.h index ff6d917946..be52632f4b 100644 --- a/llpc/include/llpc.h +++ b/llpc/include/llpc.h @@ -157,6 +157,7 @@ struct RayTracingPipelineBuildOut { BinaryData librarySummary; ///< Output MsgPack summary for use in library link; not created ///< when compiling in pure pipeline mode bool hasTraceRay; ///< Output whether have traceray module + bool isCps; ///< Output whether is the pipeline is compiled in CPS mode }; #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 66 diff --git a/llpc/lower/LowerGLCompatibility.cpp b/llpc/lower/LowerGLCompatibility.cpp index aeae5b78fe..41e023b585 100644 --- a/llpc/lower/LowerGLCompatibility.cpp +++ b/llpc/lower/LowerGLCompatibility.cpp @@ -68,7 +68,7 @@ PreservedAnalyses LowerGLCompatibility::run(Module &module, ModuleAnalysisManage if (!needLowerClipVertex() && !needLowerFrontColor() && !needLowerBackColor() && !needLowerFrontSecondaryColor() && !needLowerBackSecondaryColor() && !needEmulateDrawPixels() && !needEmulateTwoSideLighting() && - !needEmulateBitmap() && 
!needLowerFragColor()) + !needEmulateBitmap() && !needLowerFragColor() && !needEmulateSmoothStipple()) return PreservedAnalyses::all(); buildPatchPositionInfo(); @@ -94,6 +94,9 @@ PreservedAnalyses LowerGLCompatibility::run(Module &module, ModuleAnalysisManage if (needEmulateDrawPixels()) emulateDrawPixels(); + if (needEmulateSmoothStipple()) + emulateSmoothStipple(); + // Two side lighting patch should place just before bitmap patch. if (needEmulateTwoSideLighting()) emulateTwoSideLighting(); @@ -115,6 +118,7 @@ bool LowerGLCompatibility::needRun() { ->getPipelineShaderInfo(m_shaderStage) ->pModuleData); auto *buildInfo = static_cast(m_context->getPipelineBuildInfo()); + auto options = m_context->getPipelineContext()->getPipelineOptions(); result |= moduleData->usage.useClipVertex; result |= moduleData->usage.useFrontColor; result |= moduleData->usage.useBackColor; @@ -125,6 +129,9 @@ bool LowerGLCompatibility::needRun() { result |= buildInfo->glState.enableBitmap; result |= buildInfo->glState.enableBitmapLsb; result |= buildInfo->glState.enableColorClampFs; + result |= options->getGlState().enablePolygonStipple; + result |= options->getGlState().enableLineSmooth; + result |= options->getGlState().enablePointSmooth; } return result; } @@ -136,7 +143,7 @@ bool LowerGLCompatibility::needRun() { unsigned LowerGLCompatibility::getUniformLocation(llvm::GlobalVariable *var) { assert(var->getType()->getAddressSpace() == SPIRAS_Uniform && var->hasMetadata(gSPIRVMD::UniformConstant)); MDNode *metaNode = var->getMetadata(gSPIRVMD::UniformConstant); - return mdconst::dyn_extract(metaNode->getOperand(3))->getZExtValue(); + return mdconst::extract(metaNode->getOperand(3))->getZExtValue(); } // ===================================================================================================================== @@ -280,7 +287,7 @@ void LowerGLCompatibility::collectEmulationResource() { llvm::SmallVector mds; MDNode *metaNode = global.getMetadata(gSPIRVMD::InOut); 
assert(metaNode); - auto inOutMetaConst = mdconst::dyn_extract(metaNode->getOperand(0)); + auto inOutMetaConst = mdconst::extract(metaNode->getOperand(0)); auto valueType = global.getValueType(); bool isStructureOrArrayOfStructure = (valueType->isStructTy() || (valueType->isArrayTy() && valueType->getArrayElementType()->isStructTy())); @@ -310,7 +317,7 @@ void LowerGLCompatibility::collectEmulationResource() { llvm::SmallVector mds; MDNode *metaNode = global.getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto inOutMetaConst = mdconst::dyn_extract(metaNode->getOperand(0)); + auto inOutMetaConst = mdconst::extract(metaNode->getOperand(0)); auto valueType = global.getValueType(); bool isStructureOrArrayOfStructure = (valueType->isStructTy() || (valueType->isArrayTy() && valueType->getArrayElementType()->isStructTy())); @@ -384,7 +391,7 @@ void LowerGLCompatibility::collectEmulationResource() { auto glOut = cast(m_out); MDNode *metaNode = glOut->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto inOutMetaConst = mdconst::dyn_extract(metaNode->getOperand(0)); + auto inOutMetaConst = mdconst::extract(metaNode->getOperand(0)); for (User *user : m_out->users()) { SmallVector indexOperands; // The user is a GEP @@ -510,8 +517,16 @@ bool LowerGLCompatibility::needEmulateTwoSideLighting() { // Check whether need do emulate for bitmap. bool LowerGLCompatibility::needEmulateBitmap() { auto *buildInfo = static_cast(m_context->getPipelineBuildInfo()); + return (m_shaderStage == ShaderStageFragment) && buildInfo->glState.enableBitmap; +} + +// ===================================================================================================================== +// Check whether need do emulate point/line smooth and line/polygon stipple. 
+bool LowerGLCompatibility::needEmulateSmoothStipple() { + auto options = m_context->getPipelineContext()->getPipelineOptions(); return (m_shaderStage == ShaderStageFragment) && - (buildInfo->glState.enableBitmap || buildInfo->glState.enableBitmapLsb); + (options->getGlState().enablePolygonStipple || options->getGlState().enableLineSmooth || + options->getGlState().enablePointSmooth); } // ===================================================================================================================== @@ -889,6 +904,221 @@ void LowerGLCompatibility::emulateBitmap() { m_builder->CreateCondBr(cmp, m_entryPointEnd, m_originalEntryBlock); } +// ===================================================================================================================== +// Patch alpha scaling factor to the 4th channel of a fragment output, excluding built-in variables. +// +// @param [in] val : input value for alpha scaling, which is an output in fragment stage. +// @param [in] valTy : current input value's type, should be global's valueType in top-level. +// @param [in] metaVal : metadata value of current output variable. +// @param [in] alphaScaleVal : calculated alpha scaling results, default value is one. 
+void LowerGLCompatibility::patchAlphaScaling(Value *val, Type *valTy, Constant *metaVal, Value *alphaScaleVal) { + ShaderInOutMetadata outputMeta = {}; + + if (valTy->isArrayTy()) { + outputMeta.U64All[0] = cast(metaVal->getOperand(2))->getZExtValue(); + outputMeta.U64All[1] = cast(metaVal->getOperand(3))->getZExtValue(); + + if (!outputMeta.IsBuiltIn) { + auto elemMeta = cast(metaVal->getOperand(1)); + const uint64_t elemCount = val->getType()->getArrayNumElements(); + for (unsigned idx = 0; idx < elemCount; ++idx) { + Value *elem = m_builder->CreateExtractValue(val, {idx}, ""); + patchAlphaScaling(elem, elem->getType(), elemMeta, alphaScaleVal); + } + } + } else if (valTy->isStructTy()) { + const uint64_t memberCount = val->getType()->getStructNumElements(); + for (unsigned memberIdx = 0; memberIdx < memberCount; ++memberIdx) { + auto memberMeta = cast(metaVal->getOperand(memberIdx)); + Value *member = m_builder->CreateExtractValue(val, {memberIdx}); + patchAlphaScaling(member, member->getType(), memberMeta, alphaScaleVal); + } + } else { + Constant *inOutMetaConst = cast(metaVal); + outputMeta.U64All[0] = cast(inOutMetaConst->getOperand(0))->getZExtValue(); + outputMeta.U64All[1] = cast(inOutMetaConst->getOperand(1))->getZExtValue(); + + // When enabling line smooth, alpha channel will be patched with a scaling factor. 
+ if (!outputMeta.IsBuiltIn && outputMeta.NumComponents == 4 && alphaScaleVal) { + Value *outputValue = m_builder->CreateLoad(valTy, val); + Value *scaledAlpha = m_builder->CreateExtractElement(outputValue, 3); + Value *alphaScaleFactor = m_builder->CreateLoad(m_builder->getFloatTy(), alphaScaleVal); + scaledAlpha = m_builder->CreateFMul(alphaScaleFactor, scaledAlpha); + outputValue = m_builder->CreateInsertElement(outputValue, scaledAlpha, m_builder->getInt32(3)); + m_builder->CreateStore(outputValue, val); + } + } +} + +// ===================================================================================================================== +// Emulate for point/line smooth and line/polygon stipple. +void LowerGLCompatibility::emulateSmoothStipple() { + auto options = m_context->getPipelineContext()->getPipelineOptions(); + auto pipelineBuildInfo = static_cast(m_context->getPipelineBuildInfo()); + bool needYInvert = pipelineBuildInfo->getGlState().originUpperLeft; + m_builder->SetInsertPointPastAllocas(m_entryPoint); + // Acquire FragCoord. + Value *fragCoord = m_builder->CreateReadBuiltInInput(lgc::BuiltInKind::BuiltInFragCoord); + // Acquire PrimType. + // 0 : point. + // 1 : line. + // 2 : triangle. + // 3 : rectangle. + // PrimType (i32) : comes from HW PS Input : ANCILLARY_ENA - Prim Type[1:0] + Value *primType = m_builder->CreateReadBuiltInInput(lgc::BuiltInKind::BuiltInPrimType); + + // 1. Patch Polygon Stipple. + if (options->getGlState().enablePolygonStipple) { + constexpr uint32_t PolygonStippleSize = 32; // For Y Invert. + + // If this is in triangle mode, skip emulation. 
+ Value *isTriangle = m_builder->CreateICmpUGT(primType, m_builder->getInt32(1)); + m_builder->SetInsertPoint(SplitBlockAndInsertIfThen(isTriangle, m_builder->GetInsertPoint(), false)); + + Value *calcFragCoord = m_builder->CreateFPToUI(fragCoord, FixedVectorType::get(m_builder->getInt32Ty(), 4)); + Value *calcFragCoordX = m_builder->CreateExtractElement(calcFragCoord, m_builder->getInt32(0)); + Value *calcFragCoordY = m_builder->CreateExtractElement(calcFragCoord, m_builder->getInt32(1)); + Value *bufferDesc = m_builder->create( + Vkgc::InternalDescriptorSetId, Vkgc::InternalBinding::PixelOpInternalBinding, m_builder->getInt32(0), + lgc::Builder::BufferFlagNonConst); + + // For Y Invert + if (needYInvert) { + Value *winSizeOffset = + m_builder->CreateInBoundsGEP(m_builder->getInt32Ty(), bufferDesc, m_builder->getInt32(PolygonStippleSize)); + winSizeOffset = m_builder->CreateLoad(m_builder->getInt32Ty(), winSizeOffset); + calcFragCoordY = m_builder->CreateSub(winSizeOffset, calcFragCoordY); + } + + // active = ( x % 32 ) & ( y % 32 ) + // HW load polygon stipple pattern in right order in Bytes here, y offset doesn't need to be reverted. + Value *yOffset = m_builder->CreateAnd(calcFragCoordY, m_builder->getInt32(0x1fu)); + Value *descPtr = m_builder->CreateInBoundsGEP(m_builder->getInt32Ty(), bufferDesc, yOffset); + Value *stipplePattern = m_builder->CreateLoad(m_builder->getInt32Ty(), descPtr); + + // xOffset = ( x % 32 ) / 8 + Value *xOffset = m_builder->CreateAnd(calcFragCoordX, m_builder->getInt32(0x18u)); + // xInByteOffset = x % 8 + Value *xInByteOffset = m_builder->CreateAnd(calcFragCoordX, m_builder->getInt32(0x7u)); + // xInByteOffset = 7 - xInByteOffset + // Due to concern with default turned on option LsbFirst, x bits are in reverse order within each 8 bits pattern. 
+ if (pipelineBuildInfo->glState.enableBitmapLsb) { + xInByteOffset = m_builder->CreateSub(m_builder->getInt32(0x7u), xInByteOffset); + } + // xOffset = xInByteOffset + xOffset + xOffset = m_builder->CreateAdd(xOffset, xInByteOffset); + + Value *shouldDiscard = m_builder->CreateExtractBitField(stipplePattern, xOffset, m_builder->getInt32(1), false); + shouldDiscard = m_builder->CreateICmpEQ(shouldDiscard, m_builder->getInt32(0)); + m_builder->SetInsertPoint(SplitBlockAndInsertIfThen(shouldDiscard, m_builder->GetInsertPoint(), false)); + m_builder->CreateKill(); + } + + // 2. Patch Line Smooth. + if (options->getGlState().enableLineSmooth) { + Value *isLine = m_builder->CreateICmpEQ(primType, m_builder->getInt32(1)); + Value *alphaScaleVal = m_builder->CreateAllocaAtFuncEntry(m_builder->getFloatTy(), "patchAlphaScale"); + m_builder->CreateStore(ConstantFP::get(m_builder->getFloatTy(), 1.0), alphaScaleVal); + m_builder->SetInsertPoint(SplitBlockAndInsertIfThen(isLine, m_builder->GetInsertPoint(), false)); + + // Get const for line smooth + Value *lineSmoothConstArr[4]; + for (uint32_t i = 0; i < 4; i++) + lineSmoothConstArr[i] = ConstantFP::get(m_builder->getFloatTy(), pipelineBuildInfo->getGlState().lineSmooth[i]); + + // Emulate line stipple with wide AA line + if (options->getGlState().emulateWideLineStipple) { + // LineStipple (f32) is read from SPIA:LINE_STIPPLE_TEX_ENA + Value *lineStipple = m_builder->CreateReadBuiltInInput(lgc::BuiltInKind::BuiltInLineStipple); + Value *lineStippleScale = lineSmoothConstArr[2]; + Value *lineStipplePattern = m_builder->CreateBitCast(lineSmoothConstArr[3], m_builder->getInt32Ty()); + + Value *result = m_builder->CreateFMul(lineStipple, lineStippleScale); + result = m_builder->CreateFPToSI(result, m_builder->getInt32Ty()); + result = m_builder->CreateAnd(result, m_builder->getInt32(15)); + result = m_builder->CreateShl(m_builder->getInt32(1), result); + // lineSmooth[3] is the line stipple pattern, it is integer in memory. 
+ result = m_builder->CreateAnd(result, lineStipplePattern); + Value *shouldDiscard = m_builder->CreateICmpEQ(result, m_builder->getInt32(0)); + m_builder->SetInsertPoint(SplitBlockAndInsertIfThen(shouldDiscard, m_builder->GetInsertPoint(), false)); + m_builder->CreateKill(); + } + + // Primitive Coord (fp32vec2) + Value *primCoord = m_builder->CreateReadBuiltInInput(lgc::BuiltInKind::BuiltInPrimCoord); + Value *negHalfLineWidth = m_builder->CreateFNeg(lineSmoothConstArr[0]); + Value *lineWidth = m_builder->CreateFMul(lineSmoothConstArr[0], ConstantFP::get(m_builder->getFloatTy(), 2.0)); + Value *alphaBias = lineSmoothConstArr[1]; + + primCoord = m_builder->CreateExtractElement(primCoord, 1); + Value *scaledVal = m_builder->CreateFma(primCoord, lineWidth, negHalfLineWidth); + // Recalculate alpha scale value which will be inserted into frag color's alpha channel, when doing smooth. + scaledVal = m_builder->CreateIntrinsic(Intrinsic::fabs, scaledVal->getType(), scaledVal); + scaledVal = m_builder->CreateFSub(alphaBias, scaledVal); + m_builder->CreateStore(scaledVal, alphaScaleVal); + + m_builder->SetInsertPoint(m_retInst); + for (GlobalVariable &global : m_module->globals()) { + auto addrSpace = global.getType()->getAddressSpace(); + if (addrSpace == SPIRAS_Output) { + auto outputMetaVal = mdconst::extract(global.getMetadata(gSPIRVMD::InOut)->getOperand(0)); + patchAlphaScaling(&global, global.getValueType(), outputMetaVal, alphaScaleVal); + } + } + } + + // 3. Patch Point Smooth. 
+ if (options->getGlState().enablePointSmooth) { + Value *isPoint = m_builder->CreateICmpEQ(primType, m_builder->getInt32(0)); + Value *alphaScaleVal = m_builder->CreateAllocaAtFuncEntry(m_builder->getFloatTy(), "patchAlphaScale"); + m_builder->CreateStore(ConstantFP::get(m_builder->getFloatTy(), 1.0), alphaScaleVal); + m_builder->SetInsertPoint(SplitBlockAndInsertIfThen(isPoint, m_builder->GetInsertPoint(), false)); + // Primitive Coord (fp32vec2) + Value *primCoord = + m_builder->CreateReadBuiltInInput(lgc::BuiltInKind::BuiltInPrimCoord); // Get const for line smooth + + Value *pointSmoothConstArr[2]; + for (uint32_t i = 0; i < 2; i++) + pointSmoothConstArr[i] = ConstantFP::get(m_builder->getFloatTy(), pipelineBuildInfo->getGlState().pointSmooth[i]); + + Value *halfPointSize = pointSmoothConstArr[0]; + Value *alphaBias = pointSmoothConstArr[1]; + + Value *negHalfPointSize = m_builder->CreateFNeg(halfPointSize); + Value *negHalfPointSizeVal = PoisonValue::get(FixedVectorType::get(m_builder->getFloatTy(), 2)); + negHalfPointSizeVal = m_builder->CreateInsertElement(negHalfPointSizeVal, negHalfPointSize, m_builder->getInt32(0)); + negHalfPointSizeVal = m_builder->CreateInsertElement(negHalfPointSizeVal, negHalfPointSize, m_builder->getInt32(1)); + Value *pointSize = m_builder->CreateFMul(halfPointSize, ConstantFP::get(m_builder->getFloatTy(), 2.0)); + Value *pointSizeVal = PoisonValue::get(FixedVectorType::get(m_builder->getFloatTy(), 2)); + pointSizeVal = m_builder->CreateInsertElement(pointSizeVal, pointSize, m_builder->getInt32(0)); + pointSizeVal = m_builder->CreateInsertElement(pointSizeVal, pointSize, m_builder->getInt32(1)); + + Value *scaledVal = m_builder->CreateFma(primCoord, pointSizeVal, negHalfPointSizeVal); + Value *alphaScale = m_builder->CreateDotProduct(scaledVal, scaledVal); + alphaScale = m_builder->CreateSqrt(alphaScale); + alphaScale = m_builder->CreateFSub(halfPointSize, alphaScale); + Value *discard = m_builder->CreateFCmpULT(alphaScale, 
ConstantFP::get(m_builder->getFloatTy(), 0)); + Instruction *InsertI = &*m_builder->GetInsertPoint(); + Instruction *thenInst = nullptr; + Instruction *elseInst = nullptr; + SplitBlockAndInsertIfThenElse(discard, InsertI, &thenInst, &elseInst); + m_builder->SetInsertPoint(thenInst); + m_builder->CreateKill(); + m_builder->SetInsertPoint(elseInst); + alphaScale = m_builder->CreateFAdd(alphaScale, alphaBias); + m_builder->CreateStore(alphaScale, alphaScaleVal); + + m_builder->SetInsertPoint(m_retInst); + for (GlobalVariable &global : m_module->globals()) { + auto addrSpace = global.getType()->getAddressSpace(); + if (addrSpace == SPIRAS_Output) { + auto outputMetaVal = mdconst::extract(global.getMetadata(gSPIRVMD::InOut)->getOperand(0)); + patchAlphaScaling(&global, global.getValueType(), outputMetaVal, alphaScaleVal); + } + } + } +} + // ===================================================================================================================== // Does lowering operations for GLSL variable "gl_ClipVertex". void LowerGLCompatibility::lowerClipVertex() { diff --git a/llpc/lower/LowerGLCompatibility.h b/llpc/lower/LowerGLCompatibility.h index cda79dbc91..1a4d19f433 100644 --- a/llpc/lower/LowerGLCompatibility.h +++ b/llpc/lower/LowerGLCompatibility.h @@ -33,7 +33,9 @@ #include "SPIRVInternal.h" #include "llpcSpirvLower.h" #include "lgc/Builder.h" +#include "lgc/LgcDialect.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" namespace Llpc { @@ -67,6 +69,7 @@ class LowerGLCompatibility : public SpirvLower, public llvm::PassInfoMixingetPipelineContext()->getRayTracingState(); ComputeShaderMode mode = {}; - mode.workgroupSizeX = rtState->threadGroupSizeX; - mode.workgroupSizeY = rtState->threadGroupSizeY; - mode.workgroupSizeZ = rtState->threadGroupSizeZ; + // NOTE: For continuations, we only support flatten threadgroup (more precisely, numthreads(32, 1, 1)) so far. 
+ assert(rtState->dispatchRaysThreadGroupSize == 32); + mode.workgroupSizeX = rtState->dispatchRaysThreadGroupSize; + mode.workgroupSizeY = 1; + mode.workgroupSizeZ = 1; mode.noLocalInvocationIdInCalls = true; Pipeline::setComputeShaderMode(module, mode); module.getOrInsertNamedMetadata(ContHelper::MDLgcCpsModuleName); diff --git a/llpc/lower/ProcessGfxRuntimeLibrary.cpp b/llpc/lower/ProcessGfxRuntimeLibrary.cpp index 3dac36e0a2..64add0fc3e 100644 --- a/llpc/lower/ProcessGfxRuntimeLibrary.cpp +++ b/llpc/lower/ProcessGfxRuntimeLibrary.cpp @@ -29,12 +29,13 @@ *********************************************************************************************************************** */ #include "ProcessGfxRuntimeLibrary.h" +#include "compilerutils/ArgPromotion.h" +#include "compilerutils/TypesMetadata.h" #include "llpcSpirvLowerInternalLibraryIntrinsicUtil.h" #include "llpcSpirvLowerUtil.h" -#include "llvmraytracing/Continuations.h" -#include "llvmraytracing/ContinuationsUtil.h" #include "lgc/Builder.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/IR/Module.h" #define DEBUG_TYPE "process-gfxruntime-library" using namespace lgc; @@ -82,12 +83,12 @@ void ProcessGfxRuntimeLibrary::processLibraryFunction(Function *&func) { SmallBitVector promotionMask(func->arg_size()); for (unsigned argId = 0; argId < func->arg_size(); ++argId) { auto *arg = func->getArg(argId); - ContArgTy argTy = ContArgTy::get(func, arg); + TypedArgTy argTy = TypedArgTy::get(arg); if (!argTy.isPointerTy()) continue; promotionMask.set(argId); } - func = promotePointerArguments(func, promotionMask); + func = CompilerUtils::promotePointerArguments(func, promotionMask); return; } diff --git a/llpc/lower/llpcSpirvLower.cpp b/llpc/lower/llpcSpirvLower.cpp index d6738dc96f..d0958d69ac 100644 --- a/llpc/lower/llpcSpirvLower.cpp +++ b/llpc/lower/llpcSpirvLower.cpp @@ -33,6 +33,7 @@ #include "LowerPostInline.h" #include "llpcContext.h" #include "llpcDebug.h" +#include "llpcRayTracingContext.h" #include 
"llpcSpirvLowerAccessChain.h" #include "llpcSpirvLowerCfgMerges.h" #include "llpcSpirvLowerConstImmediateStore.h" @@ -182,8 +183,7 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager // And do inlining after SpirvLowerRayTracing as it will produce some extra functions. if (lowerFlag.isRayTracing) { assert(context->getPipelineType() == PipelineType::RayTracing); - auto *pipelineInfo = static_cast(context->getPipelineBuildInfo()); - if (pipelineInfo->mode != Vkgc::LlpcRaytracingMode::Continuations) { + if (!static_cast(context->getPipelineContext())->isContinuationsMode()) { passMgr.addPass(SpirvLowerRayTracing()); passMgr.addPass(AlwaysInlinerPass()); } @@ -215,7 +215,6 @@ void SpirvLower::addPasses(Context *context, ShaderStage stage, lgc::PassManager // @param [in/out] passMgr : Pass manager void SpirvLower::registerTranslationPasses(lgc::PassManager &passMgr) { passMgr.registerPass("llpc-spirv-lower-translator", SpirvLowerTranslator::name()); - passMgr.registerPass("llpc-spirv-lower-ray-query", SpirvLowerRayQuery::name()); passMgr.registerPass("llpc-spirv-lower-gpurt-library", SpirvProcessGpuRtLibrary::name()); } diff --git a/llpc/lower/llpcSpirvLowerCfgMerges.h b/llpc/lower/llpcSpirvLowerCfgMerges.h index b4549166db..112dfbbaa5 100644 --- a/llpc/lower/llpcSpirvLowerCfgMerges.h +++ b/llpc/lower/llpcSpirvLowerCfgMerges.h @@ -31,8 +31,13 @@ #pragma once #include "llpcSpirvLower.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/PassManager.h" +namespace llvm { +class Value; +} // namespace llvm + namespace Llpc { // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerConstImmediateStore.h b/llpc/lower/llpcSpirvLowerConstImmediateStore.h index b506268dda..8fa3fd9bac 100644 --- a/llpc/lower/llpcSpirvLowerConstImmediateStore.h +++ b/llpc/lower/llpcSpirvLowerConstImmediateStore.h @@ -36,6 +36,7 @@ namespace llvm { class AllocaInst; 
class StoreInst; +class Value; } // namespace llvm namespace Llpc { diff --git a/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp b/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp index a4524ee56c..a1dedf2ad4 100644 --- a/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp +++ b/llpc/lower/llpcSpirvLowerCooperativeMatrix.cpp @@ -37,6 +37,7 @@ #include "lgc/BuilderCommon.h" #include "lgc/LgcDialect.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #define DEBUG_TYPE "llpc-spirv-lower-cooperative-matrix" diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp index bc3233c22d..1a0b7b9e2f 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.cpp +++ b/llpc/lower/llpcSpirvLowerGlobal.cpp @@ -31,7 +31,7 @@ #include "llpcSpirvLowerGlobal.h" #include "SPIRVInternal.h" #include "compilerutils/CompilerUtils.h" -#include "continuations/ContinuationsUtil.h" +#include "compilerutils/TypesMetadata.h" #include "llpcContext.h" #include "llpcDebug.h" #include "llpcGraphicsContext.h" @@ -515,7 +515,7 @@ void SpirvLowerGlobal::lowerInOut(llvm::GlobalVariable *globalVar) { ty = m_builder->getInt64Ty(); MDNode *metaNode = globalVar->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto meta = mdconst::dyn_extract(metaNode->getOperand(0)); + auto meta = mdconst::extract(metaNode->getOperand(0)); m_builder->SetInsertPointPastAllocas(m_entryPoint); Value *proxy = m_builder->CreateAlloca(ty, dataLayout.getAllocaAddrSpace(), nullptr, @@ -581,12 +581,8 @@ void SpirvLowerGlobal::lowerInOutUsersInPlace(llvm::GlobalVariable *globalVar, l Instruction *inst = cast(user); if (auto *gep = dyn_cast(inst)) { - // We currently expect that GEPs are only used on the global variable directly, with the global variable's type. - // The SpirvLowerAccessChain pass ensures this. - // // TODO: As LLVM is moving away from GEPs towards ptradds, we need a better solution, probably by adding our // own "structured GEP" operation. 
- assert(current == globalVar && gep->getSourceElementType() == globalVar->getValueType()); assert(cast(gep->idx_begin()[0])->isNullValue()); for (unsigned i = 1, e = gep->getNumIndices(); i < e; ++i) @@ -608,7 +604,7 @@ void SpirvLowerGlobal::lowerInOutUsersInPlace(llvm::GlobalVariable *globalVar, l MDNode *metaNode = globalVar->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto inOutMetaVal = mdconst::dyn_extract(metaNode->getOperand(0)); + auto inOutMetaVal = mdconst::extract(metaNode->getOperand(0)); auto indexOperands = ArrayRef(indexStack); @@ -1480,7 +1476,7 @@ void SpirvLowerGlobal::lowerBufferBlock() { MDNode *blockMetaNode = global.getMetadata(gSPIRVMD::Block); if (blockMetaNode) { ShaderBlockMetadata blockMeta = {}; - auto blockMetaNodeVal = mdconst::dyn_extract(blockMetaNode->getOperand(0)); + auto blockMetaNodeVal = mdconst::extract(blockMetaNode->getOperand(0)); if (auto meta = dyn_cast(blockMetaNodeVal)) { blockMeta.U64All = meta->getZExtValue(); } else if (auto metaStruct = dyn_cast(blockMetaNodeVal)) { @@ -1497,8 +1493,8 @@ void SpirvLowerGlobal::lowerBufferBlock() { MDNode *const resMetaNode = global.getMetadata(gSPIRVMD::Resource); assert(resMetaNode); - const unsigned descSet = mdconst::dyn_extract(resMetaNode->getOperand(0))->getZExtValue(); - const unsigned binding = mdconst::dyn_extract(resMetaNode->getOperand(1))->getZExtValue(); + const unsigned descSet = mdconst::extract(resMetaNode->getOperand(0))->getZExtValue(); + const unsigned binding = mdconst::extract(resMetaNode->getOperand(1))->getZExtValue(); // AtomicCounter is emulated following same impl of SSBO, only qualifier 'offset' will be used in its // MD now. Using a new MD kind to detect it. AtomicCounter's type should be uint, not a structure. 
@@ -1507,7 +1503,7 @@ void SpirvLowerGlobal::lowerBufferBlock() { ShaderBlockMetadata atomicCounterMeta = {}; if (atomicCounterMD) { atomicCounterMeta.U64All = - cast(mdconst::dyn_extract(atomicCounterMD->getOperand(0)))->getZExtValue(); + cast(mdconst::extract(atomicCounterMD->getOperand(0)))->getZExtValue(); } convertUsersOfConstantsToInstructions(&global); @@ -1698,8 +1694,8 @@ void SpirvLowerGlobal::lowerBufferBlock() { MDNode *const resMetaNode1 = globals[nextGlobalIdx]->getMetadata(gSPIRVMD::Resource); assert(resMetaNode); - descSets[1] = mdconst::dyn_extract(resMetaNode1->getOperand(0))->getZExtValue(); - bindings[1] = mdconst::dyn_extract(resMetaNode1->getOperand(1))->getZExtValue(); + descSets[1] = mdconst::extract(resMetaNode1->getOperand(0))->getZExtValue(); + bindings[1] = mdconst::extract(resMetaNode1->getOperand(1))->getZExtValue(); if (!nextGlobalIdx) { std::swap(descSets[0], descSets[1]); @@ -1827,7 +1823,7 @@ void SpirvLowerGlobal::lowerAliasedVal() { auto meta = global.getMetadata(gSPIRVMD::Lds); if (!meta) return; - const unsigned aliased = mdconst::dyn_extract(meta->getOperand(0))->getZExtValue(); + const unsigned aliased = mdconst::extract(meta->getOperand(0))->getZExtValue(); if (aliased) { unsigned inBits = static_cast(m_module->getDataLayout().getTypeSizeInBits(global.getValueType())); if (inBits > maxInBits) { @@ -1971,9 +1967,9 @@ void SpirvLowerGlobal::lowerUniformConstants() { for (auto &eachFunc : globalUsers) { MDNode *metaNode = global.getMetadata(gSPIRVMD::UniformConstant); - auto uniformConstantsSet = mdconst::dyn_extract(metaNode->getOperand(0))->getZExtValue(); - auto uniformConstantsBinding = mdconst::dyn_extract(metaNode->getOperand(1))->getZExtValue(); - auto uniformConstantsOffset = mdconst::dyn_extract(metaNode->getOperand(2))->getZExtValue(); + auto uniformConstantsSet = mdconst::extract(metaNode->getOperand(0))->getZExtValue(); + auto uniformConstantsBinding = mdconst::extract(metaNode->getOperand(1))->getZExtValue(); + 
auto uniformConstantsOffset = mdconst::extract(metaNode->getOperand(2))->getZExtValue(); m_builder->SetInsertPointPastAllocas(eachFunc.first); Value *bufferDesc = m_builder->create( @@ -2011,7 +2007,7 @@ Value *SpirvLowerGlobal::interpolateInputElement(Type *returnTy, unsigned interp MDNode *metaNode = gv->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto inputMeta = mdconst::dyn_extract(metaNode->getOperand(0)); + auto inputMeta = mdconst::extract(metaNode->getOperand(0)); auto hasAllConstantIndices = [](ArrayRef &indexOperands) { // if indexOperands is empty then add_of will return TRUE. @@ -2186,7 +2182,7 @@ void SpirvLowerGlobal::handleVolatileInput(GlobalVariable *input, Value *proxy) MDNode *metaNode = input->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto meta = mdconst::dyn_extract(metaNode->getOperand(0)); + auto meta = mdconst::extract(metaNode->getOperand(0)); ShaderInOutMetadata inOutMeta = {}; inOutMeta.U64All[0] = cast(meta->getOperand(0))->getZExtValue(); @@ -2320,19 +2316,22 @@ void SpirvLowerGlobal::changeRtFunctionSignature() { } if (rayTracingContext->isContinuationsMode()) { - SmallVector contArgTys; - - auto var = m_shaderStage == ShaderStageRayTracingCallable ? incomingCallableDataVar : incomingPayloadVar; - auto payloadTy = var ? var->getValueType() : StructType::get(*m_context); - if (!isa(payloadTy)) - payloadTy = StructType::get(*m_context, {payloadTy}, false); - contArgTys.push_back(ContArgTy(pointerTy, payloadTy)); - if ((m_shaderStage == ShaderStageRayTracingAnyHit) || (m_shaderStage == ShaderStageRayTracingClosestHit)) { - auto type = ArrayType::get(m_builder->getInt32Ty(), rayTracingContext->getAttributeDataSize()); - contArgTys.push_back(ContArgTy(pointerTy, type)); + SmallVector contArgTys; + + // We don't have hit attribute in argument for IS in continuations mode. + if (m_shaderStage != ShaderStageRayTracingIntersect) { + auto var = m_shaderStage == ShaderStageRayTracingCallable ? 
incomingCallableDataVar : incomingPayloadVar; + auto payloadTy = var ? var->getValueType() : StructType::get(*m_context); + if (!isa(payloadTy)) + payloadTy = StructType::get(*m_context, {payloadTy}, false); + contArgTys.push_back(TypedArgTy(pointerTy, payloadTy)); + if ((m_shaderStage == ShaderStageRayTracingAnyHit) || (m_shaderStage == ShaderStageRayTracingClosestHit)) { + auto type = ArrayType::get(m_builder->getInt32Ty(), rayTracingContext->getAttributeDataSize()); + contArgTys.push_back(TypedArgTy(pointerTy, type)); + } } - ContFuncTy contFuncTy(m_builder->getVoidTy(), contArgTys); + TypedFuncTy contFuncTy(m_builder->getVoidTy(), contArgTys); contFuncTy.writeMetadata(newFunc); } diff --git a/llpc/lower/llpcSpirvLowerMath.cpp b/llpc/lower/llpcSpirvLowerMath.cpp index 6edff999b6..44c07f5413 100644 --- a/llpc/lower/llpcSpirvLowerMath.cpp +++ b/llpc/lower/llpcSpirvLowerMath.cpp @@ -39,7 +39,6 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" @@ -53,7 +52,6 @@ using namespace lgc; using namespace llvm; -using namespace PatternMatch; using namespace SPIRV; using namespace Llpc; @@ -532,115 +530,6 @@ void SpirvLowerMathFloatOp::visitBinaryOperator(BinaryOperator &binaryOp) { return; } } - - // Replace mul with amdgcn_fmul_legacy intrinsic when detect patterns like: - // ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) - if (opCode == Instruction::FMul) { - emitFFmulzInst(binaryOp); - } -} - -// ===================================================================================================================== -// Replace mul with amdgcn_fmul_legacy intrinsic when detect patterns like: -// ((b==0.0 ? 0.0 : a) * (a==0.0 ? 
0.0 : b)) -// @param binaryOp : Binary operator instruction -void SpirvLowerMathFloatOp::emitFFmulzInst(BinaryOperator &binaryOp) { - auto src1 = binaryOp.getOperand(0); - auto src2 = binaryOp.getOperand(1); - FastMathFlags fastMathFlags = binaryOp.getFastMathFlags(); - auto matchValue = isMulDx9Zero(src1, src2, fastMathFlags); - if (matchValue != std::nullopt) { - IRBuilder<> builder(*m_context); - builder.SetInsertPoint(&binaryOp); - builder.setFastMathFlags(binaryOp.getFastMathFlags()); - Value *transformSrc1 = matchValue->first; - Value *transformSrc2 = matchValue->second; - Value *fmulzResult = builder.CreateIntrinsic(Intrinsic::amdgcn_fmul_legacy, {}, {transformSrc1, transformSrc2}); - - m_changed = true; - binaryOp.replaceAllUsesWith(fmulzResult); - binaryOp.dropAllReferences(); - binaryOp.eraseFromParent(); - } -} - -// ===================================================================================================================== -// Replace fma with amdgcn_fma_legacy intrinsic when detect patterns like: -// fma((b==0.0 ? 0.0 : a), (a==0.0 ? 
0.0 : b), c) -// @param inst : Instruction to be replaced if needed -void SpirvLowerMathFloatOp::emitFFmazInst(Instruction *inst) { - assert(inst); - CallInst *fmaCallInst = dyn_cast(inst); - Value *src1 = fmaCallInst->getArgOperand(0); - Value *src2 = fmaCallInst->getArgOperand(1); - FastMathFlags fastMathFlags = inst->getFastMathFlags(); - auto matchValue = isMulDx9Zero(src1, src2, fastMathFlags); - if (matchValue != std::nullopt) { - IRBuilder<> builder(*m_context); - builder.SetInsertPoint(inst); - builder.setFastMathFlags(inst->getFastMathFlags()); - Value *transformSrc1 = matchValue->first; - Value *transformSrc2 = matchValue->second; - Value *src3 = fmaCallInst->getArgOperand(2); - Value *ffmazResult = - builder.CreateIntrinsic(Intrinsic::amdgcn_fma_legacy, {}, {transformSrc1, transformSrc2, src3}); - - m_changed = true; - inst->replaceAllUsesWith(ffmazResult); - inst->dropAllReferences(); - inst->eraseFromParent(); - } -} - -// ===================================================================================================================== -// Checks whether a multiply of lhs with rhs using the given fast-math flags can be transformed into a multiply -// with DX9 zero semantics. If so, returns a pair of operands for the new multiply. -// @param lhs : left operand for the operation -// @param rhs: right operand for the operation -// @param fastMathFlags: fastmath flags for the opreration -std::optional> SpirvLowerMathFloatOp::isMulDx9Zero(Value *lhs, Value *rhs, - FastMathFlags fastMathFlags) { - Value *lhsCmpValue = nullptr; - Value *lhsFalseValue = nullptr; - Value *rhsCmpValue = nullptr; - Value *rhsFalseValue = nullptr; - FCmpInst::Predicate pred = FCmpInst::FCMP_OEQ; - - // If the fast math flags might have INFs, when a = intf then a == 0 ? 0.0 : b is b and a * b = inf * 0 = nan - // This is incorrect so it needs to add related check here - if (!fastMathFlags.noInfs()) - return std::nullopt; - - // Only transform for float32. 
- if (!(lhs->getType()->isFloatTy() && rhs->getType()->isFloatTy())) - return std::nullopt; - - // Detect whether A = (b==0.0 ? 0.0 : a) and parse out b and a - bool lhsMatch = - match(lhs, m_Select(m_FCmp(pred, m_Value(lhsCmpValue), m_AnyZeroFP()), m_Zero(), m_Value(lhsFalseValue))); - // Detect whether B = (a'==0.0 ? 0.0 : b') and output a' and b' - bool rhsMatch = - match(rhs, m_Select(m_FCmp(pred, m_Value(rhsCmpValue), m_AnyZeroFP()), m_Zero(), m_Value(rhsFalseValue))); - - // If b == b' && a == a' then use fmul_legacy(a,b) instead of fmul(A,B) - if (lhsMatch && rhsMatch && (lhsCmpValue == rhsFalseValue) && (rhsCmpValue == lhsFalseValue)) { - return std::make_pair(lhsFalseValue, rhsFalseValue); - } - if (lhsMatch && (lhsCmpValue == rhs)) { - if (auto *constLhsFalseValue = dyn_cast(lhsFalseValue); - constLhsFalseValue && !constLhsFalseValue->isZero()) { - // Detect pattern: ((b==0.0 ? 0.0 : a) * b) when a is constant but not zero. - return std::make_pair(lhsFalseValue, rhs); - } - } - if (rhsMatch && (lhs == rhsCmpValue)) { - if (auto *constRhsFalseValue = dyn_cast(rhsFalseValue); - constRhsFalseValue && !constRhsFalseValue->isZero()) { - // Detect pattern: (a * (a==0.0 ? 0.0 : b)) when b is constant but not zero. - return std::make_pair(lhs, rhsFalseValue); - } - } - return std::nullopt; } // ===================================================================================================================== @@ -656,13 +545,6 @@ void SpirvLowerMathFloatOp::visitCallInst(CallInst &callInst) { // NOTE: FABS will be optimized by backend compiler with sign bit removed via AND. flushDenormIfNeeded(&callInst); } - - // Replace fma with amdgcn_fma_legacy intrinsic when detect patterns like: - // fma((b==0.0 ? 0.0 : a), (a==0.0 ? 
0.0 : b), c) - auto mangledName = callee->getName(); - if (mangledName.starts_with("lgc.create.fma")) { - emitFFmazInst(&callInst); - } } // ===================================================================================================================== diff --git a/llpc/lower/llpcSpirvLowerMath.h b/llpc/lower/llpcSpirvLowerMath.h index 6b41e51d5c..5899096e5a 100644 --- a/llpc/lower/llpcSpirvLowerMath.h +++ b/llpc/lower/llpcSpirvLowerMath.h @@ -92,10 +92,6 @@ class SpirvLowerMathFloatOp : public SpirvLowerMath, virtual void visitBinaryOperator(llvm::BinaryOperator &binaryOp); virtual void visitCallInst(llvm::CallInst &callInst); virtual void visitFPTruncInst(llvm::FPTruncInst &fptruncInst); - void emitFFmulzInst(llvm::BinaryOperator &binaryOp); - void emitFFmazInst(llvm::Instruction *inst); - std::optional> isMulDx9Zero(llvm::Value *lhs, llvm::Value *rhs, - llvm::FastMathFlags flags); static llvm::StringRef name() { return "Lower SPIR-V math floating point optimisation"; } }; diff --git a/llpc/lower/llpcSpirvLowerMemoryOp.cpp b/llpc/lower/llpcSpirvLowerMemoryOp.cpp index 8c1b205ded..35fe5b0e41 100644 --- a/llpc/lower/llpcSpirvLowerMemoryOp.cpp +++ b/llpc/lower/llpcSpirvLowerMemoryOp.cpp @@ -203,6 +203,11 @@ bool SpirvLowerMemoryOp::needExpandDynamicIndex(GetElementPtrInst *getElemPtr, u // Always expand for vector auto vectorTy = dyn_cast(indexedTy); *dynIndexBound = vectorTy->getNumElements(); + } else if (isa(indexedTy)) { + // Skip scalar integer type + // NOTE: Normal SPIR-V translation won't generate this, it may come from our internally inserted + // instructions to do pointer increment. 
+ allowExpand = false; } else { llvm_unreachable("Should never be called!"); allowExpand = false; diff --git a/llpc/lower/llpcSpirvLowerRayQuery.cpp b/llpc/lower/llpcSpirvLowerRayQuery.cpp deleted file mode 100644 index 651c5be43b..0000000000 --- a/llpc/lower/llpcSpirvLowerRayQuery.cpp +++ /dev/null @@ -1,1374 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file llpcSpirvLowerRayQuery.cpp - * @brief LLPC source file: contains implementation of class Llpc::SpirvLowerRayQuery. 
- *********************************************************************************************************************** - */ - -#include "llpcSpirvLowerRayQuery.h" -#include "SPIRVInternal.h" -#include "compilerutils/CompilerUtils.h" -#include "llpcContext.h" -#include "llpcSpirvLowerUtil.h" -#include "llvmraytracing/GpurtContext.h" -#include "lgc/Builder.h" -#include "lgc/GpurtDialect.h" -#include "llvm-dialects/Dialect/Visitor.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" - -#define DEBUG_TYPE "llpc-spirv-lower-ray-query" - -using namespace spv; -using namespace llvm; -using namespace Llpc; -using namespace CompilerUtils; - -namespace SPIRV { -extern const char *MetaNameSpirvOp; -} // namespace SPIRV - -namespace RtName { -const char *LdsUsage = "LdsUsage"; -const char *PrevRayQueryObj = "PrevRayQueryObj"; -const char *RayQueryObjGen = "RayQueryObjGen"; -} // namespace RtName - -// Enum for the RayDesc -namespace RayDescParams { -enum : unsigned { - Origin = 0, // 0, Origin - TMin, // 1, T Min - Direction, // 2, Direction - TMax // 3, T Max -}; -} // namespace RayDescParams - -// Enum for the RaySystem -namespace RaySystemParams { -enum : unsigned { - CurrNodePtr = 0, // 0, Current Node pointer - RayTCurrent, // 1, T Current - InstanceNodePtr, // 2, Instance node pointer - InstanceContribution, // 3, Instance contribution - GeometryIndex, // 4, Geometry index - PrimitiveIndex, // 5, Primitive index - Barycentrics, // 6, Barycentrics - FrontFace, // 7, Front face - Origin, // 8, Ray origin - Direction // 9, Ray direction -}; -} // namespace RaySystemParams - -namespace RayQueryParams { -enum : unsigned { - BvhLo = 0, // 0, Acceleration structure address low bits - BvhHi, // 1, Acceleration structure address high bits - TopLevelBvhLo, // 2, Top level AS address low bits - TopLevelBvhHi, // 3, Top level AS address high bits - StackPtr, // 4, Stack pointer - StackPtrTop, // 5, Top Stack pointer - 
StackNumEntries, // 6, Stack number entries - InstNodePtr, // 7, Instance node pointer - CurrNodePtr, // 8, Current node pointer - InstanceHitContributionAndFlags, // 9, Instance hit contribution and flags - PrevNodePtr, // 10, Last node pointer - IsGoingDown, // 11, Is going down - LastInstanceNode, // 12, Last instance node - RayDesc, // 13, RayDesc structure - RayTMin, // 14, T min - RayFlags, // 15, Ray flags - InstanceInclusionMask, // 16, Instance inclusion mask - CandidateType, // 17, Candidate type - Candidate, // 18, Candidate system info - CommittedStatus, // 19, Committed status - Committed, // 20, Committed system info - CurrNodePtr2, // 21, currNodePtr2 - NumRayBoxTest, // 22, numRayBoxTest; - NumRayTriangleTest, // 23, numRayTriangleTest; - NumIterations, // 24, numIterations; - MaxStackDepth, // 25, maxStackDepth; - Clocks, // 26, clocks; - NumCandidateHits, // 27, numCandidateHits; - UnstanceIntersections, // 28, instanceIntersections; - RayQueryObj // 29, Internal ray query object handle -}; -} // namespace RayQueryParams - -// Enums for the committed status -namespace CommittedStatus { -enum : unsigned { - Nothing = 0, // Nothing hit - TriangleHit, // Triangle hit - ProceduralPrimitiveHit // Procedural hit -}; -} // namespace CommittedStatus - -// Ray query candidate intersection type values -namespace RayQueryCandidateIntersection { -enum : unsigned { - NonOpaqueTriangle = 0, // Candidate Intersection Non Opaque Triangle - Aabb, // Candidate Intersection Aabb - NonOpaqueAabb, // Candidate Intersection Non Opaque Aabb - NoDuplicateAnyHitAabb // Candidate Intersection No Duplicate Any Hit Aabb -}; -} // namespace RayQueryCandidateIntersection - -// Ray query committed intersection type values -namespace RayQueryCommittedIntersection { -enum : unsigned { - None = 0, // Committed Intersection None - Triangle, // Committed Intersection Triangle - Generated, // Committed Intersection Generated -}; -} // namespace RayQueryCommittedIntersection - 
-namespace Llpc { - -// ===================================================================================================================== -// Get RayDesc Type - -// @param builder : The builder to construct LLVM IR IR -Type *getRayDescTy(lgc::Builder *builder) { - - // struct RayDesc { - // vec3 origin; - // float tMin; - // vec3 direction; - // float tMax; - // }; - - LLVMContext &context = builder->getContext(); - auto floatx3Ty = FixedVectorType::get(builder->getFloatTy(), 3); - Type *rayDescTys[] = { - floatx3Ty, // origin - builder->getFloatTy(), // tMin - floatx3Ty, // direction - builder->getFloatTy(), // tMax - }; - StructType *rayDescTy = StructType::get(context, rayDescTys, false); - return rayDescTy; -} - -// ============================================================================= -// Get RayQueryInternal type -// -// @param build : The builder to construct LLVM IR -Type *getRayQueryInternalTy(lgc::Builder *builder) { - auto rayDescTy = getRayDescTy(builder); - LLVMContext &context = builder->getContext(); - - // struct RaySystemData { - // uint nodeIndex; - // float rayTCurrent; - // uint instanceNodePtr; - // uint instanceContribution; - // uint geometryIndex; - // uint primitiveIndex; - // vec2 barycentrics; - // uint frontFace; - // vec3 origin; - // vec3 direction; - // }; - - auto floatx2Ty = FixedVectorType::get(builder->getFloatTy(), 2); - auto floatx3Ty = FixedVectorType::get(builder->getFloatTy(), 3); - Type *raySystemDatas[] = { - builder->getInt32Ty(), // 0, nodeIndex - builder->getFloatTy(), // 1, rayTCurrent - builder->getInt32Ty(), // 2, instanceNodePtr - builder->getInt32Ty(), // 3, instanceContribution - builder->getInt32Ty(), // 4, geometryIndex; - builder->getInt32Ty(), // 5, primitiveIndex; - floatx2Ty, // 6, barycentrics; - builder->getInt32Ty(), // 7, frontFace; - floatx3Ty, // 8, origin; - floatx3Ty, // 9, direction; - }; - auto raySystemDataTy = StructType::get(context, raySystemDatas, false); - - // struct 
RayQueryInternal { - // uint bvhLo; - // uint bvhHi; - // uint topLevelBvhLo; - // uint topLevelBvhHi; - // uint stackPtr; - // uint stackPtrTop; - // uint stackNumEntries; - // uint instNodePtr; - // uint currNodePtr; - // uint instanceHitContributionAndFlags; - // uint prevNodePtr; - // uint isGoingDown; - // uint lastInstanceNode; - // RayDesc rayDesc; - // float rayTMin; - // uint rayFlags; - // uint instanceInclusionMask; - // uint candidateType; - // RaySystemData candidate; - // uint committedStatus; - // RaySystemData committed; - // uint numRayBoxTest; - // uint numRayTriangleTest; - // uint numIterations; - // uint maxStackDepth; - // uint clocks; - // uint numCandidateHits; - // uint instanceIntersections; - // uint rayqueryObj; - // }; - - Type *rayQueryInternalTys[] = { - builder->getInt32Ty(), // 0, bvhLo, - builder->getInt32Ty(), // 1, bvhHi, - builder->getInt32Ty(), // 2, topLevelBvhLo, - builder->getInt32Ty(), // 3, topLevelBvhHi, - builder->getInt32Ty(), // 4, stackPtr, - builder->getInt32Ty(), // 5, stackPtrTop, - builder->getInt32Ty(), // 6, stackNumEntries, - builder->getInt32Ty(), // 7, instNodePtr, - builder->getInt32Ty(), // 8, currNodePtr, - builder->getInt32Ty(), // 9, instanceHitContributionAndFlags, - builder->getInt32Ty(), // 10, prevNodePtr, - builder->getInt32Ty(), // 11, isGoingDown, - builder->getInt32Ty(), // 12, lastInstanceNode, - rayDescTy, // 13, rayDesc, - builder->getFloatTy(), // 14, rayTMin, - builder->getInt32Ty(), // 15, rayFlags, - builder->getInt32Ty(), // 16, instanceInclusionMask, - builder->getInt32Ty(), // 17, candidateType; - raySystemDataTy, // 18, candidate; - builder->getInt32Ty(), // 19, committedStatus; - raySystemDataTy, // 20, committed; - builder->getInt32Ty(), // 21, currNodePtr2 - builder->getInt32Ty(), // 22, numRayBoxTest; - builder->getInt32Ty(), // 23, numRayTriangleTest; - builder->getInt32Ty(), // 24, numIterations; - builder->getInt32Ty(), // 25, maxStackDepth; - builder->getInt32Ty(), // 26, 
clocks; - builder->getInt32Ty(), // 27, numCandidateHits; - builder->getInt32Ty(), // 28, instanceIntersections; - builder->getInt32Ty(), // 29, rayqueryObj - }; - return StructType::get(context, rayQueryInternalTys, false); -} - -// ===================================================================================================================== -SpirvLowerRayQuery::SpirvLowerRayQuery() : SpirvLowerRayQuery(false) { -} - -// ===================================================================================================================== -SpirvLowerRayQuery::SpirvLowerRayQuery(bool rayQueryLibrary) - : m_rayQueryLibrary(rayQueryLibrary), m_spirvOpMetaKindId(0), m_prevRayQueryObj(nullptr), m_rayQueryObjGen(nullptr), - m_nextTraceRayId(0) { -} - -// ===================================================================================================================== -// Executes this SPIR-V lowering pass on the specified LLVM module. -// -// @param [in/out] module : LLVM module to be run on -// @param [in/out] analysisManager : Analysis manager to use for this transformation -PreservedAnalyses SpirvLowerRayQuery::run(Module &module, ModuleAnalysisManager &analysisManager) { - m_crossModuleInliner = std::make_optional(); - auto onExit = make_scope_exit([&] { m_crossModuleInliner.reset(); }); - LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-ray-query\n"); - SpirvLower::init(&module); - createGlobalRayQueryObj(); - createGlobalLdsUsage(); - if (!m_rayQueryLibrary) { - Instruction *insertPos = &*(m_entryPoint->begin()->getFirstNonPHIOrDbgOrAlloca()); - m_builder->SetInsertPoint(insertPos); - initGlobalVariable(); - m_spirvOpMetaKindId = m_context->getMDKindID(MetaNameSpirvOp); - for (auto funcIt = module.begin(), funcEnd = module.end(); funcIt != funcEnd;) { - Function *func = &*funcIt++; - processShaderFunction(func, getFuncOpcode(func)); - } - } - return PreservedAnalyses::none(); -} - -// 
===================================================================================================================== -// Process RayQuery OpRayQueryInitializeKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // void TraceRayInlineAmdInternal( - // inout RayQueryInternal rayQuery, - // in uint accelStructLo, - // in uint accelStructHi, - // in uint constRayFlags, - // in uint rayFlags, - // in uint instanceMask, - // in RayDesc rayDesc, - // in uint rayId) - // - // void rayQueryInitializeEXT( - // rayQueryEXT q -> rayQuery, - // accelerationStructureEXT topLevel, - // uint rFlags, - // uint cullMask, - // vec3 origin, - // float tMin, - // vec3 direction, - // float tMax) - // { - // rayQuery = q - // accelStructLo = topLevel.x - // accelStructHi = topLevel.y - // instanceMask = cullMask - // rayDesc.Origin = origin - // rayDesc.Direction = direction - // rayDesc.TMin = tMin - // rayDesc.TMax = tMax - // constRayFlags = 0 - // rayFlags = rFlags - // rayId = 0 - // call TraceRayInlineAmdInternal - // } - - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *zero = m_builder->getInt32(0); - Type *rayDescTy = getRayDescTy(m_builder); - auto int32x3Ty = FixedVectorType::get(m_builder->getInt32Ty(), 3); - - // traceRaysInline argument types - Type *funcArgTys[] = { - nullptr, // 0, Ray query type - m_builder->getInt32Ty(), // 1, Scene Addr low - m_builder->getInt32Ty(), // 2, Scene Addr high - m_builder->getInt32Ty(), // 3, Const ray flags - m_builder->getInt32Ty(), // 4, Ray flags - m_builder->getInt32Ty(), // 5, InstanceMask - rayDescTy, // 6, Ray desc - int32x3Ty, // 7, DispatchRay ID - }; - SmallVector traceRaysArgs(sizeof(funcArgTys) / sizeof(funcArgTys[0])); - auto argIt = func->arg_begin(); - traceRaysArgs[0] = argIt++; - for (size_t i = 1; i < traceRaysArgs.size(); ++i) - 
traceRaysArgs[i] = m_builder->CreateAlloca(funcArgTys[i], SPIRAS_Private); - - // NOTE: Initialize rayQuery.committed to zero, as a workaround for CTS that uses it without committed intersection. - auto rayQueryTy = getRayQueryInternalTy(m_builder); - Value *committedAddr = m_builder->CreateConstGEP2_32(rayQueryTy, traceRaysArgs[0], 0, RayQueryParams::Committed); - auto committedTy = rayQueryTy->getStructElementType(RayQueryParams::Committed); - m_builder->CreateStore(ConstantAggregateZero::get(committedTy), committedAddr); - - // Setup the rayQuery Object ID - Value *rayQueryObjId = m_builder->CreateLoad(m_builder->getInt32Ty(), m_rayQueryObjGen); - Value *rayQueryObjAddr = m_builder->CreateConstGEP2_32(rayQueryTy, traceRaysArgs[0], 0, RayQueryParams::RayQueryObj); - m_builder->CreateStore(rayQueryObjId, rayQueryObjAddr); - m_builder->CreateStore(m_builder->CreateAdd(rayQueryObjId, m_builder->getInt32(1)), m_rayQueryObjGen); - - // 1, Scene Addr low 2, Scene Addr high - Value *arg = argIt++; - Value *sceneAddLow = m_builder->CreateExtractElement(arg, uint64_t(0)); - Value *sceneAddHigh = m_builder->CreateExtractElement(arg, 1); - -#if GPURT_CLIENT_INTERFACE_MAJOR_VERSION < 34 - { - // For GPURT major version < 34, GPURT expect base address of acceleration structure being passed, which is stored - // at offset 0 of the resource. 
- auto gpuLowAddr = m_builder->CreateZExt(sceneAddLow, m_builder->getInt64Ty()); - auto gpuHighAddr = m_builder->CreateZExt(sceneAddHigh, m_builder->getInt64Ty()); - gpuHighAddr = m_builder->CreateShl(gpuHighAddr, m_builder->getInt64(32)); - auto gpuAddr = m_builder->CreateOr(gpuLowAddr, gpuHighAddr); - - Type *gpuAddrAsPtrTy = PointerType::get(*m_context, SPIRAS_Global); - auto loadPtr = m_builder->CreateIntToPtr(gpuAddr, gpuAddrAsPtrTy); - auto loadTy = FixedVectorType::get(Type::getInt32Ty(*m_context), 2); - - Value *loadValue = nullptr; - - if (m_context->getPipelineContext()->getPipelineOptions()->extendedRobustness.nullDescriptor) { - // We should not load from a null descriptor (if it is allowed). - // We do: - // .entry: - // ... - // %gpuAddr = ... - // %loadPtr = inttoptr %gpuAddr - // %isDescValid = icmp ne %gpuAddr, 0 - // br %isDescValid, label %.loadDescriptor, label %.continue - // - // .loadDescriptor: - // %AS = load %loadPtr - // - // .continue: - // %loadVal = phi [ %AS, %.loadDescriptor ], [ 0, %.entry ] - - BasicBlock *loadDescriptorBlock = BasicBlock::Create(*m_context, ".loadDescriptor", func); - BasicBlock *continueBlock = BasicBlock::Create(*m_context, ".continue", func); - - auto isDescValid = m_builder->CreateICmpNE(gpuAddr, m_builder->getInt64(0)); - m_builder->CreateCondBr(isDescValid, loadDescriptorBlock, continueBlock); - - m_builder->SetInsertPoint(loadDescriptorBlock); - auto accelerationStructureAddr = m_builder->CreateLoad(loadTy, loadPtr); - m_builder->CreateBr(continueBlock); - - m_builder->SetInsertPoint(continueBlock); - auto phi = m_builder->CreatePHI(loadTy, 2); - phi->addIncoming(accelerationStructureAddr, loadDescriptorBlock); - auto zero = m_builder->getInt32(0); - phi->addIncoming(ConstantVector::get({zero, zero}), entryBlock); - loadValue = phi; - } else { - loadValue = m_builder->CreateLoad(loadTy, loadPtr); - } - - sceneAddLow = m_builder->CreateExtractElement(loadValue, uint64_t(0)); - sceneAddHigh = 
m_builder->CreateExtractElement(loadValue, 1); - } -#endif - - m_builder->CreateStore(sceneAddLow, traceRaysArgs[1]); - m_builder->CreateStore(sceneAddHigh, traceRaysArgs[2]); - // 3, Const ray flags - m_builder->CreateStore(zero, traceRaysArgs[3]); - // 4, Ray flags - arg = argIt++; - m_builder->CreateStore(arg, traceRaysArgs[4]); - // 5, instance mask - arg = argIt++; - m_builder->CreateStore(arg, traceRaysArgs[5]); - // 6, RayDesc - Value *rayDesc = PoisonValue::get(rayDescTy); - // Insert values Origin,TMin,Direction,TMax to the RayDesc - // Origin - arg = argIt++; - rayDesc = m_builder->CreateInsertValue(rayDesc, arg, 0u); - // TMin - arg = argIt++; - rayDesc = m_builder->CreateInsertValue(rayDesc, arg, 1u); - // Direction - arg = argIt++; - rayDesc = m_builder->CreateInsertValue(rayDesc, arg, 2u); - // TMax - arg = argIt++; - rayDesc = m_builder->CreateInsertValue(rayDesc, arg, 3u); - m_builder->CreateStore(rayDesc, traceRaysArgs[6]); - // 7, Dispatch Id - m_builder->CreateStore(getDispatchId(), traceRaysArgs[7]); - - StringRef rayQueryInitialize = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_TRACE_RAY_INLINE); - m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(rayQueryInitialize), traceRaysArgs); - m_builder->CreateRetVoid(); - - if (m_context->getPipelineContext()->getRayTracingState()->enableRayTracingCounters) { - SmallVector tobeErased; - struct Payload { - SmallVectorImpl &tobeErased; - SpirvLowerRayQuery *self; - }; - Payload payload = {tobeErased, this}; - static auto visitor = llvm_dialects::VisitorBuilder() - .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) - .add([](auto &payload, auto &op) { - auto builder = payload.self->m_builder; - builder->SetInsertPoint(&op); - payload.tobeErased.push_back(&op); - op.replaceAllUsesWith(builder->getInt32(payload.self->generateTraceRayStaticId())); - }) - .build(); - visitor.visit(payload, *func); - for (auto *call : tobeErased) - 
call->eraseFromParent(); - } -} - -// ===================================================================================================================== -// Get Dispatch Id -// -// -Value *SpirvLowerRayQuery::getDispatchId() { - Value *zero = m_builder->getInt32(0); - Value *dispatchId = nullptr; - // Local thread ID for graphics shader Stage, global thread ID for compute/raytracing shader stage - if (m_shaderStage < ShaderStageCompute) { - auto subThreadId = m_builder->CreateReadBuiltInInput(lgc::BuiltInSubgroupLocalInvocationId); - dispatchId = PoisonValue::get(FixedVectorType::get(m_builder->getInt32Ty(), 3)); - dispatchId = m_builder->CreateInsertElement(dispatchId, subThreadId, uint64_t(0)); - dispatchId = m_builder->CreateInsertElement(dispatchId, zero, 1); - dispatchId = m_builder->CreateInsertElement(dispatchId, zero, 2); - } else { - dispatchId = m_builder->CreateReadBuiltInInput(lgc::BuiltInGlobalInvocationId); - } - - return dispatchId; -} - -void SpirvLowerRayQuery::createRayQueryProceedFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - auto int32x3Ty = FixedVectorType::get(m_builder->getInt32Ty(), 3); - Value *constRayFlags = m_builder->CreateAlloca(m_builder->getInt32Ty(), SPIRAS_Private); - Value *threadId = m_builder->CreateAlloca(int32x3Ty, SPIRAS_Private); - - Value *zero = m_builder->getInt32(0); - Value *rayQuery = func->arg_begin(); - Type *rayQueryEltTy = getRayQueryInternalTy(m_builder); - - // Initialize ldsUsage for the shader stage - if (stageNotSupportLds(m_shaderStage)) - m_builder->CreateStore(m_builder->getInt32(0), m_ldsUsage); - else - m_builder->CreateStore(m_builder->getInt32(1), m_ldsUsage); - - // Get RayQueryObj for rayquery object comparison - Value *rayQueryObj = m_builder->CreateLoad( - m_builder->getInt32Ty(), m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, 
RayQueryParams::RayQueryObj)); - Value *notEqual = - m_builder->CreateICmpNE(rayQueryObj, m_builder->CreateLoad(m_builder->getInt32Ty(), m_prevRayQueryObj)); - - Value *stackNumEntriesAddr = - m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::StackNumEntries); - - Value *stackNumEntries = m_builder->CreateLoad(m_builder->getInt32Ty(), stackNumEntriesAddr); - stackNumEntries = m_builder->CreateSelect(notEqual, zero, stackNumEntries); - m_builder->CreateStore(stackNumEntries, stackNumEntriesAddr); - - m_builder->CreateStore(rayQueryObj, m_prevRayQueryObj); - - m_builder->CreateStore(zero, constRayFlags); - - m_builder->CreateStore(getDispatchId(), threadId); - - Value *result = m_builder->CreateNamedCall( - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_RAY_QUERY_PROCEED), - func->getReturnType(), {rayQuery, constRayFlags, threadId}, {Attribute::NoUnwind, Attribute::AlwaysInline}); - - m_builder->CreateStore(m_builder->getInt32(1), m_ldsUsage); - m_builder->CreateRet(result); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryProceedKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - - // bool RayQueryProceedAmdInternal( - // inout RayQueryInternal rayQuery, - // in uint constRayFlags, - // in uint3 dispatchThreadId) - - // bool rayQueryProceedEXT(rayQueryEXT q -> rayQuery) - // { - // if (stageNotSupportLds(stage)) - // ldsUsage = 0; - // else - // ldsUsage = 1; - // if (rayQuery != prevRayQueryObj) - // rayQuery.stackNumEntries = 0 - // prevRayQueryObj = rayQuery - // constRayFlags = 0 - // rayId = 0 - // bool proceed = call RayQueryProceedAmdInternal - // ldsUsage = 1; - // return proceed; - // } - - createRayQueryProceedFunc(func); -} - -// 
===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionTypeKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // uint rayQueryGetIntersectionTypeEXT(rayQueryEXT q -> rayQuery, bool committed) - // { - // if (committed) - // return q.committedStatus - // else - // return q.candidateType (return Aabb if q.candidateType is Aabb/NonOpaqueAabb/NoDuplicateAnyHitAabb) - // } - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - Value *committed = func->arg_begin() + 1; - committed = m_builder->CreateTrunc(committed, m_builder->getInt1Ty()); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - rayQuery = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto candidateTy = m_builder->CreateExtractValue(rayQuery, RayQueryParams::CandidateType); - auto committedStatus = m_builder->CreateExtractValue(rayQuery, RayQueryParams::CommittedStatus); - Value *result = m_builder->CreateSelect(committed, committedStatus, candidateTy); - - // if (!committed && (q.candidateType)) - // result = Aabb - Value *compare = m_builder->CreateICmpUGE(result, m_builder->getInt32(RayQueryCandidateIntersection::Aabb)); - compare = m_builder->CreateAnd(compare, m_builder->CreateNot(committed)); - result = m_builder->CreateSelect(compare, m_builder->getInt32(RayQueryCandidateIntersection::Aabb), result); - - m_builder->CreateRet(result); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionTypeKHR -// -// @param func : The function to create -// @param raySystem : raySystem Parameter -Value *SpirvLowerRayQuery::createIntersectSystemValue(Function *func, unsigned 
raySystem) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - Value *intersect = func->arg_begin() + 1; - intersect = m_builder->CreateTrunc(intersect, m_builder->getInt1Ty()); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - rayQuery = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto candidate = m_builder->CreateExtractValue(rayQuery, RayQueryParams::Candidate); - auto committed = m_builder->CreateExtractValue(rayQuery, RayQueryParams::Committed); - auto candidateVal = m_builder->CreateExtractValue(candidate, raySystem); - auto committedVal = m_builder->CreateExtractValue(committed, raySystem); - return m_builder->CreateSelect(intersect, committedVal, candidateVal); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionBarycentricsKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - m_builder->CreateRet(createIntersectSystemValue(func, RaySystemParams::Barycentrics)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionTKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - Value *intersect = func->arg_begin() + 1; - Value *rayTMinAddr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::RayTMin); - auto minTVal = m_builder->CreateLoad(m_builder->getFloatTy(), 
rayTMinAddr); - - intersect = m_builder->CreateTrunc(intersect, m_builder->getInt1Ty()); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - rayQuery = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto candidate = m_builder->CreateExtractValue(rayQuery, RayQueryParams::Candidate); - auto committed = m_builder->CreateExtractValue(rayQuery, RayQueryParams::Committed); - auto candidateVal = m_builder->CreateExtractValue(candidate, RaySystemParams::RayTCurrent); - auto committedVal = m_builder->CreateExtractValue(committed, RaySystemParams::RayTCurrent); - auto lengthVal = m_builder->CreateSelect(intersect, committedVal, candidateVal); - - m_builder->CreateRet(m_builder->CreateFAdd(lengthVal, minTVal)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionInstanceCustomIndexKHR -// -// @param func : The function to create -template <> -void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // Read instance node pointer - auto instanceNodePtr = createIntersectSystemValue(func, RaySystemParams::InstanceNodePtr); - - // Extract instance node address from instance node pointer - Value *rayQuery = func->arg_begin(); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - rayQuery = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto instanceNodeAddr = createGetInstanceNodeAddr(instanceNodePtr, rayQuery); - - // Load instance index from instance node address - auto instanceIndex = createLoadInstanceIndexOrId(instanceNodeAddr, false); - - m_builder->CreateRet(instanceIndex); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionInstanceIdKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // Read instance node pointer - auto instanceNodePtr = 
createIntersectSystemValue(func, RaySystemParams::InstanceNodePtr); - - // Extract instance node address from instance node pointer - Value *rayQuery = func->arg_begin(); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - rayQuery = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto instanceNodeAddr = createGetInstanceNodeAddr(instanceNodePtr, rayQuery); - - // Load instance index from instance node address - auto instanceId = createLoadInstanceIndexOrId(instanceNodeAddr, true); - - m_builder->CreateRet(instanceId); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR -// -// @param func : The function to create -template <> -void SpirvLowerRayQuery::createRayQueryFunc( - Function *func) { - m_builder->CreateRet(createIntersectSystemValue(func, RaySystemParams::InstanceContribution)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionGeometryIndexKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - m_builder->CreateRet(createIntersectSystemValue(func, RaySystemParams::GeometryIndex)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionPrimitiveIndexKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - m_builder->CreateRet(createIntersectSystemValue(func, RaySystemParams::PrimitiveIndex)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionObjectRayDirectionKHR -// -// @param func : The function to 
create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - Value *frontFace = createIntersectSystemValue(func, RaySystemParams::FrontFace); - frontFace = m_builder->CreateTrunc(frontFace, m_builder->getInt1Ty()); - m_builder->CreateRet(frontFace); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionObjectRayDirectionKHR -// -// @param func : The function to create -template <> -void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - m_builder->CreateRet(createIntersectSystemValue(func, RaySystemParams::Direction)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionObjectRayOriginKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - m_builder->CreateRet(createIntersectSystemValue(func, RaySystemParams::Origin)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryTerminateKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - - if (m_context->getGfxIpVersion().major >= 11) { - // Navi3x and beyond, use rayQuery.currentNodePtr == TERMINAL_NODE to determine Terminate() - - // TERMINAL_NODE defined in GPURT is 0xFFFFFFFE - static const unsigned RayQueryTerminalNode = 0xFFFFFFFE; - - Value *currNodeAddr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::CurrNodePtr); - 
m_builder->CreateStore(m_builder->getInt32(RayQueryTerminalNode), currNodeAddr); - } else { - // Navi2x, use the following combination to determine Terminate() - // rayQuery.nodeIndex = 0xFFFFFFFF // invalid index - // rayQuery.numStackEntries = 0; - // rayQuery.stackPtr = ThreadIdInGroup() - - Value *currNodeAddr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::CurrNodePtr); - m_builder->CreateStore(m_builder->getInt32(InvalidValue), currNodeAddr); - - Value *stackNumEntries = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::StackNumEntries); - m_builder->CreateStore(m_builder->getInt32(0), stackNumEntries); - - Value *stackPtr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::StackPtr); - m_builder->CreateStore(getThreadIdInGroup(), stackPtr); - } - m_builder->CreateRetVoid(); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGenerateIntersectionKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // Ray tracing patch function: rayQueryGenerateIntersectionEXT - // void rayQueryGenerateIntersectionEXT(rayQuery, tHit) - // { - // if (rayQuery.candidateType == Aabb || - // rayQuery.candidateType == NonOpaqueAabb || - // rayQuery.candidateType == NoDuplicateAnyHitAabb) -> rayQuery.candidateType >= Aabb - // { - // rayQuery.commit = rayQuery.candidate - // rayQuery.committedStatus = gl_RayQueryCommittedIntersectionGeneratedEXT - // rayQuery.committed.rayTCurrent = tHit - rayQuery.rayTMin - // } - // } - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, ".entry", func); - BasicBlock *setBlock = BasicBlock::Create(*m_context, ".set", func); - BasicBlock *endBlock = BasicBlock::Create(*m_context, ".end", func); - - m_builder->SetInsertPoint(entryBlock); - Value 
*rayQuery = func->arg_begin(); - Value *hitT = func->arg_begin() + 1; - auto rayQueryTy = getRayQueryInternalTy(m_builder); - Value *rayQueryVal = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto candidateTy = m_builder->CreateExtractValue(rayQueryVal, RayQueryParams::CandidateType); - auto checkCandidate = m_builder->CreateICmpUGE(candidateTy, m_builder->getInt32(RayQueryCandidateIntersection::Aabb)); - m_builder->CreateCondBr(checkCandidate, setBlock, endBlock); - - // Set confirm block, set committed status and value - m_builder->SetInsertPoint(setBlock); - Value *candidate = m_builder->CreateExtractValue(rayQueryVal, RayQueryParams::Candidate); - Value *zero = m_builder->getInt32(0); - Value *storeAddr = m_builder->CreateConstGEP2_32(rayQueryTy, rayQuery, 0, RayQueryParams::Committed); - m_builder->CreateStore(candidate, storeAddr); - storeAddr = m_builder->CreateConstGEP2_32(rayQueryTy, rayQuery, 0, RayQueryParams::CommittedStatus); - m_builder->CreateStore(m_builder->getInt32(RayQueryCommittedIntersection::Generated), storeAddr); - storeAddr = m_builder->CreateGEP( - rayQueryTy, rayQuery, - {zero, m_builder->getInt32(RayQueryParams::Committed), m_builder->getInt32(RaySystemParams::RayTCurrent)}); - Value *rayTMinAddr = m_builder->CreateConstGEP2_32(rayQueryTy, rayQuery, 0, RayQueryParams::RayTMin); - auto minTVal = m_builder->CreateLoad(m_builder->getFloatTy(), rayTMinAddr); - // NOTE: rayTCurrent stored in rayQuery is relative to rayTMin, but tHit given by app is relative to ray origin. 
- m_builder->CreateStore(m_builder->CreateFSub(hitT, minTVal), storeAddr); - m_builder->CreateBr(endBlock); - - m_builder->SetInsertPoint(endBlock); - m_builder->CreateRetVoid(); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryConfirmIntersectionKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // Ray tracing patch function: rayQueryConfirmIntersectionEXT - // void rayQueryConfirmIntersectionEXT(rayQuery) - // { - // if (rayQuery.candidateType == gl_RayQueryCandidateIntersectionTriangleEXT) - // { - // rayQuery.committed = rayQuery.candidate; - // rayQuery.committedStatus = gl_RayQueryCommittedIntersectionTriangleEXT; - // } - // } - - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, ".entry", func); - BasicBlock *setBlock = BasicBlock::Create(*m_context, ".set", func); - BasicBlock *endBlock = BasicBlock::Create(*m_context, ".end", func); - - m_builder->SetInsertPoint(entryBlock); - Value *rayQuery = func->arg_begin(); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - Value *rayQueryVal = m_builder->CreateLoad(rayQueryTy, rayQuery); - auto candidateTy = m_builder->CreateExtractValue(rayQueryVal, RayQueryParams::CandidateType); - auto checkCandidate = - m_builder->CreateICmpEQ(candidateTy, m_builder->getInt32(RayQueryCandidateIntersection::NonOpaqueTriangle)); - m_builder->CreateCondBr(checkCandidate, setBlock, endBlock); - - // Set confirm block, set committed status and value - m_builder->SetInsertPoint(setBlock); - Value *candidate = m_builder->CreateExtractValue(rayQueryVal, RayQueryParams::Candidate); - Value *storeAddr = m_builder->CreateConstGEP2_32(rayQueryTy, rayQuery, 0, RayQueryParams::Committed); - m_builder->CreateStore(candidate, storeAddr); - storeAddr = m_builder->CreateConstGEP2_32(rayQueryTy, rayQuery, 0, 
RayQueryParams::CommittedStatus); - m_builder->CreateStore(m_builder->getInt32(RayQueryCommittedIntersection::Triangle), storeAddr); - m_builder->CreateBr(endBlock); - - m_builder->SetInsertPoint(endBlock); - m_builder->CreateRetVoid(); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetRayTMinKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - Value *rayTMinAddr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::RayTMin); - - m_builder->CreateRet(m_builder->CreateLoad(m_builder->getFloatTy(), rayTMinAddr)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetRayFlagsKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - Value *rayFlagsAddr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::RayFlags); - - m_builder->CreateRet(m_builder->CreateLoad(m_builder->getInt32Ty(), rayFlagsAddr)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionCandidateAABBOpaqueKHR -// -// @param func : The function to create -template <> -void 
SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - // bool rayQueryGetIntersectionCandidateAABBOpaqueEXT(rayQueryEXT q) - // { - // return (rayQuery.candidateType != NonOpaqueAabb); - // } - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - Value *candidateTypeAddr = m_builder->CreateConstGEP2_32(rayQueryEltTy, rayQuery, 0, RayQueryParams::CandidateType); - Value *candidateType = m_builder->CreateLoad(m_builder->getInt32Ty(), candidateTypeAddr); - Value *ret = - m_builder->CreateICmpNE(candidateType, m_builder->getInt32(RayQueryCandidateIntersection::NonOpaqueAabb)); - m_builder->CreateRet(ret); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetWorldRayDirectionKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - Value *dirAddr = m_builder->CreateGEP(rayQueryEltTy, rayQuery, - {m_builder->getInt32(0), m_builder->getInt32(RayQueryParams::RayDesc), - m_builder->getInt32(RayDescParams::Direction)}); - m_builder->CreateRet(m_builder->CreateLoad(floatx3Ty, dirAddr)); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetWorldRayOriginKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - 
func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryEltTy = getRayQueryInternalTy(m_builder); - Value *originAddr = m_builder->CreateGEP(rayQueryEltTy, rayQuery, - {m_builder->getInt32(0), m_builder->getInt32(RayQueryParams::RayDesc), - m_builder->getInt32(RayDescParams::Origin)}); - auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); - m_builder->CreateRet(m_builder->CreateLoad(floatx3Ty, originAddr)); -} - -// ===================================================================================================================== -// Get RayQuery intersection matrix -// -// @param builtInId : ID of the built-in variable -void SpirvLowerRayQuery::createIntersectMatrix(Function *func, unsigned builtInId) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, ".entry", func); - BasicBlock *endBlock = BasicBlock::Create(*m_context, ".end", func); - - m_builder->SetInsertPoint(entryBlock); - - Value *rayQuery = func->arg_begin(); - auto rayQueryTy = getRayQueryInternalTy(m_builder); - rayQuery = m_builder->CreateLoad(rayQueryTy, rayQuery); - - Value *intersect = func->arg_begin() + 1; - Value *accelStructLo = m_builder->CreateExtractValue(rayQuery, RayQueryParams::TopLevelBvhLo); - Value *accelStructHi = m_builder->CreateExtractValue(rayQuery, RayQueryParams::TopLevelBvhHi); - - Value *accelStruct = PoisonValue::get(FixedVectorType::get(Type::getInt32Ty(*m_context), 2)); - accelStruct = m_builder->CreateInsertElement(accelStruct, accelStructLo, uint64_t(0)); - accelStruct = m_builder->CreateInsertElement(accelStruct, accelStructHi, 1); - - intersect = m_builder->CreateTrunc(intersect, m_builder->getInt1Ty()); - auto candidate = m_builder->CreateExtractValue(rayQuery, RayQueryParams::Candidate); - auto committed = m_builder->CreateExtractValue(rayQuery, 
RayQueryParams::Committed); - auto candidateInstanceNodePtr = m_builder->CreateExtractValue(candidate, RaySystemParams::InstanceNodePtr); - auto committedInstanceNodePtr = m_builder->CreateExtractValue(committed, RaySystemParams::InstanceNodePtr); - Value *instanceNodePtr = m_builder->CreateSelect(intersect, committedInstanceNodePtr, candidateInstanceNodePtr); - Value *instanceNodeAddr = createGetInstanceNodeAddr(instanceNodePtr, rayQuery); - - Instruction *brInst = m_builder->CreateBr(endBlock); - Value *matrix = createTransformMatrix(builtInId, instanceNodeAddr, brInst); - m_builder->SetInsertPoint(endBlock); - m_builder->CreateRet(matrix); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionWorldToObjectKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - createIntersectMatrix(func, BuiltInWorldToObjectKHR); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionObjectToWorldKHR -// -// @param func : The function to create -template <> void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - createIntersectMatrix(func, BuiltInObjectToWorldKHR); -} - -// ===================================================================================================================== -// Process RayQuery OpRayQueryGetIntersectionTriangleVertexPositionsKHR -// -// @param func : The function to create -template <> -void SpirvLowerRayQuery::createRayQueryFunc(Function *func) { - func->addFnAttr(Attribute::AlwaysInline); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, ".entry", func); - m_builder->SetInsertPoint(entryBlock); - - // Cross module inliner cannot be used to inline a function with multiple blocks into in a degenerate block, create - // a 
temporary terminator first. - auto tempTerminator = m_builder->CreateUnreachable(); - m_builder->SetInsertPoint(tempTerminator); - - Value *rayQuery = func->arg_begin(); - Value *intersectVal = func->arg_begin() + 1; - Value *intersectPtr = m_builder->CreateAlloca(m_builder->getInt32Ty()); - m_builder->CreateStore(intersectVal, intersectPtr); - - // Call {vec3, vec3, vec3} FetchTrianglePositionFromRayQuery(rayquery* rayquery, int* intersect) - // return 3 triangle vertices - auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); - auto triangleData = m_crossModuleInliner.value() - .inlineCall(*m_builder, - getGpurtFunction(m_context->getPipelineContext()->getRayTracingFunctionName( - Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_RAY_QUERY)), - {rayQuery, intersectPtr}) - .returnValue; - - // Return type of OpRayQueryGetIntersectionTriangleVertexPositionsKHR is array of vec3 (vec3[3]). - auto retType = ArrayType::get(floatx3Ty, 3); - Value *ret = PoisonValue::get(retType); - for (unsigned i = 0; i < 3; i++) - ret = m_builder->CreateInsertValue(ret, m_builder->CreateExtractValue(triangleData, {i}), {i}); - m_builder->CreateRet(ret); - tempTerminator->eraseFromParent(); -} - -// ===================================================================================================================== -// Process compute/graphics/raytracing shader RayQueryOp functions -// -// @param func : The function to create -void SpirvLowerRayQuery::processShaderFunction(Function *func, unsigned opcode) { - switch (opcode) { - case OpRayQueryInitializeKHR: - return createRayQueryFunc(func); - case OpRayQueryProceedKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionTypeKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionBarycentricsKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionTKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionInstanceCustomIndexKHR: - return createRayQueryFunc(func); - case 
OpRayQueryGetIntersectionInstanceIdKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionGeometryIndexKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionPrimitiveIndexKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionFrontFaceKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionObjectRayDirectionKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionObjectRayOriginKHR: - return createRayQueryFunc(func); - case OpRayQueryTerminateKHR: - return createRayQueryFunc(func); - case OpRayQueryGenerateIntersectionKHR: - return createRayQueryFunc(func); - case OpRayQueryConfirmIntersectionKHR: - return createRayQueryFunc(func); - case OpRayQueryGetRayTMinKHR: - return createRayQueryFunc(func); - case OpRayQueryGetRayFlagsKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: - return createRayQueryFunc(func); - case OpRayQueryGetWorldRayDirectionKHR: - return createRayQueryFunc(func); - case OpRayQueryGetWorldRayOriginKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionObjectToWorldKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionWorldToObjectKHR: - return createRayQueryFunc(func); - case OpRayQueryGetIntersectionTriangleVertexPositionsKHR: - return createRayQueryFunc(func); - default: - return; - } -} - -// ===================================================================================================================== -// Create global variable for the LDS stack -void SpirvLowerRayQuery::createGlobalLdsUsage() { - m_ldsUsage = - new GlobalVariable(*m_module, Type::getInt32Ty(m_module->getContext()), true, GlobalValue::ExternalLinkage, - nullptr, RtName::LdsUsage, nullptr, GlobalValue::NotThreadLocal, SPIRAS_Private); - - m_ldsUsage->setAlignment(MaybeAlign(4)); -} - -// 
===================================================================================================================== -// Create global variable for the prevRayQueryObj -void SpirvLowerRayQuery::createGlobalRayQueryObj() { - m_prevRayQueryObj = - new GlobalVariable(*m_module, m_builder->getInt32Ty(), false, GlobalValue::ExternalLinkage, nullptr, - RtName::PrevRayQueryObj, nullptr, GlobalValue::NotThreadLocal, SPIRAS_Private); - m_prevRayQueryObj->setAlignment(MaybeAlign(4)); - - m_rayQueryObjGen = - new GlobalVariable(*m_module, m_builder->getInt32Ty(), false, GlobalValue::ExternalLinkage, nullptr, - RtName::RayQueryObjGen, nullptr, GlobalValue::NotThreadLocal, SPIRAS_Private); - m_rayQueryObjGen->setAlignment(MaybeAlign(4)); -} - -// ===================================================================================================================== -// Erase BasicBlocks from the Function -// -// @param func : Function -void SpirvLowerRayQuery::eraseFunctionBlocks(Function *func) { - for (auto blockIt = func->begin(), blockEnd = func->end(); blockIt != blockEnd;) { - BasicBlock *basicBlock = &*blockIt++; - basicBlock->dropAllReferences(); - basicBlock->eraseFromParent(); - } -} - -// ===================================================================================================================== -// Get function opcode -// -// @param func : Function to get opcode -unsigned SpirvLowerRayQuery::getFuncOpcode(Function *func) { - const MDNode *const funcMeta = func->getMetadata(m_spirvOpMetaKindId); - if (!funcMeta) - return 0; - - const ConstantAsMetadata *const metaConst = cast(funcMeta->getOperand(0)); - unsigned opcode = cast(metaConst->getValue())->getZExtValue(); - return opcode; -} - -// ===================================================================================================================== -// Create WorldToObject/ObjectToWorld Matrix by GpuRt Library Func. 
-// -// @param builtInId : ID of the built-in variable -// @param instanceNodeAddr : instanceNode Address -// @param insertPos : Where to insert instructions -Value *SpirvLowerRayQuery::createTransformMatrix(unsigned builtInId, Value *instanceNodeAddr, Instruction *insertPos) { - assert(builtInId == BuiltInWorldToObjectKHR || builtInId == BuiltInObjectToWorldKHR); - m_builder->SetInsertPoint(insertPos); - return createLoadMatrixFromFunc(instanceNodeAddr, builtInId); -} - -// ===================================================================================================================== -// Init Ray Query Count -// -void SpirvLowerRayQuery::initGlobalVariable() { - m_builder->CreateStore(m_builder->getInt32(InvalidValue), m_prevRayQueryObj); - m_builder->CreateStore(m_builder->getInt32(0), m_rayQueryObjGen); - m_builder->CreateStore(m_builder->getInt32(1), m_ldsUsage); -} - -// ===================================================================================================================== -// Generate a static ID for current Trace Ray call -// -unsigned SpirvLowerRayQuery::generateTraceRayStaticId() { - Util::MetroHash64 hasher; - hasher.Update(m_nextTraceRayId++); - hasher.Update(m_module->getName().bytes_begin(), m_module->getName().size()); - - MetroHash::Hash hash = {}; - hasher.Finalize(hash.bytes); - - return MetroHash::compact32(&hash); -} - -// ===================================================================================================================== -// Shader stages not support LDS -// -// @param stage : Shader stage -bool SpirvLowerRayQuery::stageNotSupportLds(ShaderStage stage) { - return stage == ShaderStageRayTracingAnyHit; -} - -// ===================================================================================================================== -// Create instructions to load instance index/id given the 64-bit instance node address at the current insert point -// Note: HLSL has just the opposite naming of index/ID compares to 
SPIR-V. -// So "isIndex = true" means we use InstanceId(InstanceIndex for GPURT) for vulkan, -// and "isIndex = false" means we use InstanceIndex(InstanceId for GPURT) for vulkan, -// @param instNodeAddr : 64-bit instance node address, in <2 x i32> -Value *SpirvLowerRayQuery::createLoadInstanceIndexOrId(Value *instNodeAddr, bool isIndex) { - Value *instanceIdPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); - m_builder->CreateStore(instNodeAddr, instanceIdPtr); - - StringRef getterName = isIndex - ? m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_INDEX) - : m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_ID); - - auto cmiResult = m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getterName), {instanceIdPtr}); - - return cmiResult.returnValue; -} - -// ===================================================================================================================== -// Call GpuRt Library to get instance node address given the instance node pointer at the current -// insert point -// -// @param instNodePtr : Instance node pointer -Value *SpirvLowerRayQuery::createGetInstanceNodeAddr(Value *instNodePtr, Value *rayQuery) { - Value *BvhAddrLo = m_builder->CreateExtractValue(rayQuery, RayQueryParams::TopLevelBvhLo); - Value *BvhAddrHi = m_builder->CreateExtractValue(rayQuery, RayQueryParams::TopLevelBvhHi); - - Value *BvhAddr = PoisonValue::get(FixedVectorType::get(Type::getInt32Ty(*m_context), 2)); - BvhAddr = m_builder->CreateInsertElement(BvhAddr, BvhAddrLo, uint64_t(0)); - BvhAddr = m_builder->CreateInsertElement(BvhAddr, BvhAddrHi, 1); - - StringRef getInstanceNodeAddr = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_GET_INSTANCE_NODE); - - auto bvhAddr = m_builder->CreateBitCast(BvhAddr, m_builder->getInt64Ty()); - Value *bvhPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); - Value *nodePtr = 
m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); - m_builder->CreateStore(bvhAddr, bvhPtr); - m_builder->CreateStore(instNodePtr, nodePtr); - - auto cmiResult = - m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getInstanceNodeAddr), {bvhPtr, nodePtr}); - return cmiResult.returnValue; -} - -// ===================================================================================================================== -// Call GpuRt Library Func to load a 3x4 matrix from given address at the current insert point -// -// @param instanceNodeAddr : instanceNode address, which type is i64 -Value *SpirvLowerRayQuery::createLoadMatrixFromFunc(Value *instanceNodeAddr, unsigned builtInId) { - auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); - auto matrixTy = ArrayType::get(floatx3Ty, 4); - - Value *instandeNodeAddrPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); - m_builder->CreateStore(instanceNodeAddr, instandeNodeAddrPtr); - - StringRef getMatrixFunc; - if (builtInId == BuiltInObjectToWorldKHR) { - getMatrixFunc = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_OBJECT_TO_WORLD_TRANSFORM); - } else { - getMatrixFunc = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_WORLD_TO_OBJECT_TRANSFORM); - } - - Value *matrixRow[4] = { - PoisonValue::get(floatx3Ty), - PoisonValue::get(floatx3Ty), - PoisonValue::get(floatx3Ty), - PoisonValue::get(floatx3Ty), - }; - - for (unsigned i = 0; i < 3; ++i) { - Value *row = m_builder->getInt32(i); - for (unsigned j = 0; j < 4; ++j) { - Value *col = m_builder->getInt32(j); - - Value *colPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); - Value *rowPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); - m_builder->CreateStore(col, colPtr); - m_builder->CreateStore(row, rowPtr); - - auto cmiMatrixResult = m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getMatrixFunc), - 
{instandeNodeAddrPtr, rowPtr, colPtr}); - matrixRow[j] = m_builder->CreateInsertElement(matrixRow[j], cmiMatrixResult.returnValue, uint64_t(i)); - } - } - - Value *matrix = PoisonValue::get(matrixTy); - matrix = m_builder->CreateInsertValue(matrix, matrixRow[0], 0); - matrix = m_builder->CreateInsertValue(matrix, matrixRow[1], 1); - matrix = m_builder->CreateInsertValue(matrix, matrixRow[2], 2); - matrix = m_builder->CreateInsertValue(matrix, matrixRow[3], 3); - return matrix; -} - -// ===================================================================================================================== -// Get thread ID in group. -Value *SpirvLowerRayQuery::getThreadIdInGroup() const { - // Todo: for graphics shader, subgroupId * waveSize + subgroupLocalInvocationId() - unsigned builtIn = m_context->getPipelineType() == PipelineType::Graphics ? BuiltInSubgroupLocalInvocationId - : BuiltInLocalInvocationIndex; - return m_builder->CreateReadBuiltInInput(static_cast(builtIn)); -} - -// ===================================================================================================================== -// Looks up an exported function in the GPURT module -Function *SpirvLowerRayQuery::getGpurtFunction(StringRef name) { - auto &gpurtContext = lgc::GpurtContext::get(*m_context); - Function *fn = gpurtContext.theModule->getFunction(name); - assert(fn); - return fn; -} - -} // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerRayQuery.h b/llpc/lower/llpcSpirvLowerRayQuery.h deleted file mode 100644 index 6c4df2e191..0000000000 --- a/llpc/lower/llpcSpirvLowerRayQuery.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2020-2024 Advanced Micro Devices, Inc. All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - **********************************************************************************************************************/ -/** - *********************************************************************************************************************** - * @file llpcSpirvLowerRayQuery.h - * @brief LLPC header file: contains declaration of Llpc::SpirvLowerRayQuery - *********************************************************************************************************************** - */ -#pragma once - -#include "SPIRVInternal.h" -#include "compilerutils/CompilerUtils.h" -#include "llpcSpirvLower.h" -#include "llvm/IR/PassManager.h" - -#pragma pack(push, 4) -// Acceleration structure result data offsets -struct ResultDataOffsets { - unsigned internalNodes; // Offset to internal box nodes - unsigned leafNodes; // Offset to leaf nodes - unsigned sideband; // Offset to sideband data (BVH4 bottom level only) - unsigned geometryInfo; // Offset to geometry desc info (bottom level only) - unsigned primNodePtrs; // Offset to prim node pointers (BVH4 with triangle compression and ALLOW_UPDATE only) -}; -#pragma pack(pop) - -#pragma pack(push, 4) -// Header for acceleration structure -struct AccelStructHeader { - unsigned type; // Type of acceleration structure (Top level or bottom level) - unsigned metadataSizeInBytes; // Total size of the metadata in bytes - unsigned sizeInBytes; // Total size of the structure in bytes (Including this header) - unsigned numPrimitives; // Number of primitives encoded in the structure - unsigned numActivePrims; // Tracks the number of active prims to add to bvh - unsigned taskCounter; // Used in update parallel path to synchronize thread groups - unsigned numDescs; // Number of instance/geometry descs in the structure - unsigned geometryType; // Type of geometry contained in the bottom level structure - ResultDataOffsets dataOffsets; // Offsets within accel struct (not including the header) - unsigned numInternalNodes; // Number of internal 
nodes used by the acceleration structure after building - unsigned numLeafNodes; // Number of leaf nodes used by the acceleration structure after building - unsigned bboxMin[3]; // 32bit bounding box (float3), min. Set only if root node is a box - unsigned bboxMax[3]; // 32bit bounding box (float3), max. Set only if root node is a box - unsigned padding[11]; // Padding bytes for 128-byte alignment (Gfx10 cacheline size) -}; -#pragma pack(pop) - -#pragma pack(push, 4) -// Header for ray tracing instance descriptor -struct RayTracingInstanceDesc { - float Transform[3][4]; // Inverse transform for traversal - uint32_t InstanceID_and_Mask; // 24-bit instance ID and 8-bit mask - uint32_t InstanceContributionToHitGroupIndex_and_Flags; // 24-bit instance contribution and 8-bit flags - uint32_t accelStructureAddressLo; // Lower part of acceleration structure base address - uint32_t accelStructureAddressHiAndFlags; // Upper part of acceleration structure base address and -}; -#pragma pack(pop) - -#pragma pack(push, 4) -// Header for ray tracing instance extra data -struct RayTracingInstanceExtraData { - uint32_t instanceIndex; - uint32_t blasNodePointer; // might not point to root - uint32_t blasMetadataSize; - uint32_t padding0; - float Transform[3][4]; // Non-inverse -}; -#pragma pack(pop) - -#pragma pack(push, 4) -// Header for ray tracing instance node -struct RayTracingInstanceNode { - RayTracingInstanceDesc desc; - RayTracingInstanceExtraData extra; -}; -#pragma pack(pop) - -namespace CompilerUtils { -class CrossModuleInliner; -} // namespace CompilerUtils - -namespace Llpc { - -// Corresponds to gl_RayFlags* in GLSL_EXT_ray_tracing.txt -enum RayFlag : unsigned { - None = 0x0000, // gl_RayFlagsNoneEXT - ForceOpaque = 0x0001, // gl_RayFlagsOpaqueEXT - ForceNonOpaque = 0x0002, // gl_RayFlagsNoOpaqueEXT - AcceptFirstHitAndEndSearch = 0x0004, // gl_RayFlagsTerminateOnFirstHitEXT - SkipClosestHitShader = 0x0008, // gl_RayFlagsSkipClosestHitShaderEXT - 
CullBackFacingTriangles = 0x0010, // gl_RayFlagsCullBackFacingTrianglesEXT - CullFrontFacingTriangles = 0x0020, // gl_RayFlagsCullFrontFacingTrianglesEXT - CullOpaque = 0x0040, // gl_RayFlagsCullOpaqueEXT - CullNonOpaque = 0x0080, // gl_RayFlagsCullNoOpaqueEXT -}; - -// ===================================================================================================================== -// Represents the pass of SPIR-V lowering ray query. -class SpirvLowerRayQuery : public SpirvLower, public llvm::PassInfoMixin { -public: - SpirvLowerRayQuery(); - SpirvLowerRayQuery(bool rayQueryLibrary); - llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); - llvm::Value *getThreadIdInGroup() const; - - static llvm::StringRef name() { return "Lower SPIR-V RayQuery operations"; } - - const static unsigned MaxLdsStackEntries = 16; - -protected: - void processLibraryFunction(llvm::Function *&func); - void processShaderFunction(llvm::Function *func, unsigned opcode); - void createGlobalLdsUsage(); - void createGlobalRayQueryObj(); - void initGlobalVariable(); - unsigned generateTraceRayStaticId(); - llvm::Value *createTransformMatrix(unsigned builtInId, llvm::Value *instanceNodeAddr, llvm::Instruction *insertPos); - void eraseFunctionBlocks(llvm::Function *func); - unsigned getFuncOpcode(llvm::Function *func); - llvm::Value *createLoadInstanceIndexOrId(Value *instNodeAddr, bool isIndex); - llvm::Value *createLoadMatrixFromFunc(llvm::Value *matrixAddr, unsigned builtInId); - llvm::Function *getGpurtFunction(llvm::StringRef name); - - bool m_rayQueryLibrary; // Whether the module is ray query library - unsigned m_spirvOpMetaKindId; // Metadata kind ID for "spirv.op" - std::optional m_crossModuleInliner; - -private: - template void createRayQueryFunc(llvm::Function *func); - void createRayQueryProceedFunc(llvm::Function *func); - llvm::Value *createIntersectSystemValue(llvm::Function *func, unsigned raySystem); - void 
createIntersectMatrix(llvm::Function *func, unsigned builtInId); - llvm::Value *createGetInstanceNodeAddr(llvm::Value *instNodePtr, llvm::Value *rayQuery); - llvm::Value *getDispatchId(); - bool stageNotSupportLds(ShaderStage stage); - - llvm::GlobalVariable *m_ldsUsage; // LDS usage - llvm::GlobalVariable *m_stackArray; // Stack array to hold stack value - llvm::GlobalVariable *m_prevRayQueryObj; // Previous ray query Object - llvm::GlobalVariable *m_rayQueryObjGen; // Ray query Object Id generator - unsigned m_nextTraceRayId; // Next trace ray ID to be used for ray history -}; - -} // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerRayTracing.cpp b/llpc/lower/llpcSpirvLowerRayTracing.cpp index 0d0f9af1a0..d40772b741 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.cpp +++ b/llpc/lower/llpcSpirvLowerRayTracing.cpp @@ -110,7 +110,7 @@ static unsigned TraceParamsTySize[] = { }; // ===================================================================================================================== -SpirvLowerRayTracing::SpirvLowerRayTracing() : SpirvLowerRayQuery(false) { +SpirvLowerRayTracing::SpirvLowerRayTracing() : m_nextTraceRayId(0) { } // ===================================================================================================================== @@ -1759,6 +1759,7 @@ Instruction *SpirvLowerRayTracing::createEntryFunc(Function *func) { m_entryPoint = newFunc; m_entryPoint->addFnAttr(Attribute::NoUnwind); m_entryPoint->addFnAttr(Attribute::AlwaysInline); + setLgcRtShaderStage(m_entryPoint, getLgcRtShaderStage(m_shaderStage)); Instruction *insertPos = &*(newFunc->begin()->getFirstNonPHIOrDbgOrAlloca()); m_builder->SetInsertPoint(insertPos); @@ -3058,4 +3059,109 @@ lgc::rt::RayTracingShaderStage SpirvLowerRayTracing::mapStageToLgcRtShaderStage( return static_cast(stage - ShaderStageRayTracingRayGen); } +// ===================================================================================================================== +// Generate a static ID for 
current Trace Ray call +// +unsigned SpirvLowerRayTracing::generateTraceRayStaticId() { + Util::MetroHash64 hasher; + hasher.Update(m_nextTraceRayId++); + hasher.Update(m_module->getName().bytes_begin(), m_module->getName().size()); + + MetroHash::Hash hash = {}; + hasher.Finalize(hash.bytes); + + return MetroHash::compact32(&hash); +} + +// ===================================================================================================================== +// Erase BasicBlocks from the Function +// +// @param func : Function +void SpirvLowerRayTracing::eraseFunctionBlocks(Function *func) { + for (auto blockIt = func->begin(), blockEnd = func->end(); blockIt != blockEnd;) { + BasicBlock *basicBlock = &*blockIt++; + basicBlock->dropAllReferences(); + basicBlock->eraseFromParent(); + } +} + +// ===================================================================================================================== +// Call GpuRt Library Func to load a 3x4 matrix from given address at the current insert point +// +// @param instanceNodeAddr : instanceNode address, which type is i64 +Value *SpirvLowerRayTracing::createLoadMatrixFromFunc(Value *instanceNodeAddr, unsigned builtInId) { + auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); + auto matrixTy = ArrayType::get(floatx3Ty, 4); + + Value *instandeNodeAddrPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); + m_builder->CreateStore(instanceNodeAddr, instandeNodeAddrPtr); + + StringRef getMatrixFunc; + if (builtInId == BuiltInObjectToWorldKHR) { + getMatrixFunc = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_OBJECT_TO_WORLD_TRANSFORM); + } else { + getMatrixFunc = + m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_WORLD_TO_OBJECT_TRANSFORM); + } + + Value *matrixRow[4] = { + PoisonValue::get(floatx3Ty), + PoisonValue::get(floatx3Ty), + PoisonValue::get(floatx3Ty), + PoisonValue::get(floatx3Ty), + }; + + for (unsigned i = 0; i < 3; ++i) 
{ + Value *row = m_builder->getInt32(i); + for (unsigned j = 0; j < 4; ++j) { + Value *col = m_builder->getInt32(j); + + Value *colPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); + Value *rowPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt32Ty()); + m_builder->CreateStore(col, colPtr); + m_builder->CreateStore(row, rowPtr); + + auto cmiMatrixResult = m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getMatrixFunc), + {instandeNodeAddrPtr, rowPtr, colPtr}); + matrixRow[j] = m_builder->CreateInsertElement(matrixRow[j], cmiMatrixResult.returnValue, uint64_t(i)); + } + } + + Value *matrix = PoisonValue::get(matrixTy); + matrix = m_builder->CreateInsertValue(matrix, matrixRow[0], 0); + matrix = m_builder->CreateInsertValue(matrix, matrixRow[1], 1); + matrix = m_builder->CreateInsertValue(matrix, matrixRow[2], 2); + matrix = m_builder->CreateInsertValue(matrix, matrixRow[3], 3); + return matrix; +} + +// ===================================================================================================================== +// Looks up an exported function in the GPURT module +Function *SpirvLowerRayTracing::getGpurtFunction(StringRef name) { + auto &gpurtContext = lgc::GpurtContext::get(*m_context); + Function *fn = gpurtContext.theModule->getFunction(name); + assert(fn); + return fn; +} + +// ===================================================================================================================== +// Create instructions to load instance index/id given the 64-bit instance node address at the current insert point +// Note: HLSL has just the opposite naming of index/ID compares to SPIR-V. 
+// So "isIndex = true" means we use InstanceId(InstanceIndex for GPURT) for vulkan, +// and "isIndex = false" means we use InstanceIndex(InstanceId for GPURT) for vulkan, +// @param instNodeAddr : 64-bit instance node address, in <2 x i32> +Value *SpirvLowerRayTracing::createLoadInstanceIndexOrId(Value *instNodeAddr, bool isIndex) { + Value *instanceIdPtr = m_builder->CreateAllocaAtFuncEntry(m_builder->getInt64Ty()); + m_builder->CreateStore(instNodeAddr, instanceIdPtr); + + StringRef getterName = isIndex + ? m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_INDEX) + : m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_ID); + + auto cmiResult = m_crossModuleInliner.value().inlineCall(*m_builder, getGpurtFunction(getterName), {instanceIdPtr}); + + return cmiResult.returnValue; +} + } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerRayTracing.h b/llpc/lower/llpcSpirvLowerRayTracing.h index d5c90b4744..6bc3539098 100644 --- a/llpc/lower/llpcSpirvLowerRayTracing.h +++ b/llpc/lower/llpcSpirvLowerRayTracing.h @@ -30,8 +30,11 @@ */ #pragma once -#include "llpcSpirvLowerRayQuery.h" +#include "SPIRVInternal.h" +#include "compilerutils/CompilerUtils.h" +#include "llpcSpirvLower.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/IR/PassManager.h" #include namespace lgc::rt { @@ -173,9 +176,22 @@ enum RayHitStatus : unsigned { constexpr unsigned SqttWellKnownTypeFunctionCallCompact = 0x11; constexpr unsigned SqttWellKnownTypeFunctionReturn = 0x10; +// Corresponds to gl_RayFlags* in GLSL_EXT_ray_tracing.txt +enum RayFlag : unsigned { + None = 0x0000, // gl_RayFlagsNoneEXT + ForceOpaque = 0x0001, // gl_RayFlagsOpaqueEXT + ForceNonOpaque = 0x0002, // gl_RayFlagsNoOpaqueEXT + AcceptFirstHitAndEndSearch = 0x0004, // gl_RayFlagsTerminateOnFirstHitEXT + SkipClosestHitShader = 0x0008, // gl_RayFlagsSkipClosestHitShaderEXT + CullBackFacingTriangles = 0x0010, // gl_RayFlagsCullBackFacingTrianglesEXT + 
CullFrontFacingTriangles = 0x0020, // gl_RayFlagsCullFrontFacingTrianglesEXT + CullOpaque = 0x0040, // gl_RayFlagsCullOpaqueEXT + CullNonOpaque = 0x0080, // gl_RayFlagsCullNoOpaqueEXT +}; + // ===================================================================================================================== // Represents the pass of SPIR-V lowering ray tracing. -class SpirvLowerRayTracing : public SpirvLowerRayQuery { +class SpirvLowerRayTracing : public SpirvLower, public llvm::PassInfoMixin { public: SpirvLowerRayTracing(); llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); @@ -183,8 +199,13 @@ class SpirvLowerRayTracing : public SpirvLowerRayQuery { static llvm::StringRef name() { return "Lower SPIR-V RayTracing operations"; } private: + void eraseFunctionBlocks(llvm::Function *func); + llvm::Value *createLoadInstanceIndexOrId(Value *instNodeAddr, bool isIndex); + llvm::Value *createLoadMatrixFromFunc(llvm::Value *matrixAddr, unsigned builtInId); + llvm::Function *getGpurtFunction(llvm::StringRef name); void createTraceParams(llvm::Function *func); void createRayGenEntryFunc(); + unsigned generateTraceRayStaticId(); void processShaderRecordBuffer(llvm::GlobalVariable *global, llvm::Value *bufferDesc, llvm::Value *tableIndex, llvm::Instruction *insertPos); llvm::CallInst *createTraceRay(); @@ -279,6 +300,8 @@ class SpirvLowerRayTracing : public SpirvLowerRayQuery { llvm::Value *createLoadInstNodeAddr(); lgc::rt::RayTracingShaderStage mapStageToLgcRtShaderStage(ShaderStage stage); + std::optional m_crossModuleInliner; + unsigned m_spirvOpMetaKindId; // Metadata kind ID for "spirv.op" llvm::Value *m_traceParams[TraceParam::Count]; // Trace ray set parameters llvm::StringRef m_traceParamNames[TraceParam::Count]; @@ -292,6 +315,7 @@ class SpirvLowerRayTracing : public SpirvLowerRayQuery { llvm::Value *m_shaderRecordIndex = nullptr; // Variable sourced from entry function argument llvm::Instruction *m_insertPosPastInit = 
nullptr; // Insert position after initialization instructions (storing trace // parameters, payload, callable data, etc.) + unsigned m_nextTraceRayId; // Next trace ray ID to be used for ray history }; } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerTranslator.cpp b/llpc/lower/llpcSpirvLowerTranslator.cpp index 9c61f90cd4..bcb8dc25d2 100644 --- a/llpc/lower/llpcSpirvLowerTranslator.cpp +++ b/llpc/lower/llpcSpirvLowerTranslator.cpp @@ -120,38 +120,4 @@ void SpirvLowerTranslator::translateSpirvToLlvm(const PipelineShaderInfo *shader } ShaderModuleHelper::cleanOptimizedSpirv(&optimizedSpirvBin); - - // NOTE: Our shader entrypoint is marked in the SPIR-V reader as dllexport. Here we tell LGC that it is the - // shader entry-point, and mark other functions as internal and always_inline. - // - // TODO: We should rationalize this code as follows: - // 1. Add code to the spir-v reader to add the entrypoint name as metadata; - // 2. change this code here to detect that, instead of DLLExport; - // 3. remove the code we added to the spir-v reader to detect the required entrypoint and mark it as DLLExport; - // 4. remove the required entrypoint name and execution model args that we added to the spir-v reader API, to - // make it closer to the upstream Khronos copy of that code. - for (auto &func : *module) { - if (func.empty()) - continue; - - if (func.getDLLStorageClass() == GlobalValue::DLLExportStorageClass) { - // A ray-tracing shader stage does not count as an LGC shader stage, as they are all linked into - // a compute shader or compute library. For those, remove the dllexport, and leave as external. 
- if (entryStage > ShaderStageCompute) { - func.setDLLStorageClass(GlobalValue::DefaultStorageClass); - func.setLinkage(GlobalValue::ExternalLinkage); - if (auto rtStage = getLgcRtShaderStage(entryStage)) - lgc::rt::setLgcRtShaderStage(&func, rtStage); - continue; - } - - lgc::Pipeline::markShaderEntryPoint(&func, getLgcShaderStage(entryStage)); - continue; - } - // Not shader entry-point. - func.setLinkage(GlobalValue::InternalLinkage); - if (func.hasFnAttribute(Attribute::NoInline)) - func.removeFnAttr(Attribute::NoInline); - func.addFnAttr(Attribute::AlwaysInline); - } } diff --git a/llpc/lower/llpcSpirvLowerUtil.cpp b/llpc/lower/llpcSpirvLowerUtil.cpp index 6a7f860f5b..037e85b76d 100644 --- a/llpc/lower/llpcSpirvLowerUtil.cpp +++ b/llpc/lower/llpcSpirvLowerUtil.cpp @@ -74,7 +74,7 @@ ShaderStage getShaderStageFromFunction(Function *function) { if (!execModelNode) return ShaderStageInvalid; - auto execModel = mdconst::dyn_extract(execModelNode->getOperand(0))->getZExtValue(); + auto execModel = mdconst::extract(execModelNode->getOperand(0))->getZExtValue(); return convertToShaderStage(execModel); } diff --git a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp index 817db5929c..974861d023 100644 --- a/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp +++ b/llpc/lower/llpcSpirvProcessGpuRtLibrary.cpp @@ -30,13 +30,14 @@ */ #include "llpcSpirvProcessGpuRtLibrary.h" #include "SPIRVInternal.h" +#include "compilerutils/ArgPromotion.h" #include "compilerutils/CompilerUtils.h" +#include "compilerutils/TypesMetadata.h" #include "llpcContext.h" #include "llpcRayTracingContext.h" #include "llpcSpirvLowerInternalLibraryIntrinsicUtil.h" #include "llpcSpirvLowerUtil.h" #include "llvmraytracing/Continuations.h" -#include "llvmraytracing/ContinuationsUtil.h" #include "lgc/Builder.h" #include "lgc/GpurtDialect.h" #include "lgc/LgcContext.h" @@ -44,6 +45,7 @@ #include "lgc/LgcRtDialect.h" #include "llvm/ADT/SmallBitVector.h" #include 
"llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/ValueSymbolTable.h" #define DEBUG_TYPE "llpc-spirv-lower-gpurt-library" using namespace lgc; @@ -62,11 +64,102 @@ SpirvProcessGpuRtLibrary::SpirvProcessGpuRtLibrary() { PreservedAnalyses SpirvProcessGpuRtLibrary::run(Module &module, ModuleAnalysisManager &analysisManager) { LLVM_DEBUG(dbgs() << "Run the pass Spirv-Lower-gpurt\n"); SpirvLower::init(&module); - for (auto funcIt = module.begin(), funcEnd = module.end(); funcIt != funcEnd;) { - Function *func = &*funcIt++; - processLibraryFunction(func); + + // Process each function. + SmallVector> argPromotionsFuncs; + auto rtipVersion = m_context->getPipelineContext()->getRayTracingState()->rtIpVersion; + unsigned rtip = rtipVersion.major * 10 + rtipVersion.minor; + SmallVector maybeRtFuncs; + for (Function &func : module) { + if (func.isDeclaration() || !func.hasName()) + continue; + // We have a function definition that was not left anonymous by being overridden by an earlier + // RTIP-suffixed version of the same function. + + // For rayQuery functions, we detect which ones we want to keep, and we select the correct RTIP variant. + // TODO: Use the same scheme for ray-tracing functions so we no longer need the GPURT-provided function + // name table that the driver passes in to the compiler. + // Detect a rayQuery function. If it needs pointer args promoting, set a bit vector for that. 
+ StringRef funcName = func.getName(); + SmallBitVector argPromotions(/*size=*/8); + bool isRqFunc = false; + if (funcName.starts_with("TraceRayInline")) + argPromotions.set(1, 8); + else if (funcName.starts_with("RayQueryProceed")) + argPromotions.set(1, 3); + else if (funcName.starts_with("FetchTrianglePositionFromRayQuery")) + argPromotions.set(1); + else { + StringRef rqFuncName = funcName; + isRqFunc = rqFuncName.consume_front("_RayQuery_"); + if (isRqFunc && rqFuncName.starts_with("CommitProceduralPrimitiveHit")) + argPromotions.set(1); + } + isRqFunc |= argPromotions.any(); + + if (!isRqFunc) { + // This is not a rayQuery function. Add to the list for processing after this loop. + maybeRtFuncs.push_back(&func); + continue; + } + + // This is a rayQuery function, and we have the args requiring promotion in the argPromotions bit vector. + // Parse off the RTIP suffix if any, e.g. "2_0", into a two-digit decimal number, e.g. 20. + // Ignore BVH8 funcs. + if (funcName.ends_with("BVH8")) + continue; + StringRef funcSuffix = funcName.take_back(3); + unsigned funcRtip = 0; + if (funcSuffix.size() == 3 && isdigit(funcSuffix[0]) && funcSuffix[1] == '_' && isdigit(funcSuffix[2])) { + funcRtip = (funcSuffix[0] - '0') * 10 + (funcSuffix[2] - '0'); + funcName = funcName.drop_back(funcSuffix.size()); + } + // If this function has an RTIP suffix but it is wrong, ignore it (leaving it as internal linkage so it gets + // removed later). + if (funcRtip != 0 && funcRtip != rtip) + continue; + + if (funcRtip != 0) { + // We have a function with the correct RTIP suffix. We want to rename it without the RTIP suffix. + // If there is another function of the same name without the RTIP suffix, take its name and make the + // other function internal so it gets removed later. (This works whether we saw that function first or + // this RTIP-suffixed one.) 
+ if (Function *otherFunc = module.getFunction(funcName)) { + otherFunc->setLinkage(GlobalValue::InternalLinkage); + func.takeName(otherFunc); + } else { + // No other function. Set name the normal way. Note use of str() to copy the unsuffixed name out + // before setName() frees it. + func.setName(funcName.str()); + } + } + // Set external linkage on this function. + func.setLinkage(GlobalValue::WeakAnyLinkage); + + if (argPromotions.any()) { + // Add this function to the list that need arg promotion. + // We don't do the arg promotion here as it invalidates the module iterator. + // Also, we might end up not needing to do it for a non-RTIP-suffixed function that gets overridden + // by an RTIP-suffixed function later in the loop. + argPromotionsFuncs.push_back({&func, argPromotions}); + } + } + + // Promote args on functions as required. Skip overridden non-RTIP-suffixed ones that have gone back to + // being internal linkage. + for (const std::pair &argPromotionsFunc : argPromotionsFuncs) { + Function *func = argPromotionsFunc.first; + if (func->getLinkage() == GlobalValue::InternalLinkage) + continue; + Function *promotedFunc = CompilerUtils::promotePointerArguments(func, argPromotionsFunc.second); + promotedFunc->setLinkage(GlobalValue::WeakAnyLinkage); } + // Process ray-tracing (i.e. non-rayQuery) functions in a separate loop; processLibraryFunction() may do + // arg promotion, so we cannot do it in the same loop. 
+ for (Function *func : maybeRtFuncs) + processLibraryFunction(func); + return PreservedAnalyses::none(); } @@ -139,64 +232,6 @@ SpirvProcessGpuRtLibrary::LibraryFunctionTable::LibraryFunctionTable() { void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { auto funcName = func->getName(); - StringRef traceRayFuncName = m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_TRACE_RAY); - - const StringRef rayQueryInitializeFuncName = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_TRACE_RAY_INLINE); - const StringRef rayQueryProceedFuncName = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_RAY_QUERY_PROCEED); - - const StringRef fetchTrianglePositionFromNodePointerFuncName = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_NODE_POINTER); - const StringRef fetchTrianglePositionFromRayQueryFuncName = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_FETCH_HIT_TRIANGLE_FROM_RAY_QUERY); - - const StringRef getInstanceIndex = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_INDEX); - - const StringRef getInstanceId = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_INSTANCE_ID); - - const StringRef getInstanceNodeAddr = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_GET_INSTANCE_NODE); - - const StringRef getObjToWorldTrans = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_OBJECT_TO_WORLD_TRANSFORM); - - const StringRef getWorldToObjTrans = - m_context->getPipelineContext()->getRayTracingFunctionName(Vkgc::RT_ENTRY_WORLD_TO_OBJECT_TRANSFORM); - - assert(!traceRayFuncName.empty()); - assert(!rayQueryInitializeFuncName.empty()); - assert(!rayQueryProceedFuncName.empty()); - assert(!fetchTrianglePositionFromNodePointerFuncName.empty()); - 
assert(!fetchTrianglePositionFromRayQueryFuncName.empty()); - assert(!getInstanceIndex.empty()); - assert(!getInstanceId.empty()); - assert(!getInstanceNodeAddr.empty()); - assert(!getObjToWorldTrans.empty()); - assert(!getWorldToObjTrans.empty()); - - // Set external linkage for library entry functions - if (funcName.starts_with(traceRayFuncName) || funcName.starts_with(rayQueryInitializeFuncName) || - funcName.starts_with(rayQueryProceedFuncName) || - funcName.starts_with(fetchTrianglePositionFromNodePointerFuncName) || - funcName.starts_with(fetchTrianglePositionFromRayQueryFuncName) || funcName.starts_with(getInstanceIndex) || - funcName.starts_with(getInstanceId) || funcName.starts_with(getInstanceNodeAddr) || - funcName.starts_with(getObjToWorldTrans) || funcName.starts_with(getWorldToObjTrans)) { - func->setLinkage(GlobalValue::WeakAnyLinkage); - return; - } - - // Drop dummy entry function. - static const char *LibraryEntryFuncName = "libraryEntry"; - if (funcName.starts_with(LibraryEntryFuncName)) { - func->dropAllReferences(); - func->eraseFromParent(); - func = nullptr; - return; - } - // Special handling for _AmdContStackStore* and _AmdContStackLoad* to accept arbitrary type if (funcName.starts_with("_AmdContStackStore")) { m_builder->SetInsertPoint(clearBlock(func)); @@ -229,6 +264,19 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { } ContHelper::handleGetSetting(*func, contSettings); return; + } else if (funcName.starts_with("_AmdValueI32Count")) { + ContHelper::handleValueI32Count(*func, *m_builder); + return; + } else if (funcName.starts_with("_AmdValueGetI32") || funcName.starts_with("_AmdValueSetI32")) { + // The intrinsic handling require first argument to be a pointer, the rest to be values. 
+ SmallBitVector promotionMask(func->arg_size(), true); + promotionMask.reset(0); + auto newFunc = CompilerUtils::promotePointerArguments(func, promotionMask); + if (funcName.starts_with("_AmdValueGetI32")) + ContHelper::handleValueGetI32(*newFunc, *m_builder); + else + ContHelper::handleValueSetI32(*newFunc, *m_builder); + return; } // Create implementation for intrinsic functions. @@ -256,16 +304,18 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { // automatically. bool isAmdIntrinsic = funcName.starts_with("_Amd") && !funcName.contains("."); if (funcName.starts_with("_cont_") || isAmdIntrinsic) { - func->setLinkage(GlobalValue::WeakAnyLinkage); + // This function is provided by GPURT to the compiler. + if (!isAmdIntrinsic) + func->setLinkage(GlobalValue::WeakAnyLinkage); // Skip _AmdAwaitTraversal function resulting from calls to _AmdWaitAwaitTraversal. - if (!func->hasMetadata(ContHelper::MDTypesName) && !func->arg_empty()) + if (!func->hasMetadata(TypedFuncTy::MDTypesName) && !func->arg_empty()) return; SmallBitVector promotionMask(func->arg_size()); for (unsigned argNo = 0; argNo < func->arg_size(); argNo++) { auto *arg = func->getArg(argNo); - ContArgTy argTy = ContArgTy::get(func, arg); + TypedArgTy argTy = TypedArgTy::get(arg); auto funcName = func->getName(); if (!argTy.isPointerTy()) @@ -279,10 +329,10 @@ void SpirvProcessGpuRtLibrary::processLibraryFunction(Function *&func) { promotionMask.set(argNo); } - auto *newFunc = promotePointerArguments(func, promotionMask); + auto *newFunc = CompilerUtils::promotePointerArguments(func, promotionMask); - // Delete function body of _Amd* intrinsics that survive here, they will be handled in LowerRaytracingPipeline. - if (funcName.starts_with("_Amd")) + // This function is provided by the compiler to GPURT. It will be substituted by LowerRaytracingPipeline. 
+ if (isAmdIntrinsic) newFunc->deleteBody(); if (newFunc->getName().starts_with("_AmdWaitAwait")) { @@ -831,9 +881,7 @@ void SpirvProcessGpuRtLibrary::createGetStaticId(llvm::Function *func) { // // @param func : The function to create void SpirvProcessGpuRtLibrary::createGetKnownSetRayFlags(llvm::Function *func) { - // TODO: currently return 0 to indicate that there is no known set - // We will probably need to analyse the traceRay ray flags for actual value - m_builder->CreateRet(m_builder->getInt32(0)); + m_builder->CreateRet(m_builder->create()); } // ===================================================================================================================== @@ -841,9 +889,7 @@ void SpirvProcessGpuRtLibrary::createGetKnownSetRayFlags(llvm::Function *func) { // // @param func : The function to create void SpirvProcessGpuRtLibrary::createGetKnownUnsetRayFlags(llvm::Function *func) { - // TODO: return 0 to indicate there is no knownUnset bits - // We will probably need to analyse the traceRay ray flags for actual value - m_builder->CreateRet(m_builder->getInt32(0)); + m_builder->CreateRet(m_builder->create()); } // ===================================================================================================================== diff --git a/llpc/test/shaderdb/core/FMA_TestOperandIsZero.spvasm b/llpc/test/shaderdb/core/FMA_TestOperandIsZero.spvasm index cb10e2f9e4..3d20b207b5 100644 --- a/llpc/test/shaderdb/core/FMA_TestOperandIsZero.spvasm +++ b/llpc/test/shaderdb/core/FMA_TestOperandIsZero.spvasm @@ -1,12 +1,11 @@ ; Test on fma((b==0.0 ? 0.0 : a), (a==0.0 ? 
0.0 : b), c) ; BEGIN_SHADERTEST -; RUN: amdllpc --disable-fma=false -v -gfxip=11 %s | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results -; SHADERTEST: call ninf float @llvm.amdgcn.fma.legacy(float %{{.*}}, float %{{.*}}, float %{{.*}}) -; SHADERTEST: call ninf float @llvm.amdgcn.fma.legacy(float %{{.*}}, float %{{.*}}, float %{{.*}}) -; SHADERTEST: call ninf float @llvm.amdgcn.fma.legacy(float %{{.*}}, float %{{.*}}, float %{{.*}}) -; SHADERTEST: AMDLLPC SUCCESS +; RUN: amdllpc --disable-fma=false -stop-after=lgc-patch-mul-dx9-zero %gfxip -o - %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST-LABEL: @lgc.shader.FS.main( +; SHADERTEST: call nnan float @llvm.amdgcn.fma.legacy(float {{.*}}, float {{.*}}, float {{.*}}) +; SHADERTEST: call nnan float @llvm.amdgcn.fma.legacy(float {{.*}}, float {{.*}}, float {{.*}}) +; SHADERTEST: call nnan float @llvm.amdgcn.fma.legacy(float {{.*}}, float {{.*}}, float {{.*}}) ; END_SHADERTEST ; SPIR-V @@ -31,9 +30,9 @@ OpDecorate %b Location 1 OpDecorate %a Location 0 OpDecorate %fragColor Location 0 - OpDecorate %22 FPFastMathMode NotInf - OpDecorate %32 FPFastMathMode NotInf - OpDecorate %38 FPFastMathMode NotInf + OpDecorate %22 FPFastMathMode NotNaN + OpDecorate %32 FPFastMathMode NotNaN + OpDecorate %38 FPFastMathMode NotNaN %void = OpTypeVoid %3 = OpTypeFunction %void %float = OpTypeFloat 32 @@ -72,7 +71,8 @@ %37 = OpSelect %float %35 %float_0 %36 %38 = OpExtInst %float %1 Fma %34 %37 %23 %39 = OpFAdd %float %38 %22 - OpStore %d %39 + %40 = OpFAdd %float %39 %33 + OpStore %d %40 %26 = OpLoad %float %d %27 = OpCompositeConstruct %v4float %26 %26 %26 %26 OpStore %fragColor %27 diff --git a/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm b/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm index 7eaa31a17b..014c4f991f 100644 --- a/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm +++ 
b/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm @@ -88,60 +88,44 @@ OpFunctionEnd ; SHADERTEST-LABEL: @main( ; SHADERTEST-NEXT: .entry: -; SHADERTEST-NEXT: [[TMP0:%.*]] = alloca { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } }, align 8, addrspace(5) +; SHADERTEST-NEXT: [[TMP0:%.*]] = alloca { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 }, align 8, addrspace(5) ; SHADERTEST-NEXT: [[_12:%.*]] = alloca i64, align 8, addrspace(5) ; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) ; SHADERTEST-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } poison, ptr addrspace(4) [[TMP1]], 0 -; SHADERTEST-NEXT: [[TMP4:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP3]], i32 [[TMP2]], 1 -; SHADERTEST-NEXT: [[TMP5:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP4]], i32 32, 2 -; SHADERTEST-NEXT: [[TMP6:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP5]], i32 1, 3 -; SHADERTEST-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP8:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } zeroinitializer, ptr addrspace(4) [[TMP7]], 0 -; SHADERTEST-NEXT: [[TMP9:%.*]] = call i32 (...) 
@lgc.create.get.desc.stride.i32(i32 2, i32 2, i64 0, i32 7) -; SHADERTEST-NEXT: [[TMP10:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } [[TMP8]], i32 [[TMP9]], 1 -; SHADERTEST-NEXT: [[TMP11:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } poison, { ptr addrspace(4), i32, i32, i32 } [[TMP6]], 0 -; SHADERTEST-NEXT: [[_11:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP11]], { ptr addrspace(4), i32, i32 } [[TMP10]], 1 -; SHADERTEST-NEXT: [[TMP12:%.*]] = load i64, ptr addrspace(64) @_4, align 8 -; SHADERTEST-NEXT: store i64 [[TMP12]], ptr addrspace(5) [[_12]], align 8 -; SHADERTEST-NEXT: [[TMP13:%.*]] = load i64, ptr addrspace(5) [[_12]], align 8 -; SHADERTEST-NEXT: call void @spirv.NonUniform.i64(i64 [[TMP13]]) -; SHADERTEST-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32 -; SHADERTEST-NEXT: [[TMP15:%.*]] = getelementptr [4294967295 x i8], ptr null, i32 0, i32 [[TMP14]] -; SHADERTEST-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[TMP15]] to i32 -; SHADERTEST-NEXT: [[TMP17:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[_11]], 0 -; SHADERTEST-NEXT: [[TMP18:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[_11]], 1 -; SHADERTEST-NEXT: [[TMP19:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP17]], 0 -; SHADERTEST-NEXT: [[TMP20:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP17]], 1 -; SHADERTEST-NEXT: [[TMP21:%.*]] = mul i32 [[TMP16]], [[TMP20]] -; SHADERTEST-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP19]], i32 [[TMP21]] -; SHADERTEST-NEXT: [[TMP23:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP17]], ptr addrspace(4) [[TMP22]], 0 -; SHADERTEST-NEXT: [[TMP24:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP18]], 0 -; SHADERTEST-NEXT: [[TMP25:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP18]], 1 -; SHADERTEST-NEXT: 
[[TMP26:%.*]] = mul i32 [[TMP16]], [[TMP25]] -; SHADERTEST-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP24]], i32 [[TMP26]] -; SHADERTEST-NEXT: [[TMP28:%.*]] = insertvalue { ptr addrspace(4), i32, i32 } [[TMP18]], ptr addrspace(4) [[TMP27]], 0 -; SHADERTEST-NEXT: [[TMP29:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } poison, { ptr addrspace(4), i32, i32, i32 } [[TMP23]], 0 -; SHADERTEST-NEXT: [[TMP30:%.*]] = insertvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP29]], { ptr addrspace(4), i32, i32 } [[TMP28]], 1 -; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[s[p4,i32,i32,i32],s[p4,i32,i32]]"({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]]) -; SHADERTEST-NEXT: [[TMP31:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 1 -; SHADERTEST-NEXT: [[TMP32:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP31]], 2 -; SHADERTEST-NEXT: [[TMP33:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP31]], 0 -; SHADERTEST-NEXT: [[TMP34:%.*]] = insertvalue { ptr addrspace(4), i32 } poison, ptr addrspace(4) [[TMP33]], 0 -; SHADERTEST-NEXT: [[TMP35:%.*]] = insertvalue { ptr addrspace(4), i32 } [[TMP34]], i32 [[TMP32]], 1 -; SHADERTEST-NEXT: [[TMP36:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 0 -; SHADERTEST-NEXT: [[TMP37:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP36]], 0 -; SHADERTEST-NEXT: [[TMP38:%.*]] = insertvalue [3 x ptr addrspace(4)] poison, ptr addrspace(4) [[TMP37]], 0 -; SHADERTEST-NEXT: [[TMP39:%.*]] = insertvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } poison, [3 x ptr addrspace(4)] [[TMP38]], 0 -; SHADERTEST-NEXT: [[TMP40:%.*]] = insertvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP39]], { ptr addrspace(4), i32 } [[TMP35]], 1 -; SHADERTEST-NEXT: call void 
@"spirv.NonUniform.s[a3p4,s[p4,i32]]"({ [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP40]]) -; SHADERTEST-NEXT: store { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP40]], ptr addrspace(5) [[TMP0]], align 8 -; SHADERTEST-NEXT: [[TMP41:%.*]] = load { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } }, ptr addrspace(5) [[TMP0]], align 8 -; SHADERTEST-NEXT: [[TMP42:%.*]] = extractvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP41]], 1 -; SHADERTEST-NEXT: [[TMP43:%.*]] = extractvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP41]], 0 -; SHADERTEST-NEXT: [[TMP44:%.*]] = extractvalue [3 x ptr addrspace(4)] [[TMP43]], 0 -; SHADERTEST-NEXT: [[TMP45:%.*]] = extractvalue { ptr addrspace(4), i32 } [[TMP42]], 0 -; SHADERTEST-NEXT: [[TMP46:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) [[TMP44]], ptr addrspace(4) [[TMP45]], i32 1, <2 x float> zeroinitializer) -; SHADERTEST-NEXT: store <4 x float> [[TMP46]], ptr addrspace(65) @_3, align 16 +; SHADERTEST-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } poison, ptr addrspace(4) [[TMP1]], 0 +; SHADERTEST-NEXT: [[TMP4:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP3]], i32 [[TMP2]], 1 +; SHADERTEST-NEXT: [[TMP5:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP4]], i32 0, 2 +; SHADERTEST-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 7) +; SHADERTEST-NEXT: [[TMP7:%.*]] = call i32 (...) 
@lgc.create.get.desc.stride.i32(i32 2, i32 2, i64 0, i32 7) +; SHADERTEST-NEXT: [[TMP8:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP5]], ptr addrspace(4) [[TMP6]], 3 +; SHADERTEST-NEXT: [[TMP9:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP8]], i32 [[TMP7]], 4 +; SHADERTEST-NEXT: [[_11:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP9]], i32 0, 5 +; SHADERTEST-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(64) @_4, align 8 +; SHADERTEST-NEXT: store i64 [[TMP10]], ptr addrspace(5) [[_12]], align 8 +; SHADERTEST-NEXT: [[TMP11:%.*]] = load i64, ptr addrspace(5) [[_12]], align 8 +; SHADERTEST-NEXT: call void @spirv.NonUniform.i64(i64 [[TMP11]]) +; SHADERTEST-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i32 +; SHADERTEST-NEXT: [[TMP13:%.*]] = getelementptr [4294967295 x i8], ptr null, i32 0, i32 [[TMP12]] +; SHADERTEST-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i32 +; SHADERTEST-NEXT: [[TMP15:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[_11]], 0 +; SHADERTEST-NEXT: [[TMP16:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[_11]], 1 +; SHADERTEST-NEXT: [[TMP17:%.*]] = mul i32 [[TMP14]], [[TMP16]] +; SHADERTEST-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP15]], i32 [[TMP17]] +; SHADERTEST-NEXT: [[TMP19:%.*]] = insertvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[_11]], ptr addrspace(4) [[TMP18]], 0 +; SHADERTEST-NEXT: [[TMP20:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP19]], 3 +; SHADERTEST-NEXT: [[TMP21:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP19]], 4 +; SHADERTEST-NEXT: [[TMP22:%.*]] = mul i32 [[TMP14]], [[TMP21]] +; SHADERTEST-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP20]], i32 [[TMP22]] +; SHADERTEST-NEXT: [[TMP24:%.*]] = insertvalue { ptr 
addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP19]], ptr addrspace(4) [[TMP23]], 3 +; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[p4,i32,i32,p4,i32,i32]"({ ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP24]]) +; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[p4,i32,i32,p4,i32,i32]"({ ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP24]]) +; SHADERTEST-NEXT: store { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP24]], ptr addrspace(5) [[TMP0]], align 8 +; SHADERTEST-NEXT: [[TMP25:%.*]] = load { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 }, ptr addrspace(5) [[TMP0]], align 8 +; SHADERTEST-NEXT: [[TMP26:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP25]], 0 +; SHADERTEST-NEXT: [[TMP27:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP25]], 2 +; SHADERTEST-NEXT: [[TMP28:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP25]], 3 +; SHADERTEST-NEXT: [[TMP29:%.*]] = extractvalue { ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } [[TMP25]], 5 +; SHADERTEST-NEXT: [[TMP30:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) [[TMP26]], ptr addrspace(4) [[TMP28]], i32 1, <2 x float> zeroinitializer) +; SHADERTEST-NEXT: store <4 x float> [[TMP30]], ptr addrspace(65) @_3, align 16 ; SHADERTEST-NEXT: ret void ; diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp index 2d46dfd421..f68b4bb460 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp @@ -124,94 +124,94 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 4, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 6, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 8, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 9, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 10, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9, i32 3) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9, i32 3) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 10, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 4, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 5, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 8, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 6, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 7, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 4, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 6, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 9, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 10, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 0, i32 3, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 10, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 4, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 5, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 8, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 6, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 7, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 2, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 9, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 10, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 4, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.compare.swap.i32(i32 5, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 8, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 6, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 7, i32 512, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 9, i16 7, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp index 3892925122..17d02659c0 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp @@ -16,9 +16,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 513, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 1, i32 515, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 9, i16 5, i16 5, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp index c64f1c2cdb..90659fc955 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp @@ -46,23 +46,23 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 5, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.atomic.f32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 512, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.compare.swap.i32(i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 5, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.atomic.f32(i32 0, i32 1, i32 512, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 9, i16 7, i16 7, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag index 81b0a25cc1..e5eb6a602c 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag @@ -48,23 +48,23 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 6, i32 1, i32 128, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 5, i32 10, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 10, i32 128, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 0, i32 0, ptr addrspace(4) -; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.atomic.f32(i32 0, i32 9, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 640, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 8, i32 6, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 5, i32 10, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 10, i32 640, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 512, i32 0, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) 
@lgc.create.image.atomic.f32(i32 0, i32 9, i32 512, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %{{.*}}, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm b/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm index fd43a195b7..385d139db3 100644 --- a/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm +++ b/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm @@ -1,12 +1,11 @@ ; Test on ((b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b)) ; BEGIN_SHADERTEST -; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results -; SHADERTEST: call ninf float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) -; SHADERTEST: call ninf float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) -; SHADERTEST: call ninf float @llvm.amdgcn.fmul.legacy(float %{{.*}}, float %{{.*}}) -; SHADERTEST: AMDLLPC SUCCESS +; RUN: amdllpc -stop-after=lgc-patch-mul-dx9-zero %gfxip -o - %s | FileCheck -check-prefix=SHADERTEST %s +; SHADERTEST-LABEL: @lgc.shader.FS.main( +; SHADERTEST: call nnan float @llvm.amdgcn.fmul.legacy(float {{.*}}, float {{.*}}) +; SHADERTEST: call nnan float @llvm.amdgcn.fmul.legacy(float {{.*}}, float {{.*}}) +; SHADERTEST: call nnan float @llvm.amdgcn.fmul.legacy(float {{.*}}, float {{.*}}) ; END_SHADERTEST ; SPIR-V @@ -29,9 +28,9 @@ OpDecorate %b Location 1 OpDecorate %a Location 0 OpDecorate %fragColor Location 0 - OpDecorate %22 FPFastMathMode NotInf - OpDecorate %32 FPFastMathMode NotInf - OpDecorate %38 FPFastMathMode NotInf + OpDecorate %22 FPFastMathMode NotNaN + OpDecorate %32 FPFastMathMode NotNaN + OpDecorate %38 FPFastMathMode NotNaN OpDecorate %33 NoContraction OpDecorate %39 NoContraction %void = OpTypeVoid diff --git a/llpc/test/shaderdb/core/OpFunctionCall_TestArguTexArray_lit.frag 
b/llpc/test/shaderdb/core/OpFunctionCall_TestArguTexArray_lit.frag index 0f0af83fe9..3eee27e308 100644 --- a/llpc/test/shaderdb/core/OpFunctionCall_TestArguTexArray_lit.frag +++ b/llpc/test/shaderdb/core/OpFunctionCall_TestArguTexArray_lit.frag @@ -26,7 +26,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-COUNT-2: call {{.*}} <4 x float> @{{.*}} -; SHADERTEST: define internal {{.*}} <4 x float> @{{.*}}({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %{{[a-zA-Z0-9]+}}, ptr addrspace(5) %{{[a-z0-9]+}}) +; SHADERTEST: define internal {{.*}} <4 x float> @{{.*}}({ ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } %{{[a-zA-Z0-9]+}}, ptr addrspace(5) %{{[a-z0-9]+}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/core/OpFunctionCall_TestParamSimpleTex_lit.frag b/llpc/test/shaderdb/core/OpFunctionCall_TestParamSimpleTex_lit.frag index 6635e4e464..5c4cb7e48c 100644 --- a/llpc/test/shaderdb/core/OpFunctionCall_TestParamSimpleTex_lit.frag +++ b/llpc/test/shaderdb/core/OpFunctionCall_TestParamSimpleTex_lit.frag @@ -20,7 +20,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-COUNT-2: call {{.*}} <4 x float> @{{.*}} -; SHADERTEST: define internal {{.*}}<4 x float> @{{.*}}({ { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } %{{[a-zA-Z0-9]+}}, ptr addrspace(5) %{{[a-z0-9]+}}) +; SHADERTEST: define internal {{.*}}<4 x float> @{{.*}}({ ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } %{{[a-zA-Z0-9]+}}, ptr addrspace(5) %{{[a-z0-9]+}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexArray_lit.frag b/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexArray_lit.frag index 7160efaa12..bcec948e4a 100644 
--- a/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexArray_lit.frag +++ b/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexArray_lit.frag @@ -27,7 +27,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-COUNT-2: call {{.*}} <4 x float> @{{.*}} -; SHADERTEST: define internal {{.*}}<4 x float> @{{.*}}({ { {{<8 x i32> addrspace\(4\)\*|ptr addrspace\(4\)}}, i32, i32, i32 }, { {{<4 x i32> addrspace\(4\)\*|ptr addrspace\(4\)}}, i32, i32 } } %{{[a-zA-Z0-9]+}}, {{<2 x float> addrspace\(5\)\*|ptr addrspace\(5\)}} %{{[a-z0-9]+}}) +; SHADERTEST: define internal {{.*}}<4 x float> @{{.*}}({ ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } %{{[a-zA-Z0-9]+}}, {{<2 x float> addrspace\(5\)\*|ptr addrspace\(5\)}} %{{[a-z0-9]+}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexNestedCall_lit.frag b/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexNestedCall_lit.frag index 480ea0c85c..7498b414c9 100644 --- a/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexNestedCall_lit.frag +++ b/llpc/test/shaderdb/core/OpFunctionCall_TestParamTexNestedCall_lit.frag @@ -32,7 +32,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-COUNT-2: call {{.*}} <4 x float> @{{.*}} -; SHADERTEST: define internal {{.*}}<4 x float> @{{.*}}({ { {{<8 x i32> addrspace\(4\)\*|ptr addrspace\(4\)}}, i32, i32, i32 }, { {{<4 x i32> addrspace\(4\)\*|ptr addrspace\(4\)}}, i32, i32 } } %{{[a-zA-Z0-9]+}}, {{<2 x float> addrspace\(5\)\*|ptr addrspace\(5\)}} %{{[a-z0-9]+}}) +; SHADERTEST: define internal {{.*}}<4 x float> @{{.*}}({ ptr addrspace(4), i32, i32, ptr addrspace(4), i32, i32 } %{{[a-zA-Z0-9]+}}, {{<2 x float> addrspace\(5\)\*|ptr addrspace\(5\)}} %{{[a-z0-9]+}}) ; SHADERTEST: AMDLLPC SUCCESS */ // 
END_SHADERTEST diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag index 42c07e1bba..bceba33837 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag @@ -28,7 +28,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 896, ptr addrspace(4) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag index 8908a82ed0..a3b19e5012 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffsets_lit.frag @@ -34,7 +34,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, i32 801, <2 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FECCCCCC0000000) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, i32 801, <3 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FE99999A0000000) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 896, {{.*}}, i32 801, <3 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FE99999A0000000) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, {{.*}}, i32 801, <2 x float> , float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag index d5f32f94b5..6d364695d0 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag @@ -33,7 +33,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FECCCCCC0000000) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, i32 545, <3 x float> , float 0.000000e+00, float 0x3FE99999A0000000) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 896, {{.*}}, i32 545, <3 x float> , float 0.000000e+00, float 0x3FE99999A0000000) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag index 14eb182981..955b70ae62 100644 --- a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetchOffset_lit.frag @@ -44,11 +44,11 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 6, i32 3) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 128, {{.*}}, <2 x i32> , i32 6) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 1664, {{.*}}, <2 x i32> , i32 6) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 2, i32 1536, {{.*}}, <3 x i32> , i32 2) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 9, i32 1536, {{.*}}, <2 x i32> ) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 4, i32 1536, {{.*}}, <2 x i32> , i32 6) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 5, i32 128, {{.*}}, <3 x i32> , i32 2) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 5, i32 1664, {{.*}}, <3 x i32> , i32 2) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 6, i16 3, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag index 4dbca71ce8..bb559e78dd 100644 --- a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag +++ b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag @@ -34,10 +34,10 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 2, i32 2) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 128, {{.*}}, <2 x i32> , i32 8) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 1664, {{.*}}, <2 x i32> , i32 8) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 9, i32 1536, {{.*}}, <2 x i32> ) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 10, i32 1536, {{.*}}, i32 5) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.with.fmask.v4f32(i32 6, i32 128, {{.*}}, {{.*}}, <2 x i32> , i32 4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.with.fmask.v4f32(i32 6, i32 1664, {{.*}}, {{.*}}, <2 x i32> , i32 4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 2, i16 2, <8 x i32> %{{.*}}, i32 0, i32 0), !invariant.load !{{.*}} diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag index d05255542d..38c9dc866d 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag @@ -32,7 +32,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 2, float 0.000000e+00, <2 x {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, <2 x {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 896, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, <2 x {{.*}}) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag index cb23486ca6..3bb640f2f3 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffsets_lit.frag @@ -33,7 +33,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 293, <2 x float> , i32 2, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 896, {{.*}}, {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, {{.*}}, {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag index 2bdc97e82b..65ad0c170f 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag @@ -31,7 +31,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x float> , i32 2, float 0.000000e+00) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <3 x float> , i32 3, float 0.000000e+00) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 896, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <3 x float> , i32 3, float 0.000000e+00) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag b/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag index 9ca1f9f322..25d69a8a83 100644 --- a/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag @@ -31,9 +31,9 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 0, i32 512, ptr addrspace(4) {{.*}}) -; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, ptr addrspace(4) {{.*}}) +; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 640, ptr addrspace(4) {{.*}}) ; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 512, ptr addrspace(4) {{.*}}) -; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 8, i32 128, ptr addrspace(4) {{.*}}) +; SHADERTEST: call i32 (...) 
@lgc.create.image.query.levels.i32(i32 8, i32 640, ptr addrspace(4) {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageQueryLod_TestTextureQueryLod_lit.frag b/llpc/test/shaderdb/core/OpImageQueryLod_TestTextureQueryLod_lit.frag index 883132f449..9cec111ffb 100644 --- a/llpc/test/shaderdb/core/OpImageQueryLod_TestTextureQueryLod_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQueryLod_TestTextureQueryLod_lit.frag @@ -37,7 +37,7 @@ void main() ; SHADERTEST: select reassoc nnan nsz arcp contract afn i1 [[ZERO]], <2 x float> [[LOD2]], <2 x float> [[LOD1]] ; 2D -; SHADERTEST: [[LOD1:%[0-9]*]] = call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 384, {{.*}}, {{.*}}, <2 x float> ) +; SHADERTEST: [[LOD1:%[0-9]*]] = call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 896, {{.*}}, {{.*}}, <2 x float> ) ; SHADERTEST: [[DPX:%[0-9]*]] = call reassoc nnan nsz arcp contract afn <2 x float> (...) @lgc.create.derivative.v2f32(<2 x float> , i1 false, i1 true) ; SHADERTEST: [[ABSDPX:%[0-9]*]] = call reassoc nnan nsz arcp contract afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[DPX]]) ; SHADERTEST: [[DPY:%[0-9]*]] = call reassoc nnan nsz arcp contract afn <2 x float> (...) 
@lgc.create.derivative.v2f32(<2 x float> , i1 true, i1 true) diff --git a/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag index 84befc8fa2..7a65d1a88b 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySamples_TestImageSamples_lit.frag @@ -25,7 +25,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 6, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 7, i32 128, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 7, i32 640, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag index 8f86f670f1..cbc347099e 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySamples_TestTextureSamples_lit.frag @@ -25,7 +25,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 6, i32 512, {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 7, i32 128, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.query.samples.i32(i32 7, i32 640, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag index 
2340035249..5fbaac5d96 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySizeLod_TestTextureSize_lit.frag @@ -26,7 +26,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 5) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 128, {{.*}}, i32 3) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 640, {{.*}}, i32 3) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 1, i32 512, {{.*}}, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 2, i32 512, {{.*}}, i32 5) diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag index f9b18ff291..966d5e5668 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag @@ -36,8 +36,8 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 128, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 128, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 640, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 640, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag 
b/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag index dbc8973fd0..f8dc691e98 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag @@ -29,9 +29,9 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 128, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 640, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 128, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 640, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag b/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag index 0674a80145..8b515b730b 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag +++ b/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag @@ -35,8 +35,8 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 0, i32 512, {{.*}}, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 9, i32 512, {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 10, i32 128, {{.*}}, i32 4) -; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 8, i32 128, {{.*}}, <4 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 10, i32 640, {{.*}}, i32 4) +; SHADERTEST: call {{.*}} 
@lgc.create.image.load.v4f32(i32 8, i32 640, {{.*}}, <4 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 6, i32 512, {{.*}}, <3 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag index b7666b14b1..678ff351d7 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag @@ -39,17 +39,17 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 153, <2 x float> , <2 x float> , <2 x float> , {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 153, <2 x float> , <2 x float> , <2 x float> , {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> , <3 x float> , <3 x float> , {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, 
i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 153, <2 x float> , <2 x float> , <2 x float> , {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 153, <2 x float> , <2 x float> , <2 x float> , {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> , <3 x float> , <3 x float> , {{.*}}) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag index 20588ab0bd..09c61dcd31 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradOffset_lit.frag @@ -26,13 +26,13 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 281, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 281, <2 x float> , <2 x float> , <2 x float> , <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 281, <2 x 
float> , <2 x float> , <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 281, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 281, <2 x float> , <2 x float> , <2 x float> , <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 281, <2 x float> , <2 x float> , <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag index 008c58de26..064891b5a9 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGrad_lit.frag @@ -26,13 +26,13 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 25, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 25, <2 x float> , <2 x float> , <2 x float> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 25, <2 x float> , <2 x float> , <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; 
SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 25, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 25, <2 x float> , <2 x float> , <2 x float> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 25, <2 x float> , <2 x float> , <2 x float> ) ; SHADERTEST-LABEL: pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag index c09ef5e539..86d578ab20 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLodOffset_lit.frag @@ -26,13 +26,13 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 289, float 5.000000e-01, float 0x3FD99999A0000000, i32 6) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 289, <2 x float> , float 0x3FE6666660000000, <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 289, <2 x float> , float 0x3FE6666660000000, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 289, float 5.000000e-01, float 
0x3FD99999A0000000, i32 6) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 289, <2 x float> , float 0x3FE6666660000000, <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 289, <2 x float> , float 0x3FE6666660000000, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag index cd68490b42..df50628a44 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureLod_lit.frag @@ -26,13 +26,13 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 33, float 5.000000e-01, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 33, <2 x float> , float 0x3FE6666660000000) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 33, <2 x float> , float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 33, float 5.000000e-01, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 33, <2 x float> , float 0x3FE6666660000000) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, 
{{.*}}, {{.*}}, i32 33, <2 x float> , float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag index e42cb57358..5579768037 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag @@ -14,11 +14,11 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 0, {{.*}}, {{.*}}, i32 1, <2 x float> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag index af4dec00a8..7109e65a27 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag @@ -14,11 +14,11 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 0, {{.*}}, {{.*}}, i32 1, <2 x float> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST-LABEL: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag index a132ad6d38..3950cc0e5e 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag @@ -49,7 +49,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 193, float %{{[0-9]*}}, float 2.000000e+00, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 193, float %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 193, <2 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> 
%{{[0-9]*}}, float 2.000000e+00, {{.*}}) @@ -65,7 +65,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 193, float %{{[0-9]*}}, float 2.000000e+00, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 193, float %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 193, <2 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 193, <3 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag index e48d5ec2f5..9afcacbc46 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag @@ -49,7 +49,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 129, float %{{[0-9]*}}, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 129, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} 
@lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 129, <2 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) @@ -65,7 +65,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 129, float %{{[0-9]*}}, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 129, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 129, <2 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 129, <3 x float> %{{[0-9]*}}, {{.*}}) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag index 6a83db23d5..1b71f891f6 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag @@ -51,7 +51,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} 
@lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 153, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 153, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 153, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) @@ -67,7 +67,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 153, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 153, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 153, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, <2 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 3, i32 512, {{.*}}, {{.*}}, i32 153, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) diff 
--git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag index b13c902967..00e98897f8 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag @@ -42,7 +42,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 409, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 409, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) @@ -54,7 +54,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 409, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 
0x3FD3333340000000, {{.*}}, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 409, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 409, <2 x float> , float 0x3FC99999A0000000, float 0x3FD3333340000000, {{.*}}, i32 2) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag index 5e7dd16029..14e3c9a49f 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag @@ -42,7 +42,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 385, float 0x3FB99999A0000000, {{.*}}, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 385, float 0x3FB99999A0000000, {{.*}}, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, 
i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, i32 2) @@ -54,7 +54,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 2, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 384, {{.*}}, {{.*}}, i32 385, float 0x3FB99999A0000000, {{.*}}, i32 2) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 896, {{.*}}, {{.*}}, i32 385, float 0x3FB99999A0000000, {{.*}}, i32 2) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 4, i32 512, {{.*}}, {{.*}}, i32 385, <2 x float> , {{.*}}, i32 2) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag index 1b6c64e883..c42564c18c 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffset_lit.frag @@ -25,13 +25,13 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 321, float 1.000000e+00, float 0x3FD99999A0000000, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 257, <2 x float> , <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 257, <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// 
LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 321, float 1.000000e+00, float 0x3FD99999A0000000, i32 2) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 257, <2 x float> , <2 x i32> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 257, <2 x float> , <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag index 380f48a887..0e8fb0c744 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTexture_lit.frag @@ -26,13 +26,13 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 65, float 1.000000e+00, float 0x3FD99999A0000000) -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 1, <2 x float> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 0, i32 512, {{.*}}, {{.*}}, i32 65, float 1.000000e+00, float 0x3FD99999A0000000) -; SHADERTEST: 
call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384, {{.*}}, {{.*}}, i32 1, <2 x float> ) +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, {{.*}}, {{.*}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/core/OpSDotAccSat_TestIVec16bit.spvasm b/llpc/test/shaderdb/core/OpSDotAccSat_TestIVec16bit.spvasm index 2d24d8c790..5d02075be2 100644 --- a/llpc/test/shaderdb/core/OpSDotAccSat_TestIVec16bit.spvasm +++ b/llpc/test/shaderdb/core/OpSDotAccSat_TestIVec16bit.spvasm @@ -52,9 +52,8 @@ %18 = OpLoad %v4float %b0 %19 = OpConvertFToS %v4int %18 %20 = OpSConvert %v4int16 %19 - %21 = OpSDotAccSatKHR %int16 %17 %20 %5 - %22 = OpSConvert %int %21 - %23 = OpCompositeConstruct %v4int %22 %22 %22 %22 + %21 = OpSDotAccSatKHR %int %17 %20 %5 + %23 = OpCompositeConstruct %v4int %21 %21 %21 %21 OpStore %c %23 %24 = OpLoad %v4int %c %25 = OpConvertSToF %v4float %24 diff --git a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallInsertion.frag b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallInsertion.frag index 74975a3767..c932e07fdb 100644 --- a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallInsertion.frag +++ b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallInsertion.frag @@ -27,7 +27,7 @@ void main() // SHADERTEST-NEXT: %[[readfirstlane:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin]], i32 %[[mul]]) // SHADERTEST-NEXT: %[[sext:[0-9]+]] = sext i32 %[[readfirstlane]] to i64 // SHADERTEST-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-NEXT: %[[extract:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 3 // SHADERTEST-NEXT: 
%[[and:[0-9]+]] = and i32 %[[extract]], 268435455 // SHADERTEST-NEXT: %[[cmp:[0-9]+]] = icmp slt i32 %[[extract]], 0 @@ -35,7 +35,7 @@ void main() // SHADERTEST-NEXT: %[[insert:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[select]], i64 3 // SHADERTEST-NEXT: %[[shufflevector:[0-9]+]] = shufflevector <8 x i32> %[[insert]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-NEXT: %[[image_call:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half 0xH0000, half 0xH0000, <8 x i32> %[[shufflevector]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-NEXT: %[[end:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin]], <4 x float> %[[image_call]]) // SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize.frag b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize.frag index 283422ccc1..ebfaa14d26 100644 --- a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize.frag +++ b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize.frag @@ -26,7 +26,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[readfirstlane:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin]], i32 %[[mul]]) // SHADERTEST-GFX-NEXT: %[[sext:[0-9]+]] = sext i32 %[[readfirstlane]] to i64 // SHADERTEST-GFX-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-GFX-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // 
SHADERTEST-GFX-NEXT: %[[extract:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 3 // SHADERTEST-GFX-NEXT: %[[and:[0-9]+]] = and i32 %[[extract]], 268435455 // SHADERTEST-GFX-NEXT: %[[cmp:[0-9]+]] = icmp slt i32 %[[extract]], 0 @@ -34,7 +34,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[insert:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[select]], i64 3 // SHADERTEST-GFX-NEXT: %[[shufflevector:[0-9]+]] = shufflevector <8 x i32> %[[insert]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-GFX-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-GFX-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX-NEXT: %[[image_call:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX-NEXT: %[[end:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin]], <4 x float> %[[image_call]]) // SHADERTEST-GFX: AMDLLPC SUCCESS @@ -45,9 +45,9 @@ void main() // SHADERTEST-GFX_10_3_0-NEXT: %[[readfirstlane:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin]], i32 %[[mul]]) // SHADERTEST-GFX_10_3_0-NEXT: %[[sext:[0-9]+]] = sext i32 %[[readfirstlane]] to i64 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load2:[0-9]+]] = load <4 x 
i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[image_call:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[load1]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_0-NEXT: %[[end:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin]], <4 x float> %[[image_call]]) // SHADERTEST-GFX_10_3_0: AMDLLPC SUCCESS @@ -58,13 +58,13 @@ void main() // SHADERTEST-GFX_10_3_2-NEXT: %[[readfirstlane:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin]], i32 %[[mul]]) // SHADERTEST-GFX_10_3_2-NEXT: %[[sext:[0-9]+]] = sext i32 %[[readfirstlane]] to i64 // SHADERTEST-GFX_10_3_2-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[extract:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[and:[0-9]+]] = and i32 %[[extract]], -1048577 // SHADERTEST-GFX_10_3_2-NEXT: %[[insert:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[and]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[shufflevector:[0-9]+]] = shufflevector <8 x i32> %[[insert]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-GFX_10_3_2-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[image_call:[0-9]+]] = call 
reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_2-NEXT: %[[end:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin]], <4 x float> %[[image_call]]) // SHADERTEST-GFX_10_3_2: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_MultiBlock.frag b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_MultiBlock.frag index a4b12e33a0..486b00f87d 100644 --- a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_MultiBlock.frag +++ b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_MultiBlock.frag @@ -36,7 +36,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[readfirstlane1:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin1]], i32 %[[mul1]]) // SHADERTEST-GFX-NEXT: %[[sext1:[0-9]+]] = sext i32 %[[readfirstlane1]] to i64 // SHADERTEST-GFX-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX-NEXT: %[[extract1:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 3 // SHADERTEST-GFX-NEXT: %[[and1:[0-9]+]] = and i32 %[[extract1]], 268435455 // SHADERTEST-GFX-NEXT: %[[cmp1:[0-9]+]] = icmp slt i32 %[[extract1]], 0 @@ -44,7 +44,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[insert1:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[select1]], i64 3 // SHADERTEST-GFX-NEXT: %[[shufflevector1:[0-9]+]] = shufflevector <8 x i32> %[[insert1]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-GFX-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX-NEXT: 
%[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX-NEXT: %[[image_call1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector1]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX-NEXT: %[[end1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin1]], <4 x float> %[[image_call1]]) // @@ -52,7 +52,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[readfirstlane2:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin2]], i32 %[[mul1]]) // SHADERTEST-GFX-NEXT: %[[sext2:[0-9]+]] = sext i32 %[[readfirstlane2]] to i64 // SHADERTEST-GFX-NEXT: %[[gep3:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 32 +// SHADERTEST-GFX-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 4 // SHADERTEST-GFX-NEXT: %[[extract2:[.a-z0-9]+]] = extractelement <8 x i32> %[[load3]], i64 3 // SHADERTEST-GFX-NEXT: %[[and2:[0-9]+]] = and i32 %[[extract2]], 268435455 // SHADERTEST-GFX-NEXT: %[[cmp2:[0-9]+]] = icmp slt i32 %[[extract2]], 0 @@ -60,7 +60,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[insert2:[.a-z0-9]+]] = insertelement <8 x i32> %[[load3]], i32 %[[select2]], i64 3 // SHADERTEST-GFX-NEXT: %[[shufflevector2:[0-9]+]] = shufflevector <8 x i32> %[[insert2]], <8 x i32> %[[load3]], <8 x i32> // SHADERTEST-GFX-NEXT: %[[gep4:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 16 +// SHADERTEST-GFX-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 4 // SHADERTEST-GFX-NEXT: 
%[[image_call2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector2]], <4 x i32> %[[load4]], i1 false, i32 0, i32 0) // SHADERTEST-GFX-NEXT: %[[end2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin2]], <4 x float> %[[image_call2]]) // @@ -68,7 +68,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[readfirstlane3:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin3]], i32 %[[mul1]]) // SHADERTEST-GFX-NEXT: %[[sext3:[0-9]+]] = sext i32 %[[readfirstlane3]] to i64 // SHADERTEST-GFX-NEXT: %[[gep5:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext3]] -// SHADERTEST-GFX-NEXT: %[[load5:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep5]], align 32 +// SHADERTEST-GFX-NEXT: %[[load5:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep5]], align 4 // SHADERTEST-GFX-NEXT: %[[extract3:[.a-z0-9]+]] = extractelement <8 x i32> %[[load5]], i64 3 // SHADERTEST-GFX-NEXT: %[[and3:[0-9]+]] = and i32 %[[extract3]], 268435455 // SHADERTEST-GFX-NEXT: %[[cmp3:[0-9]+]] = icmp slt i32 %[[extract3]], 0 @@ -76,7 +76,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[insert3:[.a-z0-9]+]] = insertelement <8 x i32> %[[load5]], i32 %[[select3]], i64 3 // SHADERTEST-GFX-NEXT: %[[shufflevector3:[0-9]+]] = shufflevector <8 x i32> %[[insert3]], <8 x i32> %[[load5]], <8 x i32> // SHADERTEST-GFX-NEXT: %[[gep6:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext3]] -// SHADERTEST-GFX-NEXT: %[[load6:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep6]], align 16 +// SHADERTEST-GFX-NEXT: %[[load6:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep6]], align 4 // SHADERTEST-GFX-NEXT: [[image_call3:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector3]], <4 x i32> %[[load6]], i1 false, 
i32 0, i32 0) // SHADERTEST-GFX-NEXT: %[[end3:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin3]], <4 x float> %[[image_call3]]) // SHADERTEST-GFX: AMDLLPC SUCCESS @@ -87,9 +87,9 @@ void main() // SHADERTEST-GFX_10_3_0-NEXT: %[[readfirstlane1:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin1]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_0-NEXT: %[[sext1:[0-9]+]] = sext i32 %[[readfirstlane1]] to i64 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[image_call1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[load1]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_0-NEXT: %[[end1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin1]], <4 x float> %[[image_call1]]) // @@ -97,9 +97,9 @@ void main() // SHADERTEST-GFX_10_3_0-NEXT: %[[readfirstlane2:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin2]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_0-NEXT: %[[sext2:[0-9]+]] = sext i32 %[[readfirstlane2]] to i64 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep3:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load3:[0-9]+]] = load <8 x 
i32>, ptr addrspace(4) %[[gep3]], align 32 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep4:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 16 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[image_call2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[load3]], <4 x i32> %[[load4]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_0-NEXT: %[[end2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin2]], <4 x float> %[[image_call2]]) // @@ -107,9 +107,9 @@ void main() // SHADERTEST-GFX_10_3_0-NEXT: %[[readfirstlane3:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin3]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_0-NEXT: %[[sext3:[0-9]+]] = sext i32 %[[readfirstlane3]] to i64 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep5:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext3]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load5:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep5]], align 32 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load5:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep5]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep6:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext3]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load6:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep6]], align 16 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load6:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep6]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: [[image_call3:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> 
%[[load5]], <4 x i32> %[[load6]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_0-NEXT: %[[end3:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin3]], <4 x float> %[[image_call3]]) // SHADERTEST-GFX_10_3_0: AMDLLPC SUCCESS @@ -120,13 +120,13 @@ void main() // SHADERTEST-GFX_10_3_2-NEXT: %[[readfirstlane1:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin1]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_2-NEXT: %[[sext1:[0-9]+]] = sext i32 %[[readfirstlane1]] to i64 // SHADERTEST-GFX_10_3_2-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[extract1:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[and1:[0-9]+]] = and i32 %[[extract1]], -1048577 // SHADERTEST-GFX_10_3_2-NEXT: %[[insert1:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[and1]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[shufflevector1:[0-9]+]] = shufflevector <8 x i32> %[[insert1]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-GFX_10_3_2-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[image_call1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector1]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_2-NEXT: %[[end1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> 
@llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin1]], <4 x float> %[[image_call1]]) // @@ -134,13 +134,13 @@ void main() // SHADERTEST-GFX_10_3_2-NEXT: %[[readfirstlane2:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin2]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_2-NEXT: %[[sext2:[0-9]+]] = sext i32 %[[readfirstlane2]] to i64 // SHADERTEST-GFX_10_3_2-NEXT: %[[gep3:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 32 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[extract2:[.a-z0-9]+]] = extractelement <8 x i32> %[[load3]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[and2:[0-9]+]] = and i32 %[[extract2]], -1048577 // SHADERTEST-GFX_10_3_2-NEXT: %[[insert2:[.a-z0-9]+]] = insertelement <8 x i32> %[[load3]], i32 %[[and2]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[shufflevector2:[0-9]+]] = shufflevector <8 x i32> %[[insert2]], <8 x i32> %[[load3]], <8 x i32> // SHADERTEST-GFX_10_3_2-NEXT: %[[gep4:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 16 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[image_call2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector2]], <4 x i32> %[[load4]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_2-NEXT: %[[end2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin2]], <4 x float> %[[image_call2]]) // @@ -148,13 +148,13 @@ void main() // SHADERTEST-GFX_10_3_2-NEXT: %[[readfirstlane3:[0-9]+]] = call i32 
@llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin3]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_2-NEXT: %[[sext3:[0-9]+]] = sext i32 %[[readfirstlane3]] to i64 // SHADERTEST-GFX_10_3_2-NEXT: %[[gep5:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext3]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load5:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep5]], align 32 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load5:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep5]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[extract3:[.a-z0-9]+]] = extractelement <8 x i32> %[[load5]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[and3:[0-9]+]] = and i32 %[[extract3]], -1048577 // SHADERTEST-GFX_10_3_2-NEXT: %[[insert3:[.a-z0-9]+]] = insertelement <8 x i32> %[[load5]], i32 %[[and3]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[shufflevector3:[0-9]+]] = shufflevector <8 x i32> %[[insert3]], <8 x i32> %[[load5]], <8 x i32> // SHADERTEST-GFX_10_3_2-NEXT: %[[gep6:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext3]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load6:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep6]], align 16 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load6:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep6]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: [[image_call3:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector3]], <4 x i32> %[[load6]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_2-NEXT: %[[end3:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin3]], <4 x float> %[[image_call3]]) // SHADERTEST-GFX_10_3_2: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_SharedDesc.frag b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_SharedDesc.frag index 0b52c6e83a..69719c1be2 100644 --- 
a/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_SharedDesc.frag +++ b/llpc/test/shaderdb/core/OpTypeSampledImage_TestWaterfallScalarize_SharedDesc.frag @@ -29,7 +29,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[readfirstlane1:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin1]], i32 %[[mul1]]) // SHADERTEST-GFX-NEXT: %[[sext1:[0-9]+]] = sext i32 %[[readfirstlane1]] to i64 // SHADERTEST-GFX-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX-NEXT: %[[extract1:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 3 // SHADERTEST-GFX-NEXT: %[[and1:[0-9]+]] = and i32 %[[extract1]], 268435455 // SHADERTEST-GFX-NEXT: %[[cmp1:[0-9]+]] = icmp slt i32 %[[extract1]], 0 @@ -37,7 +37,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[insert1:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[select1]], i64 3 // SHADERTEST-GFX-NEXT: %[[shufflevector1:[0-9]+]] = shufflevector <8 x i32> %[[insert1]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-GFX-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX-NEXT: %[[image_call1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector1]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX-NEXT: %[[end1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin1]], <4 x float> %[[image_call1]]) // @@ -45,7 +45,7 @@ void main() // 
SHADERTEST-GFX-NEXT: %[[readfirstlane2:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin2]], i32 %[[mul1]]) // SHADERTEST-GFX-NEXT: %[[sext2:[0-9]+]] = sext i32 %[[readfirstlane2]] to i64 // SHADERTEST-GFX-NEXT: %[[gep3:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 32 +// SHADERTEST-GFX-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 4 // SHADERTEST-GFX-NEXT: %[[extract2:[.a-z0-9]+]] = extractelement <8 x i32> %[[load3]], i64 3 // SHADERTEST-GFX-NEXT: %[[and2:[0-9]+]] = and i32 %[[extract2]], 268435455 // SHADERTEST-GFX-NEXT: %[[cmp2:[0-9]+]] = icmp slt i32 %[[extract2]], 0 @@ -53,7 +53,7 @@ void main() // SHADERTEST-GFX-NEXT: %[[insert2:[.a-z0-9]+]] = insertelement <8 x i32> %[[load3]], i32 %[[select2]], i64 3 // SHADERTEST-GFX-NEXT: %[[shufflevector2:[0-9]+]] = shufflevector <8 x i32> %[[insert2]], <8 x i32> %[[load3]], <8 x i32> // SHADERTEST-GFX-NEXT: %[[gep4:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 16 +// SHADERTEST-GFX-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 4 // SHADERTEST-GFX-NEXT: %[[image_call2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector2]], <4 x i32> %[[load4]], i1 false, i32 0, i32 0) // SHADERTEST-GFX-NEXT: %[[end2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin2]], <4 x float> %[[image_call2]]) // SHADERTEST-GFX: AMDLLPC SUCCESS @@ -64,9 +64,9 @@ void main() // SHADERTEST-GFX_10_3_0-NEXT: %[[readfirstlane1:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin1]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_0-NEXT: 
%[[sext1:[0-9]+]] = sext i32 %[[readfirstlane1]] to i64 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[image_call1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[load1]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_0-NEXT: %[[end1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin1]], <4 x float> %[[image_call1]]) // @@ -74,9 +74,9 @@ void main() // SHADERTEST-GFX_10_3_0-NEXT: %[[readfirstlane2:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin2]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_0-NEXT: %[[sext2:[0-9]+]] = sext i32 %[[readfirstlane2]] to i64 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep3:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 32 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[gep4:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_0-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 16 +// SHADERTEST-GFX_10_3_0-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr 
addrspace(4) %[[gep4]], align 4 // SHADERTEST-GFX_10_3_0-NEXT: %[[image_call2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[load3]], <4 x i32> %[[load4]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_0-NEXT: %[[end2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin2]], <4 x float> %[[image_call2]]) // SHADERTEST-GFX_10_3_0: AMDLLPC SUCCESS @@ -87,13 +87,13 @@ void main() // SHADERTEST-GFX_10_3_2-NEXT: %[[readfirstlane1:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin1]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_2-NEXT: %[[sext1:[0-9]+]] = sext i32 %[[readfirstlane1]] to i64 // SHADERTEST-GFX_10_3_2-NEXT: %[[gep1:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 32 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load1:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep1]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[extract:[.a-z0-9]+]] = extractelement <8 x i32> %[[load1]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[and:[0-9]+]] = and i32 %[[extract]], -1048577 // SHADERTEST-GFX_10_3_2-NEXT: %[[insert:[.a-z0-9]+]] = insertelement <8 x i32> %[[load1]], i32 %[[and]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[shufflevector:[0-9]+]] = shufflevector <8 x i32> %[[insert]], <8 x i32> %[[load1]], <8 x i32> // SHADERTEST-GFX_10_3_2-NEXT: %[[gep2:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext1]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 16 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load2:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep2]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[image_call1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 
%{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector]], <4 x i32> %[[load2]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_2-NEXT: %[[end1:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin1]], <4 x float> %[[image_call1]]) // @@ -101,13 +101,13 @@ void main() // SHADERTEST-GFX_10_3_2-NEXT: %[[readfirstlane2:[0-9]+]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 %[[begin2]], i32 %[[mul1]]) // SHADERTEST-GFX_10_3_2-NEXT: %[[sext2:[0-9]+]] = sext i32 %[[readfirstlane2]] to i64 // SHADERTEST-GFX_10_3_2-NEXT: %[[gep3:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 32 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load3:[0-9]+]] = load <8 x i32>, ptr addrspace(4) %[[gep3]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[extract:[.a-z0-9]+]] = extractelement <8 x i32> %[[load3]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[and:[0-9]+]] = and i32 %[[extract]], -1048577 // SHADERTEST-GFX_10_3_2-NEXT: %[[insert:[.a-z0-9]+]] = insertelement <8 x i32> %[[load3]], i32 %[[and]], i64 6 // SHADERTEST-GFX_10_3_2-NEXT: %[[shufflevector:[0-9]+]] = shufflevector <8 x i32> %[[insert]], <8 x i32> %[[load3]], <8 x i32> // SHADERTEST-GFX_10_3_2-NEXT: %[[gep4:[0-9]+]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i64 %[[sext2]] -// SHADERTEST-GFX_10_3_2-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 16 +// SHADERTEST-GFX_10_3_2-NEXT: %[[load4:[0-9]+]] = load <4 x i32>, ptr addrspace(4) %[[gep4]], align 4 // SHADERTEST-GFX_10_3_2-NEXT: %[[image_call2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %{{.*}}, float %{{.*}}, <8 x i32> %[[shufflevector]], <4 x i32> %[[load4]], i1 false, i32 0, i32 0) // SHADERTEST-GFX_10_3_2-NEXT: %[[end2:[0-9]+]] = call reassoc nnan nsz arcp contract afn <4 x float> 
@llvm.amdgcn.waterfall.end.v4f32(i32 %[[begin2]], <4 x float> %[[image_call2]]) // SHADERTEST-GFX_10_3_2: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpUDotAccSat_TestUVec16bit.spvasm b/llpc/test/shaderdb/core/OpUDotAccSat_TestUVec16bit.spvasm index 8fc567ef20..c05dff6be2 100644 --- a/llpc/test/shaderdb/core/OpUDotAccSat_TestUVec16bit.spvasm +++ b/llpc/test/shaderdb/core/OpUDotAccSat_TestUVec16bit.spvasm @@ -52,9 +52,8 @@ %18 = OpLoad %v4float %b0 %19 = OpConvertFToU %v4int %18 %20 = OpSConvert %v4int16 %19 - %21 = OpUDotAccSatKHR %int16 %17 %20 %5 - %22 = OpUConvert %int %21 - %23 = OpCompositeConstruct %v4int %22 %22 %22 %22 + %21 = OpUDotAccSatKHR %int %17 %20 %5 + %23 = OpCompositeConstruct %v4int %21 %21 %21 %21 OpStore %c %23 %24 = OpLoad %v4int %c %25 = OpConvertSToF %v4float %24 diff --git a/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag b/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag index 13b66e12f2..316a82ca5a 100644 --- a/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag +++ b/llpc/test/shaderdb/core/TestForceNonUniformResourceIndex.frag @@ -3,7 +3,7 @@ // RUN: amdllpc -v %gfxip %s --force-non-uniform-resource-index-stage-mask=0x00000000 | FileCheck -check-prefix=NOTFORCENURITEST %s // NOTFORCENURITEST-LABEL: {{^// LLPC}} pipeline before-patching results // When not forcing NURI (Non Uniform Resource Index), there should be a `readfirstlane`. -// NOTFORCENURITEST: %{{[0-9]+}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]+}}) +// NOTFORCENURITEST: %{{[0-9]+}} = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 %{{[0-9]+}}) // NOTFORCENURITEST: AMDLLPC SUCCESS // END_SHADERTEST @@ -12,7 +12,7 @@ // RUN: amdllpc -v %gfxip %s --force-non-uniform-resource-index-stage-mask=0xFFFFFFFF | FileCheck -check-prefix=FORCENURITEST %s // FORCENURITEST-LABEL: {{^// LLPC}} pipeline before-patching results // When forcing NURI (Non Uniform Resource Index), there should not be a `readfirstlane`. 
-// FORCENURITEST-NOT: %{{[0-9]+}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]+}}) +// FORCENURITEST-NOT: %{{[0-9]+}} = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 %{{[0-9]+}}) // FORCENURITEST: AMDLLPC SUCCESS // END_SHADERTEST diff --git a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert index c0a28c5490..30c0a02033 100644 --- a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert +++ b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert @@ -18,7 +18,7 @@ void main() gl_PointSize = pointSize; } // CHECK-LABEL: define {{[^@]+}}@lgc.shader.VS.main -// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META6:![0-9]+]] !lgc.xfb.state [[META7:![0-9]+]] !lgc.shaderstage [[META1:![0-9]+]] { +// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !10 !lgc.shaderstage !1 !lgc.xfb.state !11 { // CHECK-NEXT: .entry: // CHECK-NEXT: [[TMP0:%.*]] = call float (...) @lgc.create.read.generic.input.f32(i32 1, i32 0, i32 0, i32 0, i32 0, i32 poison) // CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) @@ -28,11 +28,9 @@ void main() // CHECK-NEXT: ret void // //. -// CHECK: attributes #[[ATTR0]] = { nounwind "denormal-fp-math-f32"="preserve-sign" } +// CHECK: attributes #[[ATTR0]] = { alwaysinline nounwind "denormal-fp-math-f32"="preserve-sign" } // CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } // CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(read) } //. -// CHECK: [[META1]] = !{i32 1} -// CHECK: [[META6]] = !{i32 0} -// CHECK: [[META7]] = !{i32 0, i32 4, i32 -1, i32 0, i32 -1, i32 0, i32 -1, i32 0} -//. 
+// CHECK: [[META0:![0-9]+]] = !{!"Vulkan"} +// CHECK: [[META1:![0-9]+]] = !{i32 1} diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag index 56d812fb5c..c0225acb56 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroidNoPersp_lit.frag @@ -8,10 +8,10 @@ ; SHADERTEST: %{{[A-Za-z0-9]*}} = call <2 x float> @lgc.input.import.builtin.InterpLinearCentroid.v2f32.i32(i32 268435462) ; SHADERTEST: %{{[A-Za-z0-9]*}} = call <2 x float> @lgc.input.import.builtin.InterpPerspCentroid.v2f32.i32(i32 268435458) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 1, i32 immarg 1, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 1, i32 immarg 1, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 1, i32 1, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 1, i32 1, i32 %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag index 393453a596..8ba1ddc40b 
100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtCentroid_lit.frag @@ -23,10 +23,10 @@ void main() ; SHADERTEST: %{{[A-Za-z0-9]*}} = call <2 x float> @lgc.input.import.builtin.InterpPerspCentroid.v2f32.i32(i32 268435458) ; SHADERTEST: %{{[A-Za-z0-9]*}} = call <2 x float> @lgc.input.import.builtin.InterpPerspCentroid.v2f32.i32(i32 268435458) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 1, i32 immarg 1, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 1, i32 immarg 1, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 1, i32 1, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 1, i32 1, i32 %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag index 4c6dede97e..46cfb9a632 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtOffset_lit.frag @@ -29,9 +29,9 @@ void main() ; SHADERTEST-COUNT-12: = call i32 @llvm.amdgcn.mov.dpp.i32(i32 ; SHADERTEST: = call float (...) 
@lgc.input.import.interpolated__f32( ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.mov(i32 {{.*}}2, i32 immarg 1, i32 immarg 1, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.mov(i32 {{.*}}2, i32 1, i32 1, i32 %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag index cf4d9dc27a..fa9b34d113 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestInterpolateAtSample_lit.frag @@ -31,9 +31,9 @@ void main() ; SHADERTEST-DAG: = call float (...) @lgc.input.import.interpolated__f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> ; SHADERTEST-DAG: = call <4 x float> (...) 
@lgc.input.import.interpolated__v4f32(i1 false, i32 1, i32 0, i32 0, i32 poison, i32 1, i32 poison ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 immarg 0, i32 immarg 0, i32 %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.mov(i32 {{.*}}2, i32 immarg 1, i32 immarg 1, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p1(float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.p2(float %{{.*}}, float %{{.*}}, i32 0, i32 0, i32 %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = call float @llvm.amdgcn.interp.mov(i32 {{.*}}2, i32 1, i32 1, i32 %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag index 8328b7914e..461af2e7db 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestPow2_lit.frag @@ -18,7 +18,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: = call float @llvm.amdgcn.frexp.mant.f32(float ; SHADERTEST: = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float -; SHADERTEST: = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float +; SHADERTEST: = call reassoc nnan nsz arcp contract afn float @llvm.{{(ldexp.f32.i32\(float 1.000000e\+00, i32)|(exp2.f32\(float)}} ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/general/ImgDescLoad.comp b/llpc/test/shaderdb/general/ImgDescLoad.comp index 3abbe69a02..ce7aac0d8b 100644 --- a/llpc/test/shaderdb/general/ImgDescLoad.comp +++ b/llpc/test/shaderdb/general/ImgDescLoad.comp @@ -5,21 +5,9 @@ ; RUN: amdllpc -v %gfxip %s | FileCheck 
-check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results -; SHADERTEST: [[IMG_DESC:%[0-9]*]] = load <8 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 32, !invariant.load -; SHADERTEST: [[SMP_DESC:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 16, !invariant.load -; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 0 -; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) -; SHADERTEST: %{{[0-9]*}} = insertelement <4 x i32> poison, i32 %{{[0-9]*}}, i64 0 -; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 1 -; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) -; SHADERTEST: %{{[0-9]*}} = insertelement <4 x i32> %{{[0-9]*}}, i32 %{{[0-9]*}}, i64 1 -; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 2 -; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) -; SHADERTEST: %{{[0-9]*}} = insertelement <4 x i32> %{{[0-9]*}}, i32 %{{[0-9]*}}, i64 2 -; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 3 -; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) -; SHADERTEST: [[NEW_SMP_DESC:%[0-9]*]] = insertelement <4 x i32> %{{[0-9]*}}, i32 %{{[0-9]*}}, i64 3 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[IMG_DESC]], <4 x i32> [[NEW_SMP_DESC]], i1 false, i32 0, i32 0) +; SHADERTEST: [[IMG_DESC:%[0-9]*]] = load <8 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 4, !invariant.load !12 +; SHADERTEST: [[SMP_DESC:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 4, !invariant.load !12 +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[IMG_DESC]], <4 x i32> 
[[SMP_DESC]], i1 false, i32 0, i32 0) */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe b/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe index d4a15e421b..6344c54e97 100644 --- a/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe @@ -26,7 +26,7 @@ userDataNode[0].next[0].binding = 6 ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call <3 x i32> (...) @lgc.create.read.builtin.input.v3i32(i32 28, i32 0, i32 poison, i32 poison) ; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP0]], i64 0 -; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr nonnull @[[GLOB0:[0-9]+]], i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]]) -; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr nonnull @[[GLOB1:[0-9]+]], double 1.000000e+00, double 1.000000e+00) +; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr nonnull [[GLOB0:@.*]], i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]]) +; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr nonnull [[GLOB1:@.*]], double 1.000000e+00, double 1.000000e+00) ; CHECK-NEXT: ret void ; diff --git a/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe b/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe index 86a91d55be..04ea8b0b42 100644 --- a/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_LdsSpillLimitDwordsOption.pipe @@ -19,5 +19,7 @@ options.ldsSpillLimitDwords = 1024 ; CHECK-NEXT: ret void ; ;. 
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="66,66" "amdgpu-lds-spill-limit-dwords"="1024" "amdgpu-memory-bound"="false" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="1" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="66,66" "amdgpu-lds-spill-limit-dwords"="1024" "amdgpu-memory-bound"="false" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="1" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } ;. +; CHECK: [[META0:![0-9]+]] = !{i32 2, i32 3, i32 11} +; CHECK: [[META1:![0-9]+]] = !{!"Vulkan"} \ No newline at end of file diff --git a/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe b/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe index 888650934e..20766fa0ab 100644 --- a/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe +++ b/llpc/test/shaderdb/general/PipelineCs_MultipleRootInlineBuffer.pipe @@ -14,20 +14,8 @@ ; SHADERTEST: [[buf_addr1:%[0-9]*]] = call ptr addrspace(4) @lgc.user.data(i32 40) ; SHADERTEST: [[buf1:%[0-9]*]] = ptrtoint ptr addrspace(4) [[buf_addr1]] to i64 -; Build the descriptor. The first two elements comes from the address of the buffer. 
-; SHADERTEST: [[buf1_vec:%[0-9]*]] = bitcast i64 [[buf1]] to <2 x i32> -; SHADERTEST: [[buf1_0:%[0-9]*]] = extractelement <2 x i32> [[buf1_vec]], i64 0 -; SHADERTEST: [[buf1_1:%[0-9]*]] = extractelement <2 x i32> [[buf1_vec]], i64 1 -; SHADERTEST: [[desc1_0:%[0-9]*]] = insertelement <4 x i32> poison, i32 [[buf1_0]], i64 0 -; SHADERTEST: [[buf1_2:%[0-9]*]] = and i32 [[buf1_1]], 65535 -; SHADERTEST: [[desc1_1:%[0-9]*]] = insertelement <4 x i32> [[desc1_0]], i32 [[buf1_2]], i64 1 - -; The rest of the descriptor is filled in with the literals. -; SHADERTEST: [[desc1_2:%[0-9]*]] = insertelement <4 x i32> [[desc1_1]], i32 -1, i64 2 -; SHADERTEST: [[desc1_3:%[0-9]*]] = insertelement <4 x i32> [[desc1_2]], i32 553734060, i64 3 - ; Get the "fat pointer" for the buffer -; SHADERTEST: call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[desc1_3]]) +; SHADERTEST: call ptr addrspace(7) @lgc.buffer.addr.to.ptr(i64 [[buf1]]) ; Get a pointer to the first inline buffer. Offset 4 comes from the user data nodes ; SHADERTEST: [[buf_addr0:%[0-9]*]] = call ptr addrspace(4) @lgc.user.data(i32 4) diff --git a/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe b/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe new file mode 100644 index 0000000000..c0f7a2fdba --- /dev/null +++ b/llpc/test/shaderdb/general/PipelineMesh_OutputPackingInLds.pipe @@ -0,0 +1,107 @@ +; This test is to check output packing of mesh shader in LDS space. If we treat each location +; of a mesh shader output as vec4, the LDS usage will exceed HW limitation. But if we pack +; those outputs tightly in LDS space, the LDS usage will be reduced greatly. +; +; In this test, we have 25 vertex outputs and 2 primitive outputs. Further, the mesh shader +; uses 2500 dwords shared variable. All consumes LDS space. If output packing in LDS space +; is not performed, the outputs will consume 4 * (25 + 2) * 128 = 13824 dwords. 
If packing +; is enabled, the LDS consumption will be reduced to (25 + 2) * 128 = 3456. The effect is +; noticeable. + +; BEGIN_SHADERTEST +; RUN: amdllpc -v -gfxip=10.3 %s | FileCheck -check-prefix=SHADERTEST %s + +; SHADERTEST-LABEL: // LLPC mesh shader LDS region info (in dwords) and general info + +; SHADERTEST-LABEL: Per-vertex Output : offset = 0x0083, size = 0x0C80 +; SHADERTEST-LABEL: Per-primitive Output : offset = 0x0D03, size = 0x0100 + +; SHADERTEST-LABEL: Vertex Outputs Layout (stride = 25, exports = 25): +; SHADERTEST-LABEL: -- location = 0, components = 1, offset = 0 +; SHADERTEST-LABEL: -- location = 1, components = 1, offset = 1 +; SHADERTEST-LABEL: -- location = 2, components = 1, offset = 2 +; SHADERTEST-LABEL: -- location = 3, components = 1, offset = 3 +; SHADERTEST-LABEL: -- location = 4, components = 1, offset = 4 +; SHADERTEST-LABEL: -- location = 5, components = 1, offset = 5 +; SHADERTEST-LABEL: -- location = 6, components = 1, offset = 6 +; SHADERTEST-LABEL: -- location = 7, components = 1, offset = 7 +; SHADERTEST-LABEL: -- location = 8, components = 1, offset = 8 +; SHADERTEST-LABEL: -- location = 9, components = 1, offset = 9 +; SHADERTEST-LABEL: -- location = 10, components = 1, offset = 10 +; SHADERTEST-LABEL: -- location = 11, components = 1, offset = 11 +; SHADERTEST-LABEL: -- location = 12, components = 1, offset = 12 +; SHADERTEST-LABEL: -- location = 13, components = 1, offset = 13 +; SHADERTEST-LABEL: -- location = 14, components = 1, offset = 14 +; SHADERTEST-LABEL: -- location = 15, components = 1, offset = 15 +; SHADERTEST-LABEL: -- location = 16, components = 1, offset = 16 +; SHADERTEST-LABEL: -- location = 17, components = 1, offset = 17 +; SHADERTEST-LABEL: -- location = 18, components = 1, offset = 18 +; SHADERTEST-LABEL: -- location = 19, components = 1, offset = 19 +; SHADERTEST-LABEL: -- location = 20, components = 1, offset = 20 +; SHADERTEST-LABEL: -- location = 21, components = 1, offset = 21 +; SHADERTEST-LABEL: 
-- location = 22, components = 1, offset = 22 +; SHADERTEST-LABEL: -- location = 23, components = 1, offset = 23 +; SHADERTEST-LABEL: -- location = 24, components = 1, offset = 24 + +; SHADERTEST-LABEL: Primitive outputs layout (stride = 2, exports = 2): +; SHADERTEST-LABEL: -- location = 0, components = 1, offset = 0 +; SHADERTEST-LABEL: -- location = 1, components = 1, offset = 1 + +; SHADERTEST: AMDLLPC SUCCESS +; END_SHADERTEST + +[MeshGlsl] +#version 460 core + +#extension GL_EXT_mesh_shader: enable +#extension GL_EXT_shader_explicit_arithmetic_types: enable + +layout(local_size_x=128, local_size_y=1, local_size_z=1) in; +layout(points, max_vertices = 128, max_primitives = 128) out; + +layout(location = 0) out float vertex[][25]; + +layout(location = 25) out perprimitiveEXT float primitive[][2]; + +shared float sharedVar[2500]; + +void main() { + SetMeshOutputsEXT(128, 128); + + for (int i = 0; i < 25; i++) + vertex[gl_LocalInvocationIndex][i] = float(i / 25.0); + + primitive[gl_LocalInvocationIndex][0] = 0.0; + primitive[gl_LocalInvocationIndex][1] = 0.5; + + sharedVar[gl_LocalInvocationIndex] = float(gl_LocalInvocationIndex); +} + +[MeshInfo] +entryPoint = main + +[FsGlsl] +#version 460 core + +#extension GL_EXT_mesh_shader: enable + +layout(location = 0) in float vertex[25]; +layout(location = 25) in perprimitiveEXT float primitive[2]; + +layout(location = 0) out vec4 outColor; + +void main() { + outColor = vec4(0.0); + + for (int i = 0; i < 25; i++) + outColor.x += vertex[i]; + + outColor.y += primitive[0]; + outColor.y += primitive[1]; +} + +[FsInfo] +entryPoint = main + +[GraphicsPipelineState] +patchControlPoints = 3 diff --git a/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe b/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe index ab1bb64817..02aa21c359 100644 --- a/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe +++ b/llpc/test/shaderdb/general/PipelineTess_TestInOutPacking.pipe @@ -3,11 +3,11 @@ ; 
SHADERTEST_PP0-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST_PP0: [[VERTEX_BASE:%[0-9a-zA-Z.]+]] = mul i32 %{{[0-9]*}}, 48 -; SHADERTEST_PP0: [[P0:%[0-9a-zA-Z.]+]] = getelementptr i32, ptr addrspace(3) {{.*}}, i32 [[VERTEX_BASE]] -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(44|176)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(45|180)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(46|184)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(47|188)}} +; SHADERTEST_PP0: [[P0:%[0-9a-zA-Z.]+]] = getelementptr {{i8|i32}}, ptr addrspace(3) {{.*}}, i32 [[VERTEX_BASE]] +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(44|176)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(45|180)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(46|184)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(47|188)}} ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 1 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 4 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 5 @@ -15,29 +15,26 @@ ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 9 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 10 ; SHADERTEST_PP0: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 12 -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(16|64)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(20|80)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(24|96)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr 
addrspace(3) [[P0]], i32 {{(28|112)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(29|116)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(30|120)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(31|124)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(32|128)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(33|132)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(36|144)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(37|148)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(38|152)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(39|156)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(40|160)}} -; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(41|164)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(28|112)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(29|116)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(30|120)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(31|124)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(32|128)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(33|132)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(36|144)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(37|148)}} +; SHADERTEST_PP0: 
%{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(38|152)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(39|156)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(40|160)}} +; SHADERTEST_PP0: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(41|164)}} ; SHADERTEST_PP0: call void @llvm.amdgcn.exp.f32(i32 {{.*}}32, i32 {{.*}}15, float %{{[^,]*}}, float %{{[^,]*}}, float %{{[^,]*}}, float %{{[^,]*}}, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP0: call void @llvm.amdgcn.exp.f32(i32 {{.*}}33, i32 {{.*}}3, float %{{[^,]*}}, float %{{[^,]*}}, float poison, float poison, i1 {{.*}}false, i1 {{.*}}false) -; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 1, i32 %PrimMask) -; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 2, i32 immarg 0, i32 %PrimMask) -; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 3, i32 immarg 0, i32 %PrimMask) -; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 0, i32 immarg 1, i32 %PrimMask) -; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 0, i32 immarg 0, i32 %PrimMask) -; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 0, i32 %PrimMask) +; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 1, i32 1, i32 %PrimMask) +; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 2, i32 0, i32 %PrimMask) +; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 3, i32 0, i32 %PrimMask) +; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 0, i32 1, i32 %PrimMask) +; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 0, i32 0, i32 %PrimMask) +; SHADERTEST_PP0: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 1, 
i32 0, i32 %PrimMask) ; SHADERTEST_PP0: AMDLLPC SUCCESS ; END_SHADERTEST @@ -45,20 +42,20 @@ ; RUN: amdllpc -enable-part-pipeline=1 -v %gfxip %s | FileCheck -check-prefix=SHADERTEST_PP1 %s ; Fragment shader part-pipeline: ; SHADERTEST_PP1-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 1, i32 %PrimMask) -; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 2, i32 immarg 0, i32 %PrimMask) -; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 3, i32 immarg 0, i32 %PrimMask) -; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 0, i32 immarg 1, i32 %PrimMask) -; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 0, i32 immarg 0, i32 %PrimMask) -; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 0, i32 %PrimMask) +; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 1, i32 1, i32 %PrimMask) +; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 2, i32 0, i32 %PrimMask) +; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 3, i32 0, i32 %PrimMask) +; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 0, i32 1, i32 %PrimMask) +; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 0, i32 0, i32 %PrimMask) +; SHADERTEST_PP1: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 1, i32 0, i32 %PrimMask) ; Pre-rasterization part-pipeline: ; SHADERTEST_PP1-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST_PP1: [[VERTEX_BASE:%[0-9a-zA-Z.]+]] = mul i32 %{{[0-9]*}}, 48 ; SHADERTEST_PP1: [[P0:%[0-9a-zA-Z.]+]] = getelementptr i32, ptr addrspace(3) {{.*}}, i32 [[VERTEX_BASE]] -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(44|176)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr 
{{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(45|180)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(46|184)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(47|188)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(44|176)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(45|180)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(46|184)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(47|188)}} ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 1 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 4 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 5 @@ -66,21 +63,18 @@ ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 9 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 10 ; SHADERTEST_PP1: %{{[0-9]*}} = {{add|or}} {{.*}}i32 [[VERTEX_BASE]], 12 -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(16|64)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(20|80)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(24|96)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(28|112)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(29|116)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(30|120)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(31|124)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(32|128)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr 
{{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(33|132)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(36|144)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(37|148)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(38|152)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(39|156)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(40|160)}} -; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{(i8|i32)}}, ptr addrspace(3) [[P0]], i32 {{(41|164)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(28|112)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(29|116)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(30|120)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(31|124)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(32|128)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(33|132)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(36|144)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(37|148)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(38|152)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(39|156)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(40|160)}} +; SHADERTEST_PP1: %{{[0-9]*}} = getelementptr {{i8|i32}}, ptr addrspace(3) [[P0]], i32 {{(41|164)}} ; SHADERTEST_PP1: call void @llvm.amdgcn.exp.f32(i32 {{.*}}32, i32 {{.*}}15, float %{{[^,]*}}, float %{{[^,]*}}, 
float %{{[^,]*}}, float %{{[^,]*}}, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP1: call void @llvm.amdgcn.exp.f32(i32 {{.*}}33, i32 {{.*}}3, float %{{[^,]*}}, float %{{[^,]*}}, float poison, float poison, i1 {{.*}}false, i1 {{.*}}false) ; SHADERTEST_PP1: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe b/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe index 75155ed03d..468e4376e0 100644 --- a/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe +++ b/llpc/test/shaderdb/general/PipelineTess_XfbWithManyComponents.pipe @@ -10,7 +10,7 @@ ; SHADERTEST-LABEL: .fetchXfbOutput ; Write v4[31] = 4.0 -> LDS -; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr i32, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} +; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr {{i8|i32}}, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} ; SHADERTEST-NEXT: store i32 1082130432, ptr addrspace(3) [[ldsPtr1]], align 4 ; Write v3[31] = 3.0 -> LDS @@ -27,7 +27,7 @@ ; SHADERTEST-LABEL: .exportXfbOutput ; Read v4[31] <- LDS -; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr i32, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} +; SHADERTEST: [[ldsPtr1:%[0-9]*]] = getelementptr {{i8|i32}}, ptr addrspace(3) @Lds.GS, i32 %{{[0-9]*}} ; SHADERTEST-NEXT: [[v4:%[0-9]*]] = load float, ptr addrspace(3) [[ldsPtr1]], align 4 ; SHADERTEST-NEXT: [[offset1:%[0-9]*]] = mul i32 %threadIdInSubgroup, 1536 ; SHADERTEST-NEXT: [[offset2:%[0-9]*]] = or disjoint i32 [[offset1]], 508 diff --git a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe index aa08b8a473..425456fb11 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_DynamicSampleInfo.pipe @@ -244,6 +244,6 @@ attribute[1].offset = 16 ; SHADERTEST-NEXT: [[TMP91:%.*]] = extractelement <4 x float> [[TMP87]], i32 3 ; SHADERTEST-NEXT: [[TMP92:%.*]] = call <2 x 
half> @llvm.amdgcn.cvt.pkrtz(float [[TMP88]], float [[TMP89]]) ; SHADERTEST-NEXT: [[TMP93:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[TMP90]], float [[TMP91]]) -; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> [[TMP92]], <2 x half> [[TMP93]], i1 immarg true, i1 immarg true) #[[ATTR10:[0-9]+]] +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[TMP92]], <2 x half> [[TMP93]], i1 true, i1 true) ; SHADERTEST-NEXT: ret void ; diff --git a/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe b/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe index 5384068efa..8317292c12 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_PixelShaderSamplesZero.pipe @@ -3,8 +3,8 @@ ; SHADERTEST-LABEL: define dllexport amdgpu_ps void @_amdgpu_ps_main( ; SHADERTEST: %[[PerspInterpSample:[^,]*]] = extractelement <2 x float> %PerspInterpSample, i64 1 ; SHADERTEST: %[[PerspInterpSample:[^,]*]] = extractelement <2 x float> %PerspInterpSample, i64 0 -; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 0, i32 immarg 0, i32 %PrimMask) -; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 immarg 0, i32 immarg 0, i32 %PrimMask) +; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 0, i32 0, i32 %PrimMask) +; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 0, i32 0, i32 %PrimMask) ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe index adfd99480d..24db0f94cf 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe +++ 
b/llpc/test/shaderdb/general/PipelineVsFs_TestIgnoreDynamicDualSourceBlendEnable.pipe @@ -2,9 +2,9 @@ ; BEGIN_SHADERTEST ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 immarg 0 -; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 1 -; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 2 +; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 0 +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 1 +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 2 ; END_SHADERTEST [VsGlsl] diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe index 71dc89747d..129666d422 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestIndirectResourceLayout.pipe @@ -27,7 +27,7 @@ ; SHADERTEST: [[Value0:%[.a-zA-Z0-9]+]] = extractelement <4 x float> [[Value]], i64 0 ; SHADERTEST: [[Color0:%[0-9]*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[Value0]], float [[Value1]]) ; SHADERTEST: [[Color1:%[0-9]*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[Value2]], float [[Value3]]) -; SHADERTEST: call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> [[Color0]], <2 x half> [[Color1]], i1 immarg true, i1 immarg true) +; SHADERTEST: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[Color0]], <2 x half> [[Color1]], i1 true, i1 true) ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe b/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe index 65b4eb5fe6..c1c9296094 100644 --- a/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe +++ b/llpc/test/shaderdb/general/PipelineVsFs_TestNullFs.pipe @@ -51,6 +51,7 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; 
CHECK-NEXT: .kill_enable: false ; CHECK-NEXT: .mask_export_enable: false ; CHECK-NEXT: .pre_shader_depth_coverage_enable: 0 +; CHECK-NEXT: .primitive_ordered_pixel_shader: false ; CHECK-NEXT: .stencil_test_val_export_enable: 0 ; CHECK-NEXT: .z_export_enable: 0 ; CHECK-NEXT: .z_order: 0x1 diff --git a/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh b/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh new file mode 100644 index 0000000000..bbcacc3da0 --- /dev/null +++ b/llpc/test/shaderdb/general/TestNumComponentsWithReversedAccessOrder.mesh @@ -0,0 +1,40 @@ +// This test is to verify we calculate correct component counts for the outputs when the component +// accessing is from large component indices to small ones. Make sure we use the max value to finally +// determine the correct component count for an output. + +// BEGIN_SHADERTEST +// RUN: amdllpc -v gfxip=10.3 %s | FileCheck --check-prefix=SHADERTEST %s + +// SHADERTEST-LABEL: // LLPC mesh shader LDS region info (in dwords) and general info + +// SHADERTEST-LABEL: Vertex Outputs Layout (stride = 4, exports = 1): +// SHADERTEST-LABEL: -- location = 0, components = 4, offset = 0, export = 0 + +// SHADERTEST-LABEL: Primitive outputs layout (stride = 3, exports = 1): +// SHADERTEST-LABEL: -- location = 1, components = 3, offset = 0, export = 1 + +// SHADERTEST: AMDLLPC SUCCESS +// END_SHADERTEST + +#version 460 core + +#extension GL_EXT_mesh_shader: enable + +layout(local_size_x=1, local_size_y=1, local_size_z=1) in; +layout(max_vertices = 1, max_primitives = 1, points) out; + +layout(location = 0) out vec4 data1[]; +layout(location = 1) perprimitiveEXT out vec3 data2[]; + +void main() { + SetMeshOutputsEXT(1, 1); + + data1[gl_LocalInvocationIndex].w = 0.4; + data1[gl_LocalInvocationIndex].z = 0.3; + data1[gl_LocalInvocationIndex].y = 0.2; + data1[gl_LocalInvocationIndex].x = 0.1; + + data2[gl_LocalInvocationIndex].z = -0.3; + data2[gl_LocalInvocationIndex].y = -0.2; + 
data2[gl_LocalInvocationIndex].x = -0.1; +} diff --git a/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp b/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp index 5c2c7ee99b..4a05fb20cb 100644 --- a/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp +++ b/llpc/test/shaderdb/general/TestWorkgroupIdOpt.comp @@ -16,20 +16,25 @@ void main() test = gl_WorkGroupID.x; } // CHECK-LABEL: define {{[^@]+}}@_amdgpu_cs_main -// CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[WORKGROUPID1:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !{{.*}} !lgc.shaderstage !{{.*}} { +// CHECK-SAME: (i32 inreg noundef [[GLOBALTABLE:%.*]], i32 inreg noundef [[USERDATA0:%.*]], i32 inreg noundef [[WORKGROUPID1:%.*]], i32 inreg noundef [[MULTIDISPATCHINFO:%.*]], <3 x i32> noundef [[LOCALINVOCATIONID:%.*]]) #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !5 !lgc.shaderstage !6 { // CHECK-NEXT: .entry: // CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() // CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296 // CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[USERDATA0]] to i64 -// CHECK-NEXT: [[TMP3:%.*]] = or {{(disjoint )?}}i64 [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]] // CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP4]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP4]], i32 -1) ] // CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP4]], align 16 // CHECK-NEXT: call void @llvm.amdgcn.raw.buffer.store.i32(i32 [[WORKGROUPID1]], <4 x i32> [[TMP5]], i32 0, i32 0, i32 0) // CHECK-NEXT: ret void // //. 
-// CHECK: attributes #[[ATTR0]] = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="256,256" "amdgpu-memory-bound"="false" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } +// CHECK: attributes #[[ATTR0]] = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="256,256" "amdgpu-memory-bound"="false" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" } // CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -// CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(none) } -// CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(write) } +// CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +// CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind willreturn memory(none) } +// CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(write) } //. 
+// CHECK: [[META0:![0-9]+]] = !{i32 16, i32 16, i32 1} +// CHECK: [[META1:![0-9]+]] = !{!"Vulkan"} +// CHECK: [[META2:![0-9]+]] = !{i32 1} diff --git a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe index 4e89bd1ffd..4962809bbb 100644 --- a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe @@ -56,6 +56,7 @@ entryPoint = main ; SHADERTEST-NEXT: .kill_enable: false ; SHADERTEST-NEXT: .mask_export_enable: false ; SHADERTEST-NEXT: .pre_shader_depth_coverage_enable: 0 +; SHADERTEST-NEXT: .primitive_ordered_pixel_shader: false ; SHADERTEST-NEXT: .stencil_test_val_export_enable: 0 ; SHADERTEST-NEXT: .z_export_enable: 0 ; SHADERTEST-NEXT: .z_order: 0x1 diff --git a/llpc/test/shaderdb/gfx11/FlatParamDpp.frag b/llpc/test/shaderdb/gfx11/FlatParamDpp.frag index 84c8a4023e..19d2d24c67 100644 --- a/llpc/test/shaderdb/gfx11/FlatParamDpp.frag +++ b/llpc/test/shaderdb/gfx11/FlatParamDpp.frag @@ -5,7 +5,7 @@ // // CHECK-LABEL: {{^}}// LLPC pipeline patching results // CHECK: call void @llvm.amdgcn.kill(i1 false) -// CHECK: [[P0:%.*]] = call float @llvm.amdgcn.lds.param.load(i32 immarg 2, i32 immarg 2, i32 %PrimMask) +// CHECK: [[P0:%.*]] = call float @llvm.amdgcn.lds.param.load(i32 2, i32 2, i32 %PrimMask) // CHECK: [[P1:%.*]] = bitcast float [[P0]] to i32 // CHECK: [[P2:%.*]] = call i32 @llvm.amdgcn.mov.dpp.i32(i32 [[P1]], i32 0, i32 15, i32 15, i1 true) // CHECK: [[P3:%.*]] = call i32 @llvm.amdgcn.strict.wqm.i32(i32 [[P2]]) diff --git a/llpc/test/shaderdb/gfx11/HalfAttribute.frag b/llpc/test/shaderdb/gfx11/HalfAttribute.frag index 1f34547b47..69c9dfbadd 100644 --- a/llpc/test/shaderdb/gfx11/HalfAttribute.frag +++ b/llpc/test/shaderdb/gfx11/HalfAttribute.frag @@ -4,7 +4,7 @@ // RUN: FileCheck %s --check-prefix=CHECK // // CHECK-LABEL: {{^}}// LLPC pipeline patching results -// CHECK: [[P:%.*]] = call 
float @llvm.amdgcn.lds.param.load(i32 immarg 0, i32 immarg 0, i32 %PrimMask) #1 +// CHECK: [[P:%.*]] = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %PrimMask) // CHECK: [[P1:%.*]] = call float @llvm.amdgcn.interp.p10.rtz.f16(float [[P]], float %PerspInterpCenter.i0, float [[P]], i1 false) // CHECK: [[P2:%.*]] = call half @llvm.amdgcn.interp.p2.rtz.f16(float [[P]], float %PerspInterpCenter.i1, float [[P1]], i1 false) // CHECK-LABEL: {{^}}===== AMDLLPC SUCCESS ===== diff --git a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe index 32b699566e..d75fa4f7bd 100644 --- a/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe +++ b/llpc/test/shaderdb/gfx11/SgprUserDataInit_Fs.pipe @@ -168,6 +168,7 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .kill_enable: false ; CHECK-NEXT: .mask_export_enable: false ; CHECK-NEXT: .pre_shader_depth_coverage_enable: 0 +; CHECK-NEXT: .primitive_ordered_pixel_shader: false ; CHECK-NEXT: .stencil_test_val_export_enable: 0 ; CHECK-NEXT: .z_export_enable: 0 ; CHECK-NEXT: .z_order: 0x1 @@ -359,7 +360,7 @@ colorBuffer[0].blendSrcAlphaToColor = 0 ; CHECK-NEXT: .user_data_reg_map: ; CHECK-NEXT: - 0x10000000 ; CHECK-NEXT: - 0x12 -; CHECK-NEXT: - 0x10000023 +; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff ; CHECK-NEXT: - 0xffffffff diff --git a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe index 584652edee..323b350d11 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe @@ -16,7 +16,7 @@ colorExport=PipelineLibCes_TestColorExport.pipe ; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) ; SHADERTEST-NEXT: [[TMP2:%.*]] = call <4 x float> @lgc.input.import.generic__v4f32(i1 false, i32 
0, i32 0, i32 0, i32 poison) ; SHADERTEST-NEXT: [[TMP3:%.*]] = call i32 @lgc.special.user.data.BaseVertex(i32 268435459) #[[ATTR3:[0-9]+]] -; SHADERTEST-NEXT: [[TMP4:%.*]] = call i32 @lgc.shader.input.VertexId(i32 17) #[[ATTR3]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = call i32 @lgc.shader.input.VertexId(i32 18) #[[ATTR3]] ; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[TMP3]], [[TMP4]] ; SHADERTEST-NEXT: [[DOTFR:%.*]] = freeze i32 [[VERTEXINDEX]] ; SHADERTEST-NEXT: [[TMP5:%.*]] = icmp slt i32 [[DOTFR]], 3 @@ -55,7 +55,7 @@ colorExport=PipelineLibCes_TestColorExport.pipe ; SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 ; SHADERTEST-NEXT: [[TMP7:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[TMP3]], float [[TMP4]]) ; SHADERTEST-NEXT: [[TMP8:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[TMP5]], float [[TMP6]]) -; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 immarg 0, i32 immarg 15, <2 x half> [[TMP7]], <2 x half> [[TMP8]], i1 immarg true, i1 immarg true) #[[ATTR2:[0-9]+]] +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> [[TMP7]], <2 x half> [[TMP8]], i1 true, i1 true) ; SHADERTEST-NEXT: call void @llvm.amdgcn.endpgm() ; SHADERTEST-NEXT: unreachable ; diff --git a/llpc/test/shaderdb/hlsl/PipelineHlsl_TestRuntimeArrayPad.pipe b/llpc/test/shaderdb/hlsl/PipelineHlsl_TestRuntimeArrayPad.pipe deleted file mode 100644 index 8ced321837..0000000000 --- a/llpc/test/shaderdb/hlsl/PipelineHlsl_TestRuntimeArrayPad.pipe +++ /dev/null @@ -1,334 +0,0 @@ -; Check that the corner case that HLSL-derived shader that has a variable-sized array with struct-type element whose last member is variable-sized array. -; The case can work well by not padding the outmost variable-sized array. 
- -; BEGIN_SHADERTEST -; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: @0 = external addrspace(7) global <{ [4294967295 x <{ i32, [4294967295 x float] }>] }> -; SHADERTEST: AMDLLPC SUCCESS -; END_SHADERTEST - -[Version] -version = 53 - -[VsSpirv] - OpCapability Shader - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint Vertex %4 "main" - %void = OpTypeVoid - %3 = OpTypeFunction %void - %4 = OpFunction %void None %3 - %5 = OpLabel - %8 = OpFunctionCall %void %6 - OpReturn - OpFunctionEnd - %6 = OpFunction %void None %3 - %7 = OpLabel - OpReturn - OpFunctionEnd - -[VsInfo] -entryPoint = main -options.trapPresent = 0 -options.debugMode = 0 -options.enablePerformanceData = 0 -options.allowReZ = 0 -options.vgprLimit = 0 -options.sgprLimit = 0 -options.maxThreadGroupsPerComputeUnit = 0 -options.waveSize = 0 -options.wgpMode = 0 -options.waveBreakSize = None -options.forceLoopUnrollCount = 0 -options.useSiScheduler = 0 -options.allowVaryWaveSize = 0 -options.enableLoadScalarizer = 0 -options.disableLicm = 0 -options.unrollThreshold = 0 -options.scalarThreshold = 0 -options.disableLoopUnroll = 0 -options.fp32DenormalMode = Auto -options.adjustDepthImportVrs = 0 -options.disableLicmThreshold = 0 -options.unrollHintThreshold = 0 -options.dontUnrollHintThreshold = 0 - -[TcsSpirv] - OpCapability Tessellation - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint TessellationControl %4 "main" %gl_InvocationID %gl_TessLevelOuter - OpExecutionMode %4 OutputVertices 1 - OpExecutionMode %4 Isolines - OpExecutionMode %4 SpacingFractionalOdd - OpExecutionMode %4 PointMode - OpDecorate %gl_InvocationID BuiltIn InvocationId - OpDecorate %gl_TessLevelOuter Patch - OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter - OpMemberDecorate %_struct_46 0 NonWritable - OpMemberDecorate %_struct_46 0 Offset 0 - OpDecorate 
%_struct_46 BufferBlock - OpDecorate %48 DescriptorSet 0 - OpDecorate %48 Binding 0 - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %uint = OpTypeInt 32 0 - %uint_2 = OpConstant %uint 2 -%_arr_float_uint_2 = OpTypeArray %float %uint_2 - %_struct_12 = OpTypeStruct %_arr_float_uint_2 - %13 = OpTypeFunction %_struct_12 - %uint_4 = OpConstant %uint 4 - %uint_0 = OpConstant %uint 0 -%_ptr_Input_uint = OpTypePointer Input %uint -%gl_InvocationID = OpVariable %_ptr_Input_uint Input - %int = OpTypeInt 32 1 - %int_0 = OpConstant %int 0 - %bool = OpTypeBool -%_ptr_Function__struct_12 = OpTypePointer Function %_struct_12 -%_arr_float_uint_4 = OpTypeArray %float %uint_4 -%_ptr_Output__arr_float_uint_4 = OpTypePointer Output %_arr_float_uint_4 -%gl_TessLevelOuter = OpVariable %_ptr_Output__arr_float_uint_4 Output -%_ptr_Function_float = OpTypePointer Function %float -%_ptr_Output_float = OpTypePointer Output %float - %int_1 = OpConstant %int 1 - %float_1 = OpConstant %float 1 - %_struct_46 = OpTypeStruct %float -%_ptr_Uniform__struct_46 = OpTypePointer Uniform %_struct_46 - %48 = OpVariable %_ptr_Uniform__struct_46 Uniform -%_ptr_Uniform_float = OpTypePointer Uniform %float - %4 = OpFunction %void None %3 - %5 = OpLabel - %29 = OpVariable %_ptr_Function__struct_12 Function - %16 = OpFunctionCall %void %6 - OpControlBarrier %uint_2 %uint_4 %uint_0 - %21 = OpLoad %uint %gl_InvocationID - %25 = OpIEqual %bool %21 %int_0 - OpSelectionMerge %27 None - OpBranchConditional %25 %26 %27 - %26 = OpLabel - %30 = OpFunctionCall %_struct_12 %14 - OpStore %29 %30 - %35 = OpAccessChain %_ptr_Function_float %29 %int_0 %int_0 - %36 = OpLoad %float %35 - %38 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_0 - OpStore %38 %36 - %40 = OpAccessChain %_ptr_Function_float %29 %int_0 %int_1 - %41 = OpLoad %float %40 - %42 = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %int_1 - OpStore %42 %41 - OpBranch %27 - %27 = OpLabel - OpReturn - OpFunctionEnd - %6 
= OpFunction %void None %3 - %7 = OpLabel - OpReturn - OpFunctionEnd - %14 = OpFunction %_struct_12 None %13 - %15 = OpLabel - %43 = OpVariable %_ptr_Function__struct_12 Function - %45 = OpAccessChain %_ptr_Function_float %43 %int_0 %int_0 - OpStore %45 %float_1 - %50 = OpAccessChain %_ptr_Uniform_float %48 %int_0 - %51 = OpLoad %float %50 - %52 = OpAccessChain %_ptr_Function_float %43 %int_0 %int_1 - OpStore %52 %51 - %53 = OpLoad %_struct_12 %43 - OpReturnValue %53 - OpFunctionEnd - -[TcsInfo] -entryPoint = main -options.trapPresent = 0 -options.debugMode = 0 -options.enablePerformanceData = 0 -options.allowReZ = 0 -options.vgprLimit = 0 -options.sgprLimit = 0 -options.maxThreadGroupsPerComputeUnit = 0 -options.waveSize = 0 -options.wgpMode = 0 -options.waveBreakSize = None -options.forceLoopUnrollCount = 0 -options.useSiScheduler = 0 -options.allowVaryWaveSize = 0 -options.enableLoadScalarizer = 0 -options.disableLicm = 0 -options.unrollThreshold = 0 -options.scalarThreshold = 0 -options.disableLoopUnroll = 0 -options.fp32DenormalMode = Auto -options.adjustDepthImportVrs = 0 -options.disableLicmThreshold = 0 -options.unrollHintThreshold = 0 -options.dontUnrollHintThreshold = 0 - -[TesSpirv] - OpCapability Tessellation - %1 = OpExtInstImport "GLSL.std.450" - OpMemoryModel Logical GLSL450 - OpEntryPoint TessellationEvaluation %4 "main" %gl_TessCoord - OpDecorate %_runtimearr_float ArrayStride 4 - OpMemberDecorate %_struct_17 0 Offset 0 - OpMemberDecorate %_struct_17 1 Offset 4 - OpDecorate %_runtimearr__struct_17 ArrayStride 8 - OpMemberDecorate %_struct_19 0 Coherent - OpMemberDecorate %_struct_19 0 Offset 0 - OpDecorate %_struct_19 BufferBlock - OpDecorate %21 DescriptorSet 0 - OpDecorate %21 Binding 1 - OpDecorate %gl_TessCoord Patch - OpDecorate %gl_TessCoord BuiltIn TessCoord - %void = OpTypeVoid - %3 = OpTypeFunction %void - %float = OpTypeFloat 32 - %v2float = OpTypeVector %float 2 -%_ptr_Function_v2float = OpTypePointer Function %v2float - %9 = 
OpTypeFunction %void %_ptr_Function_v2float - %int = OpTypeInt 32 1 -%_ptr_Function_int = OpTypePointer Function %int -%_runtimearr_float = OpTypeRuntimeArray %float - %_struct_17 = OpTypeStruct %int %_runtimearr_float -%_runtimearr__struct_17 = OpTypeRuntimeArray %_struct_17 - %_struct_19 = OpTypeStruct %_runtimearr__struct_17 -%_ptr_Uniform__struct_19 = OpTypePointer Uniform %_struct_19 - %21 = OpVariable %_ptr_Uniform__struct_19 Uniform - %int_0 = OpConstant %int 0 -%_ptr_Uniform_int = OpTypePointer Uniform %int - %int_1 = OpConstant %int 1 - %uint = OpTypeInt 32 0 - %uint_1 = OpConstant %uint 1 - %uint_0 = OpConstant %uint 0 -%_ptr_Function_float = OpTypePointer Function %float -%_ptr_Uniform_float = OpTypePointer Uniform %float - %v3float = OpTypeVector %float 3 -%_ptr_Input_v3float = OpTypePointer Input %v3float -%gl_TessCoord = OpVariable %_ptr_Input_v3float Input - %4 = OpFunction %void None %3 - %5 = OpLabel - %36 = OpVariable %_ptr_Function_v2float Function - %44 = OpVariable %_ptr_Function_v2float Function - %40 = OpLoad %v3float %gl_TessCoord - %41 = OpCompositeExtract %float %40 0 - %42 = OpCompositeExtract %float %40 1 - %43 = OpCompositeConstruct %v2float %41 %42 - OpStore %36 %43 - %45 = OpLoad %v2float %36 - OpStore %44 %45 - %46 = OpFunctionCall %void %11 %44 - OpReturn - OpFunctionEnd - %11 = OpFunction %void None %9 - %10 = OpFunctionParameter %_ptr_Function_v2float - %12 = OpLabel - %15 = OpVariable %_ptr_Function_int Function - %24 = OpAccessChain %_ptr_Uniform_int %21 %int_0 %int_0 %int_0 - %29 = OpAtomicIAdd %int %24 %uint_1 %uint_0 %int_1 - OpStore %15 %29 - %30 = OpLoad %int %15 - %32 = OpAccessChain %_ptr_Function_float %10 %uint_0 - %33 = OpLoad %float %32 - %35 = OpAccessChain %_ptr_Uniform_float %21 %int_0 %int_0 %int_1 %30 - OpStore %35 %33 - OpReturn - OpFunctionEnd - -[TesInfo] -entryPoint = main -options.trapPresent = 0 -options.debugMode = 0 -options.enablePerformanceData = 0 -options.allowReZ = 0 -options.vgprLimit = 0 
-options.sgprLimit = 0 -options.maxThreadGroupsPerComputeUnit = 0 -options.waveSize = 0 -options.wgpMode = 0 -options.waveBreakSize = None -options.forceLoopUnrollCount = 0 -options.useSiScheduler = 0 -options.allowVaryWaveSize = 0 -options.enableLoadScalarizer = 0 -options.disableLicm = 0 -options.unrollThreshold = 0 -options.scalarThreshold = 0 -options.disableLoopUnroll = 0 -options.fp32DenormalMode = Auto -options.adjustDepthImportVrs = 0 -options.disableLicmThreshold = 0 -options.unrollHintThreshold = 0 -options.dontUnrollHintThreshold = 0 - -[ResourceMapping] -userDataNode[0].visibility = 2 -userDataNode[0].type = IndirectUserDataVaPtr -userDataNode[0].offsetInDwords = 0 -userDataNode[0].sizeInDwords = 1 -userDataNode[0].indirectUserDataCount = 0 -userDataNode[1].visibility = 16 -userDataNode[1].type = StreamOutTableVaPtr -userDataNode[1].offsetInDwords = 2 -userDataNode[1].sizeInDwords = 1 -userDataNode[2].visibility = 12 -userDataNode[2].type = DescriptorTableVaPtr -userDataNode[2].offsetInDwords = 4 -userDataNode[2].sizeInDwords = 1 -userDataNode[2].next[0].type = DescriptorBuffer -userDataNode[2].next[0].offsetInDwords = 0 -userDataNode[2].next[0].sizeInDwords = 4 -userDataNode[2].next[0].set = 0x00000000 -userDataNode[2].next[0].binding = 0 -userDataNode[2].next[1].type = DescriptorBuffer -userDataNode[2].next[1].offsetInDwords = 4 -userDataNode[2].next[1].sizeInDwords = 4 -userDataNode[2].next[1].set = 0x00000000 -userDataNode[2].next[1].binding = 1 - -[GraphicsPipelineState] -topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST -provokingVertexMode = VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT -patchControlPoints = 1 -deviceIndex = 0 -disableVertexReuse = 0 -switchWinding = 0 -enableMultiView = 0 -depthClipEnable = 1 -rasterizerDiscardEnable = 1 -perSampleShading = 0 -numSamples = 1 -pixelShaderSamples = 0 -samplePatternIdx = 0 -usrClipPlaneMask = 0 -alphaToCoverageEnable = 0 -dualSourceBlendEnable = 0 -nggState.enableNgg = 1 -nggState.enableGsUse = 0 
-nggState.forceCullingMode = 0 -nggState.compactMode = NggCompactDisable -nggState.enableVertexReuse = 0 -nggState.enableBackfaceCulling = 1 -nggState.enableFrustumCulling = 0 -nggState.enableBoxFilterCulling = 0 -nggState.enableSphereCulling = 0 -nggState.enableSmallPrimFilter = 1 -nggState.enableCullDistanceCulling = 0 -nggState.backfaceExponent = 0 -nggState.subgroupSizing = Auto -nggState.primsPerSubgroup = 256 -nggState.vertsPerSubgroup = 256 -dynamicVertexStride = 0 -enableUberFetchShader = 0 -enableEarlyCompile = 0 -options.includeDisassembly = 0 -options.scalarBlockLayout = 1 -options.includeIr = 0 -options.robustBufferAccess = 0 -options.reconfigWorkgroupLayout = 0 -options.shadowDescriptorTableUsage = Disable -options.shadowDescriptorTablePtrHigh = 2 -options.extendedRobustness.robustBufferAccess = 0 -options.extendedRobustness.robustImageAccess = 0 -options.extendedRobustness.nullDescriptor = 0 -options.optimizeTessFactor = 0 diff --git a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe index d023d321f7..05cad7e5fb 100644 --- a/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe +++ b/llpc/test/shaderdb/multiple_inputs/test_inputs/PipelineVsFs_ConstantData_Vs2Fs1.pipe @@ -50,7 +50,7 @@ colorBuffer[0].blendEnable = 0 ; SHADERTEST2_PP0-NEXT: [[DOTI2:%.*]] = extractelement <4 x float> [[TMP5]], i64 2 ; SHADERTEST2_PP0-NEXT: [[DOTI1:%.*]] = extractelement <4 x float> [[TMP5]], i64 1 ; SHADERTEST2_PP0-NEXT: [[DOTI0:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 -; SHADERTEST2_PP0-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float [[DOTI0]], float [[DOTI1]], float [[DOTI2]], float [[DOTI3]], i1 immarg true, i1 immarg true) #[[ATTR2:[0-9]+]] +; SHADERTEST2_PP0-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float [[DOTI0]], float [[DOTI1]], float [[DOTI2]], float [[DOTI3]], i1 
true, i1 true) ; SHADERTEST2_PP0-NEXT: ret void ; ; @@ -67,6 +67,6 @@ colorBuffer[0].blendEnable = 0 ; SHADERTEST2_PP1-NEXT: [[DOTI2:%.*]] = extractelement <4 x float> [[TMP5]], i64 2 ; SHADERTEST2_PP1-NEXT: [[DOTI1:%.*]] = extractelement <4 x float> [[TMP5]], i64 1 ; SHADERTEST2_PP1-NEXT: [[DOTI0:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 -; SHADERTEST2_PP1-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float [[DOTI0]], float [[DOTI1]], float [[DOTI2]], float [[DOTI3]], i1 immarg true, i1 immarg true) #[[ATTR1:[0-9]+]] +; SHADERTEST2_PP1-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float [[DOTI0]], float [[DOTI1]], float [[DOTI2]], float [[DOTI3]], i1 true, i1 true) ; SHADERTEST2_PP1-NEXT: ret void ; diff --git a/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp b/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp index 365fc5f536..3563dbf489 100644 --- a/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp +++ b/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp @@ -16,10 +16,10 @@ void main (void) /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 513, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 8, i32 513, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32 diff --git a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag index 50e5515a40..bf40a879ee 100644 --- a/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjInput_TestFsCompSpecifier_lit.frag @@ -18,10 +18,10 @@ void main (void) ; SHADERTEST: call <2 x float> (...) @lgc.input.import.interpolated__v2f32{{.*}} ; SHADERTEST: call float (...) @lgc.input.import.interpolated__f32{{.*}} ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 1, i32 immarg 0, i32 %PrimMask) -; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 immarg 1, i32 immarg 0, i32 %PrimMask) -; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 immarg 3, i32 immarg 0, i32 %PrimMask) -; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 immarg 3, i32 immarg 0, i32 %PrimMask) +; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 1, i32 0, i32 %PrimMask) +; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 1, i32 0, i32 %PrimMask) +; SHADERTEST: call float @llvm.amdgcn.interp.p1(float %{{[^,]*}}, i32 3, i32 0, i32 %PrimMask) +; SHADERTEST: call float @llvm.amdgcn.interp.p2(float %{{[^,]*}}, float %{{[^,]*}}, i32 3, i32 0, i32 %PrimMask) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag b/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag index 3e211d250a..b288e23122 100644 --- a/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag +++ 
b/llpc/test/shaderdb/object/ObjNonUniform_TestImageSample.frag @@ -28,17 +28,17 @@ void main() /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512 -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512 -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 0 -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 384 -; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 24 +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 512, +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 896, +; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 1, i32 536, ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results ; SHADERTEST-COUNT-12: call i32 @llvm.amdgcn.readfirstlane ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: {{%[0-9]*}} = call float @llvm.amdgcn.interp.mov ; SHADERTEST: {{%[0-9]*}} = bitcast float {{%[0-9]*}} to i32 -; SHADERTEST: {{%[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 {{%[0-9]*}}) +; SHADERTEST: {{%[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 {{%[0-9]*}}) ; SHADERTEST: AMDLLPC SUCCESS */ -// END_SHADERTEST \ No newline at end of file +// END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp b/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp index 8e9d0a8e17..901c075912 100644 --- a/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp +++ b/llpc/test/shaderdb/object/ObjShaderBallot_TestGeneral_lit.comp @@ -53,10 +53,10 @@ void main() ; SHADERTEST: call i32 (...) 
@lgc.create.subgroup.broadcast.first.i32( ; SHADERTEST-LABEL: {{^// LLPC.*}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.readlane(i32 %{{.*}}, i32 %{{.*}}) -; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane(i32 %{{.*}}) -; SHADERTEST-COUNT-5: call i32 @llvm.amdgcn.readlane(i32 %{{.*}}, i32 %{{.*}}) -; SHADERTEST-COUNT-7: call i32 @llvm.amdgcn.readfirstlane(i32 %{{.*}}) +; SHADERTEST: call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 %{{.*}}, i32 %{{.*}}) +; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 %{{.*}}) +; SHADERTEST-COUNT-5: call i32 @llvm.amdgcn.readlane{{(.i32)?}}(i32 %{{.*}}, i32 %{{.*}}) +; SHADERTEST-COUNT-7: call i32 @llvm.amdgcn.readfirstlane{{(.i32)?}}(i32 %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp index f311930853..94c6c8aa7b 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp @@ -30,8 +30,8 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: @[[LDS:[^ ]*]] = addrspace(3) global { i32, [16 x i32] } -; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) getelementptr {{(inbounds )?}}({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 {{[0-9]*}}) -; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) getelementptr {{(inbounds )?}}({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 {{[0-9]*}}) +; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) getelementptr {{(inbounds )?}}({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1{{.*}}) +; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) getelementptr {{(inbounds )?}}({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1{{.*}}) ; SHADERTEST: %{{[0-9]*}} = getelementptr { i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, 
i32 %{{[0-9]*}} ; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert index cda46c22b6..0704a79593 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert @@ -24,10 +24,10 @@ void main() ; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 1, i32 0), align 16 ; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 2, i32 0), align 16 ; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 3, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 1, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 2, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, 
ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 3, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}{{.*}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2{{, i32 1|\), i32 0, i32 1}}), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}{{.*}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2{{, i32 1, i32 1, i32 0|\), i32 0, i32 1\), i32 0, i32 1, i32 0}}), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}{{.*}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2{{, i32 1, i32 2, i32 0|\), i32 0, i32 1\), i32 0, i32 2, i32 0}}), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}{{.*}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2{{, i32 1, i32 3, i32 0|\), i32 0, i32 1\), i32 0, i32 3, i32 0}}), align 16 ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe new file mode 100644 index 0000000000..d1e1a03404 --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRayquery.pipe @@ -0,0 +1,293 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py +; RUN: amdllpc -o - -gfxip 11.0.1 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s +[Version] +version = 70 + +[CsGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_ray_query : require + +struct Ray { vec3 pos; float tmin; vec3 dir; float tmax; }; +layout(set =0, binding = 1) uniform accelerationStructureEXT topLevelAS; +layout(std430, set = 0, binding = 0) buffer Results 
{ vec2 results[]; }; +layout(std430, set = 0, binding = 2) buffer Rays { Ray rays[]; }; +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +void main() +{ + uint index = (gl_NumWorkGroups.x * gl_WorkGroupSize.x) * gl_GlobalInvocationID.y + gl_GlobalInvocationID.x; + uint rayFlags = gl_RayFlagsOpaqueEXT; + uint cullMask = 0xff; + Ray ray = rays[index]; + rayQueryEXT q[3]; + rayQueryInitializeEXT(q[2], topLevelAS, rayFlags, cullMask, ray.pos, ray.tmin, ray.dir, ray.tmax); + bool proceed = rayQueryProceedEXT(q[2]); + if(!proceed) return; + uint committedStatus = rayQueryGetIntersectionTypeEXT(q[2], true); + switch (committedStatus) + { + case gl_RayQueryCommittedIntersectionTriangleEXT: + { + results[index] = rayQueryGetIntersectionBarycentricsEXT(q[2], true); + break; + } + default: + break; + } +} + +[CsInfo] +entryPoint = main +options.clientHash = 0x0, 0x0 +options.trapPresent = 0 +options.debugMode = 0 +options.enablePerformanceData = 0 +options.allowReZ = 0 +options.vgprLimit = 0 +options.sgprLimit = 0 +options.maxThreadGroupsPerComputeUnit = 0 +options.subgroupSize = 0 +options.waveSize = 64 +options.wgpMode = 0 +options.waveBreakSize = None +options.forceLoopUnrollCount = 0 +options.enableLoadScalarizer = 0 +options.allowVaryWaveSize = 0 +options.useSiScheduler = 0 +options.disableCodeSinking = 0 +options.favorLatencyHiding = 0 +options.disableLicm = 0 +options.unrollThreshold = 0 +options.scalarThreshold = 0 +options.disableLoopUnroll = 0 +options.adjustDepthImportVrs = 0 +options.fp32DenormalMode = Auto +options.disableLicmThreshold = 0 +options.unrollHintThreshold = 0 +options.dontUnrollHintThreshold = 0 +options.noContractOpDot = 0 +options.fastMathFlags = 0 +options.disableFastMathFlags = 0 +options.ldsSpillLimitDwords = 0 +options.scalarizeWaterfallLoads = 0 +options.overrideForceThreadIdSwizzling = 0 +options.overrideShaderThreadGroupSizeX = 0 +options.overrideShaderThreadGroupSizeY = 0 +options.overrideShaderThreadGroupSizeZ = 0 
+options.forceLateZ = 0 +options.nsaThreshold = 0 +options.aggressiveInvariantLoads = Auto +options.workaroundStorageImageFormats = 0 +options.workaroundInitializeOutputsToZero = 0 +options.disableFMA = 0 +options.disableReadFirstLaneWorkaround = 0 +options.backwardPropagateNoContract = 0 +options.forwardPropagateNoContract = 1 +options.workgroupRoundRobin = 0 +options.constantBufferBindingOffset = 0 + +[ResourceMapping] +userDataNode[0].visibility = 128 +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 7 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBufferCompact +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 2 +userDataNode[0].next[0].set = 0x0000005D +userDataNode[0].next[0].binding = 17 +userDataNode[0].next[0].strideInDwords = 0 +userDataNode[0].next[1].type = DescriptorConstBuffer +userDataNode[0].next[1].offsetInDwords = 2 +userDataNode[0].next[1].sizeInDwords = 4 +userDataNode[0].next[1].set = 0x0000005D +userDataNode[0].next[1].binding = 0 +userDataNode[0].next[1].strideInDwords = 0 +userDataNode[0].next[2].type = DescriptorBuffer +userDataNode[0].next[2].offsetInDwords = 6 +userDataNode[0].next[2].sizeInDwords = 4 +userDataNode[0].next[2].set = 0x0000005D +userDataNode[0].next[2].binding = 1 +userDataNode[0].next[2].strideInDwords = 0 +userDataNode[1].visibility = 2 +userDataNode[1].type = StreamOutTableVaPtr +userDataNode[1].offsetInDwords = 2 +userDataNode[1].sizeInDwords = 1 +userDataNode[2].visibility = 128 +userDataNode[2].type = DescriptorTableVaPtr +userDataNode[2].offsetInDwords = 6 +userDataNode[2].sizeInDwords = 1 +userDataNode[2].next[0].type = DescriptorBuffer +userDataNode[2].next[0].offsetInDwords = 0 +userDataNode[2].next[0].sizeInDwords = 4 +userDataNode[2].next[0].set = 0x00000000 +userDataNode[2].next[0].binding = 0 +userDataNode[2].next[0].strideInDwords = 0 +userDataNode[2].next[1].type = DescriptorConstBuffer 
+userDataNode[2].next[1].offsetInDwords = 4 +userDataNode[2].next[1].sizeInDwords = 4 +userDataNode[2].next[1].set = 0x00000000 +userDataNode[2].next[1].binding = 1 +userDataNode[2].next[1].strideInDwords = 0 +userDataNode[2].next[2].type = DescriptorBuffer +userDataNode[2].next[2].offsetInDwords = 8 +userDataNode[2].next[2].sizeInDwords = 4 +userDataNode[2].next[2].set = 0x00000000 +userDataNode[2].next[2].binding = 2 +userDataNode[2].next[2].strideInDwords = 0 + +[ComputePipelineState] +deviceIndex = 0 +options.includeDisassembly = 0 +options.scalarBlockLayout = 1 +options.reconfigWorkgroupLayout = 0 +options.forceCsThreadIdSwizzling = 0 +options.includeIr = 0 +options.robustBufferAccess = 0 +options.enableRelocatableShaderElf = 0 +options.disableImageResourceCheck = 0 +options.enableScratchAccessBoundsChecks = 0 +options.enableImplicitInvariantExports = 1 +options.shadowDescriptorTableUsage = Disable +options.shadowDescriptorTablePtrHigh = 0 +options.extendedRobustness.robustBufferAccess = 0 +options.extendedRobustness.robustImageAccess = 1 +options.extendedRobustness.nullDescriptor = 0 +options.enableRayQuery = 0 +options.optimizeTessFactor = 1 +options.enableInterpModePatch = 0 +options.pageMigrationEnabled = 0 +options.optimizationLevel = 2 +options.overrideThreadGroupSizeX = 0 +options.overrideThreadGroupSizeY = 0 +options.overrideThreadGroupSizeZ = 0 +options.resourceLayoutScheme = Compact +options.threadGroupSwizzleMode = Default +options.reverseThreadGroup = 0 +options.internalRtShaders = 0 +options.forceNonUniformResourceIndexStageMask = 0 +options.expertSchedulingMode = 0 +options.replaceSetWithResourceType = 0 +options.disableSampleMask = 0 +options.buildResourcesDataForShaderModule = 0 +options.disableTruncCoordForGather = 1 +options.enableCombinedTexture = 0 +options.vertex64BitsAttribSingleLoc = 0 +options.enableFragColor = 0 +options.disableBaseVertex = 0 +options.cacheScopePolicyControl = 0 +options.enablePrimGeneratedQuery = 0 
+options.disablePerCompFetch = 0 +rtState.nodeStrideShift = 7 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2172650495 +rtState.staticPipelineFlags = 0 +rtState.triCompressMode = 3 +rtState.boxSortHeuristicMode = 5 +rtState.pipelineFlags = 0 +rtState.counterMode = 0 +rtState.counterMask = 0 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.exportConfig.indirectCallingConvention = 1 +rtState.exportConfig.indirectCalleeSavedRegs.raygen = 2 +rtState.exportConfig.indirectCalleeSavedRegs.miss = 40 +rtState.exportConfig.indirectCalleeSavedRegs.closestHit = 50 +rtState.exportConfig.indirectCalleeSavedRegs.anyHit = 75 +rtState.exportConfig.indirectCalleeSavedRegs.intersection = 75 +rtState.exportConfig.indirectCalleeSavedRegs.callable = 28 +rtState.exportConfig.indirectCalleeSavedRegs.traceRays = 28 +rtState.exportConfig.enableUniformNoReturn = 1 +rtState.exportConfig.enableTraceRayArgsInLds = 0 +rtState.exportConfig.enableReducedLinkageOpt = 0 +rtState.exportConfig.readsDispatchRaysIndex = 0 +rtState.exportConfig.enableDynamicLaunch = 0 +rtState.exportConfig.emitRaytracingShaderDataToken = 1 +rtState.enableRayQueryCsSwizzle = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.forceInvalidAccelStruct = 0 +rtState.enableRayTracingCounters = 0 +rtState.enableRayTracingHwTraversalStack = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 +rtState.maxRayLength = 0 +rtState.enablePickClosestLaneResultForAbortRays = 0 +rtState.traceRayWaveDensityThreshold[7] = 1 +rtState.gpurtFeatureFlags = 0 +rtState.gpurtFuncTable.pFunc[0] = 
TraceRay2_0 +rtState.gpurtFuncTable.pFunc[1] = TraceRayInline2_0 +rtState.gpurtFuncTable.pFunc[2] = TraceRayUsingHitToken2_0 +rtState.gpurtFuncTable.pFunc[3] = RayQueryProceed2_0 +rtState.gpurtFuncTable.pFunc[4] = GetInstanceIndex +rtState.gpurtFuncTable.pFunc[5] = GetInstanceID +rtState.gpurtFuncTable.pFunc[6] = GetObjectToWorldTransform +rtState.gpurtFuncTable.pFunc[7] = GetWorldToObjectTransform +rtState.gpurtFuncTable.pFunc[8] = GetRayQuery64BitInstanceNodePtr +rtState.gpurtFuncTable.pFunc[9] = TraceLongRayAMD2_0 +rtState.gpurtFuncTable.pFunc[10] = LongRayQueryProceedAMD2_0 +rtState.gpurtFuncTable.pFunc[11] = FetchTrianglePositionFromNodePointer +rtState.gpurtFuncTable.pFunc[12] = FetchTrianglePositionFromRayQuery +rtState.rtIpVersion = 2.0 +rtState.gpurtOverride = 0 +rtState.rtIpOverride = 0 +; CHECK-LABEL: @lgc.shader.CS.main( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[__LLPC_GLOBAL_PROXY_Q:%.*]] = alloca [3 x i127], align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 6, i32 6, i64 0, i32 1) +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.invariant.start.p4(i64 -1, ptr addrspace(4) [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 2, i32 0, i32 2) +; CHECK-NEXT: [[TMP4:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <3 x i32> (...) @lgc.create.read.builtin.input.v3i32(i32 28, i32 0, i32 poison, i32 poison) +; CHECK-NEXT: [[TMP6:%.*]] = call <3 x i32> (...) 
@lgc.create.read.builtin.input.v3i32(i32 24, i32 0, i32 poison, i32 poison) +; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_NUMWORKGROUPS_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[__LLPC_INPUT_PROXY_GL_NUMWORKGROUPS_0_VEC_EXTRACT]], 3 +; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_4_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP5]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_4_VEC_EXTRACT]] +; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <{ [4294967295 x <{ [3 x float], float, [3 x float], float }>] }>, ptr addrspace(7) [[TMP3]], i32 0, i32 0, i32 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP10]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP10]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr addrspace(7) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP10]], i32 16 +; CHECK-NEXT: [[TMP15:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP14]], align 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(7) [[TMP10]], i32 28 +; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr addrspace(7) [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = call ptr addrspace(5) (...) 
@lgc.rtq.gep.opaque([3 x i127] poison, i1 false, ptr addrspace(5) [[__LLPC_GLOBAL_PROXY_Q]], i32 0, i32 2) +; CHECK-NEXT: [[TMP19:%.*]] = load <2 x i32>, ptr addrspace(4) [[TMP1]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i64 1 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 65535 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP21]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i32> [[TMP22]] to i64 +; CHECK-NEXT: call void (...) @lgc.rtq.initialize(ptr addrspace(5) [[TMP18]], i64 [[TMP23]], i32 1, i32 255, <3 x float> [[TMP11]], float [[TMP13]], <3 x float> [[TMP15]], float [[TMP17]]) +; CHECK-NEXT: [[TMP24:%.*]] = call ptr addrspace(5) (...) @lgc.rtq.gep.opaque([3 x i127] poison, i1 false, ptr addrspace(5) [[__LLPC_GLOBAL_PROXY_Q]], i32 0, i32 2) +; CHECK-NEXT: [[TMP25:%.*]] = call i1 (...) @lgc.rtq.proceed(ptr addrspace(5) [[TMP24]]) +; CHECK-NEXT: [[DOTFR:%.*]] = freeze i1 [[TMP25]] +; CHECK-NEXT: br i1 [[DOTFR]], label [[TMP26:%.*]], label [[COMMON_RET:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = call ptr addrspace(5) (...) @lgc.rtq.gep.opaque([3 x i127] poison, i1 false, ptr addrspace(5) [[__LLPC_GLOBAL_PROXY_Q]], i32 0, i32 2) +; CHECK-NEXT: [[TMP28:%.*]] = call i32 (...) @lgc.rtq.intersection.type(ptr addrspace(5) [[TMP27]], i1 true) +; CHECK-NEXT: [[COND_FREEZE1:%.*]] = freeze i32 [[TMP28]] +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[COND_FREEZE1]], 1 +; CHECK-NEXT: br i1 [[COND]], label [[TMP29:%.*]], label [[COMMON_RET]] +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = call reassoc nnan nsz arcp contract afn <2 x float> (...) 
@lgc.rtq.intersection.barycentrics(ptr addrspace(5) [[TMP27]], i1 true) +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr <{ [4294967295 x [2 x float]] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 0, i32 [[TMP9]] +; CHECK-NEXT: store <2 x float> [[TMP30]], ptr addrspace(7) [[TMP31]], align 8 +; CHECK-NEXT: br label [[COMMON_RET]] +; diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe new file mode 100644 index 0000000000..4005996e4a --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_Irreducible.pipe @@ -0,0 +1,145 @@ +; RUN: amdllpc -gfxip 11.0 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK %s + +; This case will have irreducible CFG after continuation transform. +; Without fixing irreducible, PatchBufferOp would generate several global load. +; CHECK-LABEL: @_rgen_1.resume.0( +; CHECK-NOT: load {{.*}}, ptr addrspace(1) + +; CHECK-LABEL: @_cs_( +; CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray. +; CHECK-NOT: ret void +; CHECK: call void {{.*}} @llvm.amdgcn.cs.chain. 
+; CHECK-NOT: ret void + +[Version] +version = 69 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; + vec4 direction; +}; + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(binding = 1, set = 0, rgba32f) uniform image2D g_dst; + +layout(set = 0, binding = 2) uniform Properties +{ + uint samples; + uint bounces; +} prop; + +layout(location = 14) rayPayloadEXT RayPayload g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + vec3 pixelColor = vec3(0.0); + for (uint s = 0; s < prop.samples; s++) { + vec3 direction = vec3(1,0,0); + vec3 color = vec3(1.0); + uint bounces = prop.bounces; + + for (uint b = 0; b <= bounces; b++) { + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ direction, + /* tmax */ 48.0, /* payload location */ 14); + + color *= g_ray.color; + + if (g_ray.direction.w <= 0.0) + break; + origin += direction * g_ray.direction.w; + direction = g_ray.direction.xyz; + } + pixelColor += color; + } + imageStore(g_dst, ivec2(gl_LaunchIDEXT.xy), vec4(pixelColor, 0)); +} + +[rgenInfo] +entryPoint = main + + +[ResourceMapping] +userDataNode[0].visibility = 0xffffffff +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBuffer +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 4 +userDataNode[0].next[0].set = 0x00000000 +userDataNode[0].next[0].binding = 0 +userDataNode[0].next[1].type = DescriptorImage +userDataNode[0].next[1].offsetInDwords = 4 +userDataNode[0].next[1].sizeInDwords = 8 +userDataNode[0].next[1].set = 0x00000000 +userDataNode[0].next[1].binding = 1 + +userDataNode[0].next[2].type = DescriptorConstBuffer +userDataNode[0].next[2].offsetInDwords 
= 8 +userDataNode[0].next[2].sizeInDwords = 4 +userDataNode[0].next[2].set = 0x00000000 +userDataNode[0].next[2].binding = 2 + +userDataNode[1].visibility = 0xffffffff +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = DescriptorConstBufferCompact +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 2 +userDataNode[1].next[0].set = 0x0000005D +userDataNode[1].next[0].binding = 17 +userDataNode[1].next[1].type = DescriptorConstBuffer +userDataNode[1].next[1].offsetInDwords = 2 +userDataNode[1].next[1].sizeInDwords = 4 +userDataNode[1].next[1].set = 0x0000005D +userDataNode[1].next[1].binding = 0 +userDataNode[1].next[2].type = DescriptorBuffer +userDataNode[1].next[2].offsetInDwords = 6 +userDataNode[1].next[2].sizeInDwords = 4 +userDataNode[1].next[2].set = 0x0000005D +userDataNode[1].next[2].binding = 1 + +[RayTracingPipelineState] +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[1].closestHitShader = 1 +maxRecursionDepth = 1 +indirectStageMask = 0xffffffff +mode = 3 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2164261887 +rtState.nodeStrideShift = 7 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified 
= 1 +payloadSizeMaxInLib = 12 +attributeSizeMaxInLib = 8 +hasPipelineLibrary = 1 diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe new file mode 100644 index 0000000000..53a1af50cf --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_SetContinuationsCompileUnified.pipe @@ -0,0 +1,132 @@ +; Check that the ray tracing continuations mode option is set but the pipeline is still compiled in unified mode. +; This ensures pipeline will not get into indirect mode unexpectedly when it can be unified. + +; RUN: amdllpc -gfxip 11.0 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK %s + +; CHECK-NOT: call void {{.*}} @llvm.amdgcn.cs.chain. +; CHECK: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray. + +[Version] +version = 69 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(binding = 1, set = 0, rgba32f) uniform image2D g_dst; + +layout(location = 14) rayPayloadEXT RayPayload g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 14); + + imageStore(g_dst, ivec2(gl_LaunchIDEXT.xy), vec4(g_ray.color, 0)); +} + +[rgenInfo] +entryPoint = main + +[chitGlsl] +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(shaderRecordEXT, std430) buffer sbt { + float z; +}; + +hitAttributeEXT vec2 g_hit; +rayPayloadInEXT RayPayload g_ray; + +void main() { + g_ray.color.xy = g_hit; + g_ray.color.z = z; +} + +[chitInfo] +entryPoint = main + +[ResourceMapping] 
+userDataNode[0].visibility = 0xffffffff +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 0 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBuffer +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 4 +userDataNode[0].next[0].set = 0x00000000 +userDataNode[0].next[0].binding = 0 +userDataNode[0].next[1].type = DescriptorImage +userDataNode[0].next[1].offsetInDwords = 4 +userDataNode[0].next[1].sizeInDwords = 8 +userDataNode[0].next[1].set = 0x00000000 +userDataNode[0].next[1].binding = 1 +userDataNode[1].visibility = 0xffffffff +userDataNode[1].type = DescriptorTableVaPtr +userDataNode[1].offsetInDwords = 1 +userDataNode[1].sizeInDwords = 1 +userDataNode[1].next[0].type = DescriptorConstBufferCompact +userDataNode[1].next[0].offsetInDwords = 0 +userDataNode[1].next[0].sizeInDwords = 2 +userDataNode[1].next[0].set = 0x0000005D +userDataNode[1].next[0].binding = 17 +userDataNode[1].next[1].type = DescriptorConstBuffer +userDataNode[1].next[1].offsetInDwords = 2 +userDataNode[1].next[1].sizeInDwords = 4 +userDataNode[1].next[1].set = 0x0000005D +userDataNode[1].next[1].binding = 0 +userDataNode[1].next[2].type = DescriptorBuffer +userDataNode[1].next[2].offsetInDwords = 6 +userDataNode[1].next[2].sizeInDwords = 4 +userDataNode[1].next[2].set = 0x0000005D +userDataNode[1].next[2].binding = 1 + +[RayTracingPipelineState] +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[1].closestHitShader = 1 +maxRecursionDepth = 1 +indirectStageMask = 0x0 +mode = 3 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2164261887 +rtState.nodeStrideShift = 7 
+rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 +payloadSizeMaxInLib = 12 +attributeSizeMaxInLib = 8 +hasPipelineLibrary = 0 diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe index 7d6b67f683..f7919eced8 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLaunchKernel.pipe @@ -1,4 +1,4 @@ -; RUN: amdllpc -gfxip 11.0 -o - -print-after=prepare-continuations -gpurt-use-dumped=true -llpc-raytracing-mode=continuations -emit-lgc %s | FileCheck -check-prefixes=CHECK %s +; RUN: amdllpc -gfxip 11.0 -o - -print-after=prepare-continuations -llpc-raytracing-mode=continuations -emit-lgc %s | FileCheck -check-prefixes=CHECK %s [Version] version = 70 @@ -160,7 +160,6 @@ rtState.traceRayWaveDensityThreshold[10] = 1 rtState.traceRayWaveDensityThreshold[11] = 1 rtState.traceRayWaveDensityThreshold[12] = 1 rtState.gpurtFeatureFlags = 0 -rtState.gpurtShaderLibrary = Shader_0x6E9B9DD1ADBD5A1D.spv rtState.gpurtFuncTable.pFunc[0] = TraceRay2_0 rtState.gpurtFuncTable.pFunc[1] = TraceRayInline2_0 rtState.gpurtFuncTable.pFunc[2] = TraceRayUsingHitToken2_0 @@ -184,5 +183,5 @@ pipelineLibStageMask = 0 ; CHECK-LABEL: ; ModuleID = 'lgcPipeline' ; CHECK-NEXT: source_filename = "main" -; CHECK: define dllexport void @lgc.shader.CS.main() !lgc.shaderstage !{{[0-9]+}} !lgc.rt.shaderstage !{{[0-9]+}} { +; CHECK: define dllexport void @lgc.shader.CS.main() !lgc.rt.shaderstage !{{[0-9]+}} 
!lgc.shaderstage !{{[0-9]+}} { ; CHECK: call void (...) @lgc.cps.jump( diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe new file mode 100644 index 0000000000..29ff8c2c43 --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestLibraryNoTraversal.pipe @@ -0,0 +1,193 @@ +; Check that traversal module is not compiled when compiling a pipeline library even if it calls TraceRay + +; RUN: amdllpc -gfxip 11.0 -o - -emit-lgc %s | FileCheck -check-prefixes=CHECK %s + +; CHECK-NOT: call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray. + +[Version] +version = 70 + +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(location = 0) rayPayloadEXT vec3 g_ray; + +void main() +{ + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + /* origin */ vec3(0, 0, 0), /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 0); +} + +[rgenInfo] +entryPoint = main + +[ResourceMapping] +userDataNode[0].visibility = 16128 +userDataNode[0].type = DescriptorTableVaPtr +userDataNode[0].offsetInDwords = 8 +userDataNode[0].sizeInDwords = 1 +userDataNode[0].next[0].type = DescriptorConstBufferCompact +userDataNode[0].next[0].offsetInDwords = 0 +userDataNode[0].next[0].sizeInDwords = 2 +userDataNode[0].next[0].set = 0x0000005D +userDataNode[0].next[0].binding = 17 +userDataNode[0].next[0].strideInDwords = 0 +userDataNode[0].next[1].type = DescriptorConstBuffer +userDataNode[0].next[1].offsetInDwords = 2 +userDataNode[0].next[1].sizeInDwords = 4 +userDataNode[0].next[1].set = 0x0000005D +userDataNode[0].next[1].binding = 0 +userDataNode[0].next[1].strideInDwords = 0 +userDataNode[0].next[2].type = DescriptorBuffer +userDataNode[0].next[2].offsetInDwords = 6 
+userDataNode[0].next[2].sizeInDwords = 4 +userDataNode[0].next[2].set = 0x0000005D +userDataNode[0].next[2].binding = 1 +userDataNode[0].next[2].strideInDwords = 0 +userDataNode[1].visibility = 2 +userDataNode[1].type = StreamOutTableVaPtr +userDataNode[1].offsetInDwords = 3 +userDataNode[1].sizeInDwords = 1 +userDataNode[2].visibility = 16128 +userDataNode[2].type = DescriptorTableVaPtr +userDataNode[2].offsetInDwords = 7 +userDataNode[2].sizeInDwords = 1 +userDataNode[2].next[0].type = DescriptorImage +userDataNode[2].next[0].offsetInDwords = 0 +userDataNode[2].next[0].sizeInDwords = 8 +userDataNode[2].next[0].set = 0x00000000 +userDataNode[2].next[0].binding = 0 +userDataNode[2].next[0].strideInDwords = 0 +userDataNode[2].next[1].type = DescriptorConstBuffer +userDataNode[2].next[1].offsetInDwords = 8 +userDataNode[2].next[1].sizeInDwords = 4 +userDataNode[2].next[1].set = 0x00000000 +userDataNode[2].next[1].binding = 1 +userDataNode[2].next[1].strideInDwords = 0 + +[RayTracingPipelineState] +deviceIndex = 0 +options.includeDisassembly = 0 +options.scalarBlockLayout = 1 +options.resourceLayoutScheme = Compact +options.includeIr = 0 +options.robustBufferAccess = 0 +options.reconfigWorkgroupLayout = 0 +options.forceCsThreadIdSwizzling = 0 +options.overrideThreadGroupSizeX = 0 +options.overrideThreadGroupSizeY = 0 +options.overrideThreadGroupSizeZ = 0 +options.shadowDescriptorTableUsage = Disable +options.shadowDescriptorTablePtrHigh = 0 +options.extendedRobustness.robustBufferAccess = 0 +options.extendedRobustness.robustImageAccess = 1 +options.extendedRobustness.nullDescriptor = 0 +options.optimizeTessFactor = 1 +options.optimizationLevel = 2 +options.threadGroupSwizzleMode = Default +options.reverseThreadGroup = 0 +options.enableImplicitInvariantExports = 1 +options.internalRtShaders = 0 +options.forceNonUniformResourceIndexStageMask = 0 +options.replaceSetWithResourceType = 0 +options.disableSampleMask = 0 +options.buildResourcesDataForShaderModule = 0 
+options.disableTruncCoordForGather = 1 +options.enableCombinedTexture = 0 +options.vertex64BitsAttribSingleLoc = 0 +options.enableFragColor = 0 +options.disableBaseVertex = 0 +options.enablePrimGeneratedQuery = 0 +options.disablePerCompFetch = 0 +groups[0].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[0].generalShader = 0 +groups[0].closestHitShader = -1 +groups[0].anyHitShader = -1 +groups[0].intersectionShader = -1 +groups[1].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR +groups[1].generalShader = 3 +groups[1].closestHitShader = -1 +groups[1].anyHitShader = -1 +groups[1].intersectionShader = -1 +groups[2].type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR +groups[2].generalShader = -1 +groups[2].closestHitShader = 2 +groups[2].anyHitShader = 1 +groups[2].intersectionShader = -1 +maxRecursionDepth = 1 +indirectStageMask = 4294967295 +libraryMode = 2 +mode = 1 +rtState.bvhResDescSize = 4 +rtState.bvhResDesc[0] = 0 +rtState.bvhResDesc[1] = 2197815296 +rtState.bvhResDesc[2] = 4294967295 +rtState.bvhResDesc[3] = 2172650495 +rtState.nodeStrideShift = 7 +rtState.staticPipelineFlags = 512 +rtState.triCompressMode = 3 +rtState.pipelineFlags = 8192 +rtState.threadGroupSizeX = 8 +rtState.threadGroupSizeY = 4 +rtState.threadGroupSizeZ = 1 +rtState.boxSortHeuristicMode = 5 +rtState.counterMode = 0 +rtState.counterMask = 0 +rtState.rayQueryCsSwizzle = 1 +rtState.ldsStackSize = 16 +rtState.dispatchRaysThreadGroupSize = 32 +rtState.ldsSizePerThreadGroup = 65536 +rtState.outerTileSize = 4 +rtState.dispatchDimSwizzleMode = 0 +rtState.exportConfig.indirectCallingConvention = 1 +rtState.exportConfig.indirectCalleeSavedRegs.raygen = 2 +rtState.exportConfig.indirectCalleeSavedRegs.miss = 40 +rtState.exportConfig.indirectCalleeSavedRegs.closestHit = 50 +rtState.exportConfig.indirectCalleeSavedRegs.anyHit = 75 +rtState.exportConfig.indirectCalleeSavedRegs.intersection = 75 +rtState.exportConfig.indirectCalleeSavedRegs.callable = 28 
+rtState.exportConfig.indirectCalleeSavedRegs.traceRays = 28 +rtState.exportConfig.enableUniformNoReturn = 1 +rtState.exportConfig.enableTraceRayArgsInLds = 0 +rtState.exportConfig.readsDispatchRaysIndex = 0 +rtState.exportConfig.enableDynamicLaunch = 0 +rtState.exportConfig.emitRaytracingShaderDataToken = 0 +rtState.enableRayQueryCsSwizzle = 0 +rtState.enableDispatchRaysInnerSwizzle = 1 +rtState.enableDispatchRaysOuterSwizzle = 1 +rtState.forceInvalidAccelStruct = 0 +rtState.enableRayTracingCounters = 0 +rtState.enableRayTracingHwTraversalStack = 1 +rtState.enableOptimalLdsStackSizeForIndirect = 1 +rtState.enableOptimalLdsStackSizeForUnified = 1 +rtState.maxRayLength = 0 +rtState.enablePickClosestLaneResultForAbortRays = 0 +rtState.traceRayWaveDensityThreshold[8] = 1 +rtState.traceRayWaveDensityThreshold[10] = 1 +rtState.traceRayWaveDensityThreshold[11] = 1 +rtState.traceRayWaveDensityThreshold[12] = 1 +rtState.gpurtFeatureFlags = 0 +rtState.gpurtFuncTable.pFunc[0] = TraceRay2_0 +rtState.gpurtFuncTable.pFunc[1] = TraceRayInline2_0 +rtState.gpurtFuncTable.pFunc[2] = TraceRayUsingHitToken2_0 +rtState.gpurtFuncTable.pFunc[3] = RayQueryProceed2_0 +rtState.gpurtFuncTable.pFunc[4] = GetInstanceIndex +rtState.gpurtFuncTable.pFunc[5] = GetInstanceID +rtState.gpurtFuncTable.pFunc[6] = GetObjectToWorldTransform +rtState.gpurtFuncTable.pFunc[7] = GetWorldToObjectTransform +rtState.gpurtFuncTable.pFunc[8] = GetRayQuery64BitInstanceNodePtr +rtState.gpurtFuncTable.pFunc[9] = TraceLongRayAMD2_0 +rtState.gpurtFuncTable.pFunc[10] = LongRayQueryProceedAMD2_0 +rtState.gpurtFuncTable.pFunc[11] = FetchTrianglePositionFromNodePointer +rtState.gpurtFuncTable.pFunc[12] = FetchTrianglePositionFromRayQuery +rtState.rtIpVersion = 2.0 +rtState.gpurtOverride = 0 +rtState.rtIpOverride = 0 +payloadSizeMaxInLib = 0 +attributeSizeMaxInLib = 0 +hasPipelineLibrary = 0 +pipelineLibStageMask = 0 diff --git a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe 
b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe index f2564787ea..488e6ac10a 100644 --- a/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe +++ b/llpc/test/shaderdb/ray_tracing/PipelineRays_TestStaticCompile.pipe @@ -11,8 +11,28 @@ [Version] version = 53 -[rgenSpvFile] -fileName = Shader_0xF10BE264545A9F4B.spv +[rgenGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(binding = 1, set = 0, rgba32f) uniform image2D g_dst; + +layout(location = 0) rayPayloadEXT vec3 g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + traceRayEXT(g_bvh, /* ray flags */ 0, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 0); + + imageStore(g_dst, ivec2(gl_LaunchIDEXT.xy), vec4(g_ray, 0)); +} [rgenInfo] entryPoint = main @@ -41,8 +61,14 @@ options.disableLicmThreshold = 0 options.unrollHintThreshold = 0 options.dontUnrollHintThreshold = 0 -[missSpvFile] -fileName = Shader_0xE7D0EF376FDDEA56.spv +[missGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable + +void main() +{ +} + [missInfo] entryPoint = main @@ -71,38 +97,16 @@ options.disableLicmThreshold = 0 options.unrollHintThreshold = 0 options.dontUnrollHintThreshold = 0 -[chitSpvFile] -fileName = Shader_0x04BDF3B158225AFA.spv +[chitGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable -[chitInfo] -entryPoint = main -options.trapPresent = 0 -options.debugMode = 0 -options.enablePerformanceData = 0 -options.allowReZ = 0 -options.vgprLimit = 0 -options.sgprLimit = 0 -options.maxThreadGroupsPerComputeUnit = 0 -options.waveSize = 0 -options.wgpMode = 0 -options.waveBreakSize = None -options.forceLoopUnrollCount = 0 -options.useSiScheduler = 0 -options.updateDescInElf = 0 -options.allowVaryWaveSize = 0 
-options.enableLoadScalarizer = 0 -options.disableLicm = 0 -options.unrollThreshold = 0 -options.scalarThreshold = 0 -options.disableLoopUnroll = 0 -options.fp32DenormalMode = Auto -options.adjustDepthImportVrs = 0 -options.disableLicmThreshold = 0 -options.unrollHintThreshold = 0 -options.dontUnrollHintThreshold = 0 +layout(location = 0) rayPayloadInEXT vec3 hitValue; -[chitSpvFile] -fileName = Shader_0x89ABD17F2942EFC4.spv +void main() +{ + hitValue = vec3(3, 4, 5); +} [chitInfo] entryPoint = main @@ -131,8 +135,14 @@ options.disableLicmThreshold = 0 options.unrollHintThreshold = 0 options.dontUnrollHintThreshold = 0 -[sectSpvFile] -fileName = Shader_0xAD98DA756C353EFE.spv +[sectGlsl] +#version 460 +#extension GL_EXT_ray_tracing : enable + +void main() +{ + reportIntersectionEXT(0.5, 0u); +} [sectInfo] entryPoint = main diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0x04BDF3B158225AFA.spv b/llpc/test/shaderdb/ray_tracing/Shader_0x04BDF3B158225AFA.spv deleted file mode 100644 index a613112bcc..0000000000 Binary files a/llpc/test/shaderdb/ray_tracing/Shader_0x04BDF3B158225AFA.spv and /dev/null differ diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0x6E9B9DD1ADBD5A1D.spv b/llpc/test/shaderdb/ray_tracing/Shader_0x6E9B9DD1ADBD5A1D.spv deleted file mode 100644 index c926dc0d1b..0000000000 Binary files a/llpc/test/shaderdb/ray_tracing/Shader_0x6E9B9DD1ADBD5A1D.spv and /dev/null differ diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0x89ABD17F2942EFC4.spv b/llpc/test/shaderdb/ray_tracing/Shader_0x89ABD17F2942EFC4.spv deleted file mode 100644 index 257274f45d..0000000000 Binary files a/llpc/test/shaderdb/ray_tracing/Shader_0x89ABD17F2942EFC4.spv and /dev/null differ diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0xAD98DA756C353EFE.spv b/llpc/test/shaderdb/ray_tracing/Shader_0xAD98DA756C353EFE.spv deleted file mode 100644 index 24b646de82..0000000000 Binary files a/llpc/test/shaderdb/ray_tracing/Shader_0xAD98DA756C353EFE.spv and /dev/null differ diff --git 
a/llpc/test/shaderdb/ray_tracing/Shader_0xE7D0EF376FDDEA56.spv b/llpc/test/shaderdb/ray_tracing/Shader_0xE7D0EF376FDDEA56.spv deleted file mode 100644 index 2d8e25cc90..0000000000 Binary files a/llpc/test/shaderdb/ray_tracing/Shader_0xE7D0EF376FDDEA56.spv and /dev/null differ diff --git a/llpc/test/shaderdb/ray_tracing/Shader_0xF10BE264545A9F4B.spv b/llpc/test/shaderdb/ray_tracing/Shader_0xF10BE264545A9F4B.spv deleted file mode 100644 index d2bce9f95b..0000000000 Binary files a/llpc/test/shaderdb/ray_tracing/Shader_0xF10BE264545A9F4B.spv and /dev/null differ diff --git a/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.rgen b/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.rgen new file mode 100644 index 0000000000..b817bba209 --- /dev/null +++ b/llpc/test/shaderdb/ray_tracing/TestKnownRayFlags.rgen @@ -0,0 +1,30 @@ +// BEGIN_SHADERTEST +/* +; RUN: amdllpc --print-after=lgc-lower-gpurt %gfxip 2>&1 %s | FileCheck -check-prefix=SHADERTEST %s +; Check whether ray flags are replaced by known values +; SHADERTEST: %{{.*}} = xor i32 -171, -1 +; SHADERTEST: %{{.*}} = or i32 %{{.*}}, 170 +*/ +// END_SHADERTEST + +#version 460 +#extension GL_EXT_ray_tracing : require + +struct RayPayload { + vec3 color; +}; + +layout(binding = 0, set = 0) uniform accelerationStructureEXT g_bvh; +layout(location = 0) rayPayloadEXT RayPayload g_ray; + +void main() { + vec3 origin; + origin.x = gl_LaunchIDEXT.x; + origin.y = gl_LaunchIDEXT.y; + origin.z = 0; + + traceRayEXT(g_bvh, /* ray flags */ 0xAA, /* cull mask */ 0xff, + /* sbt offset */ 0, /* sbt stride */ 1, /* miss index */ 0, + origin.xyz, /* tmin */ 0.0, /* direction */ vec3(1, 0, 0), + /* tmax */ 48.0, /* payload location */ 0); +} diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe index b08b250318..a0dde93d4d 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe +++ 
b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe @@ -74,7 +74,7 @@ attribute[0].offset = 0 ; SHADERTEST-LABEL: @lgc.shader.VS.main( ; SHADERTEST-NEXT: .entry: ; SHADERTEST-NEXT: [[TMP0:%.*]] = call <2 x float> @lgc.input.import.generic__v2f32(i1 false, i32 0, i32 0, i32 0, i32 poison) -; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v2f32(i32 0, i32 0, <2 x float> [[TMP0]]) #[[ATTR1:[0-9]+]] +; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v2f32(i32 0, i32 0, <2 x float> [[TMP0]]) #[[ATTR2:[0-9]+]] ; SHADERTEST-NEXT: ret void ; ; @@ -85,38 +85,28 @@ attribute[0].offset = 0 ; SHADERTEST-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> ; SHADERTEST-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; SHADERTEST-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> -; SHADERTEST-NEXT: [[INTERPPERSPCENTER:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR5:[0-9]+]] +; SHADERTEST-NEXT: [[INTERPPERSPCENTER:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR4:[0-9]+]] ; SHADERTEST-NEXT: [[TMP4:%.*]] = call <2 x float> (...) 
@lgc.input.import.interpolated__v2f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) ; SHADERTEST-NEXT: [[TMP5:%.*]] = call i32 @lgc.load.user.data__i32(i32 44) ; SHADERTEST-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 0 ; SHADERTEST-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 ; SHADERTEST-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr addrspace(4) +; SHADERTEST-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP8]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP8]], i32 -1) ] ; SHADERTEST-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP8]], i32 0 ; SHADERTEST-NEXT: [[TMP10:%.*]] = call i32 @lgc.load.user.data__i32(i32 44) ; SHADERTEST-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP10]], i64 0 ; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 ; SHADERTEST-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(4) +; SHADERTEST-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr addrspace(4) [[TMP13]], i32 4), "dereferenceable"(ptr addrspace(4) [[TMP13]], i32 -1) ] ; SHADERTEST-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP13]], i32 0 ; SHADERTEST-NEXT: [[TMP15:%.*]] = fptosi <2 x float> [[TMP4]] to <2 x i32> ; SHADERTEST-NEXT: [[TMP16:%.*]] = sitofp <2 x i32> [[TMP15]] to <2 x float> -; SHADERTEST-NEXT: [[TMP17:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 32, !invariant.load !11 -; SHADERTEST-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP9]], align 16, !invariant.load !11 +; SHADERTEST-NEXT: [[TMP17:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 4, !invariant.load !11 +; SHADERTEST-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP9]], align 4, !invariant.load !11 ; SHADERTEST-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i64 0 ; SHADERTEST-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i64 1 -; SHADERTEST-NEXT: 
[[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i64 0 -; SHADERTEST-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP21]]) -; SHADERTEST-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> poison, i32 [[TMP22]], i64 0 -; SHADERTEST-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP18]], i64 1 -; SHADERTEST-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP24]]) -; SHADERTEST-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i64 1 -; SHADERTEST-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP18]], i64 2 -; SHADERTEST-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP27]]) -; SHADERTEST-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP28]], i64 2 -; SHADERTEST-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP18]], i64 3 -; SHADERTEST-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP30]]) -; SHADERTEST-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP31]], i64 3 -; SHADERTEST-NEXT: [[TMP33:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[TMP19]], float [[TMP20]], <8 x i32> [[TMP17]], <4 x i32> [[TMP32]], i1 false, i32 0, i32 0) -; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP33]]) #[[ATTR6:[0-9]+]] +; SHADERTEST-NEXT: [[TMP21:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[TMP19]], float [[TMP20]], <8 x i32> [[TMP17]], <4 x i32> [[TMP18]], i1 false, i32 0, i32 0) +; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP21]]) #[[ATTR5:[0-9]+]] ; SHADERTEST-NEXT: ret void ; ; @@ -126,7 +116,7 @@ attribute[0].offset = 0 ; SHADERTEST-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 ; SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 ; SHADERTEST-NEXT: [[TMP6:%.*]] = 
extractelement <4 x float> [[TMP0]], i64 3 -; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], i1 immarg true, i1 immarg true) #[[ATTR1]] +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], i1 true, i1 true) ; SHADERTEST-NEXT: call void @llvm.amdgcn.endpgm() ; SHADERTEST-NEXT: unreachable ; diff --git a/llpc/tool/llpcCompilationUtils.cpp b/llpc/tool/llpcCompilationUtils.cpp index 01a62c99ce..ae6dbf4a6e 100644 --- a/llpc/tool/llpcCompilationUtils.cpp +++ b/llpc/tool/llpcCompilationUtils.cpp @@ -179,6 +179,10 @@ static void disassembleSpirv(unsigned binSize, const void *code, const llvm::Twi LLPC_OUTS("\nSPIR-V disassembly for " << header << ":\n"); spvDisassembleSpirv(binSize, code, textSize, spvText.data()); LLPC_OUTS(spvText.data() << "\n"); + + // Flush to ensure the SPIR-V is written out before we attempt to parse it. This is helpful when debugging + // SPIRVReader crashes. 
+ llvm::outs().flush(); } // ===================================================================================================================== diff --git a/llpc/tool/llpcShaderCacheWrap.h b/llpc/tool/llpcShaderCacheWrap.h index fc21c8e77f..359aa7a7e9 100644 --- a/llpc/tool/llpcShaderCacheWrap.h +++ b/llpc/tool/llpcShaderCacheWrap.h @@ -51,7 +51,7 @@ class ShaderCacheWrap : public Vkgc::ICache { LLPC_NODISCARD Result GetEntry(Vkgc::HashId hash, bool allocateOnMiss, Vkgc::EntryHandle *pHandle); - LLPC_NODISCARD void ReleaseEntry(Vkgc::RawEntryHandle rawHandle); + void ReleaseEntry(Vkgc::RawEntryHandle rawHandle); LLPC_NODISCARD Result WaitForEntry(Vkgc::RawEntryHandle rawHandle); diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index 1aeb804992..2b5f8eee30 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -45,6 +45,7 @@ #include "SPIRVType.h" #include "SPIRVUtil.h" #include "SPIRVValue.h" +#include "compilerutils/TypesMetadata.h" #include "llpcCompiler.h" #include "llpcContext.h" #include "llpcDialect.h" @@ -53,11 +54,13 @@ #include "llvmraytracing/ContinuationsUtil.h" #include "lgc/LgcDialect.h" #include "lgc/LgcRtDialect.h" +#include "lgc/LgcRtqDialect.h" #include "lgc/Pipeline.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/CFG.h" @@ -97,9 +100,9 @@ using namespace llvm; using namespace SPIRV; using namespace Llpc; using namespace lgc::rt; +using namespace lgc::rtq; namespace Llpc { -Type *getRayQueryInternalTy(lgc::Builder *builder); unsigned getTraceRayParamPayloadIdx(void); } // namespace Llpc @@ -314,7 +317,89 @@ uint64_t SPIRVToLLVM::getTypeStoreSize(Type *const t) { return calculatedSize; } +unsigned 
SPIRVToLLVM::getImageTypeComponents(SPIRVType *t) const { + const auto opcode = t->getOpCode(); + SPIRVTypeImage *spvImageTy = nullptr; + if (opcode == OpTypeImage) + spvImageTy = static_cast(t); + else if (opcode == OpTypeSampledImage) + spvImageTy = static_cast(t)->getImageType(); + else + assert(opcode == OpTypeSampler); + + unsigned components = 0; + + if (spvImageTy) { + components |= ImageComponentImage; + if (spvImageTy->getDescriptor().MS) + components |= ImageComponentFMask; + } + + if (opcode == OpTypeSampledImage || opcode == OpTypeSampler) + components |= ImageComponentSampler; + + return components; +} + +ImageTypeIndices SPIRVToLLVM::getImageTypeIndices(unsigned imageComponents) const { + ImageTypeIndices result; + unsigned idx = 0; + + if (imageComponents & ImageComponentImage) { + result.imagePointer = idx++; + result.imageStride = idx++; + result.imagePlaneStride = idx++; + + if (imageComponents & ImageComponentFMask) { + result.fmaskPointer = idx++; + result.fmaskStride = idx++; + } + } + + if (imageComponents & ImageComponentSampler) { + result.samplerPointer = idx++; + result.samplerStride = idx++; + result.convertingSamplerIdx = idx++; + } + + return result; +} + +Type *SPIRVToLLVM::getImageTy(unsigned imageComponents) const { + assert(imageComponents != 0); + + Type *descPtrTy = getBuilder()->getDescPtrTy(); + Type *i32 = getBuilder()->getInt32Ty(); + + SmallVector types; + if (imageComponents & ImageComponentImage) { + types.push_back(descPtrTy); // pointer + types.push_back(i32); // stride + types.push_back(i32); // planeStride + + if (imageComponents & ImageComponentFMask) { + types.push_back(descPtrTy); // pointer + types.push_back(i32); // stride + } + } + + if (imageComponents & ImageComponentSampler) { + types.push_back(descPtrTy); // pointer + types.push_back(i32); // stride + types.push_back(i32); // convertingSamplerIdx + } + + return StructType::get(*m_context, types); +} + SmallVector SPIRVToLLVM::mapValue(SPIRVValue *bv, ArrayRef 
values) { +#ifndef NDEBUG + if (bv->hasType()) { + Type *t = transType(bv->getType()); + assert(!values.empty() || t->isVoidTy()); + } +#endif + auto oldValues = m_valueMap.lookup(bv); if (!oldValues.empty()) { if (oldValues[0] == values[0]) { @@ -425,20 +510,20 @@ Type *SPIRVToLLVM::transFPType(SPIRVType *t) { } // ===================================================================================================================== -// Translate an "OpTypeArray". This contains special handling for arrays in interface storage classes which are +// Translate an "OpType{Runtime}Array". This contains special handling for arrays in interface storage classes which are // explicitly laid out and may contain manually placed padding bytes. If the array needs padding, we map an array like // '<element>[length]' -> 'struct { <element>, <padding bytes> }[length]'. // // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. 
-template <> -Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { - Type *elementType = transType(spvType->getArrayElementType(), matrixStride, isColumnMajor, isParentPointer, layout); +Type *SPIRVToLLVM::transTypeArray(SPIRVType *const spvType, const unsigned matrixStride, const bool isColumnMajor, + LayoutMode layout) { + const auto opcode = spvType->getOpCode(); + assert(opcode == OpTypeArray || opcode == OpTypeRuntimeArray); + + Type *elementType = transType(spvType->getArrayElementType(), matrixStride, isColumnMajor, layout); SPIRVWord arrayStride = 0; const bool hasArrayStride = spvType->hasDecorate(DecorationArrayStride, 0, &arrayStride); @@ -468,13 +553,14 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvTyp } } - Type *const arrayType = ArrayType::get(elementType, spvType->getArrayLength()); + const SPIRVWord arrayLength = opcode == OpTypeArray ? spvType->getArrayLength() : SPIRVWORD_MAX; + Type *const arrayType = ArrayType::get(elementType, arrayLength); // Setup the replaced array type in case this array is used in default uniform struct: // If the member type could be found in replaced-type map, insert the replaced-type, // If the member type is image type, insert an int8 type. 
This is used for image array of array SPIRVType *spvElementType = spvType->getArrayElementType(); - SPIRVTypeContext ctxElementType(spvElementType, matrixStride, isColumnMajor, isParentPointer, layout); + SPIRVTypeContext ctxElementType(spvElementType, matrixStride, isColumnMajor, layout); Type *imageElementType = nullptr; auto it = m_imageTypeMap.find(ctxElementType.asTuple()); if (it != m_imageTypeMap.end()) { @@ -485,12 +571,14 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvTyp } if (imageElementType) { - Type *const imageArrayType = ArrayType::get(imageElementType, spvType->getArrayLength()); - SPIRVTypeContext ctxArray(spvType, matrixStride, isColumnMajor, isParentPointer, layout); + Type *const imageArrayType = ArrayType::get(imageElementType, arrayLength); + SPIRVTypeContext ctxArray(spvType, matrixStride, isColumnMajor, layout); m_imageTypeMap[ctxArray.asTuple()] = imageArrayType; } - return paddedArray ? recordTypeWithPad(arrayType) : arrayType; + if (paddedArray) + recordTypeWithPad(arrayType); + return arrayType; } // ===================================================================================================================== @@ -500,13 +588,11 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvTyp // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. 
template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { - if (isParentPointer) + const bool isColumnMajor, LayoutMode layout) { + if (layout != LayoutMode::None) return getBuilder()->getInt32Ty(); return getBuilder()->getInt1Ty(); } @@ -517,38 +603,18 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, con // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { + const bool isColumnMajor, LayoutMode layout) { SPIRVTypeForwardPointer *const spvForwardPointerType = static_cast(spvType); const SPIRVStorageClassKind storageClass = spvForwardPointerType->getPointerStorageClass(); // Forward pointers must always point to structs. assert(spvForwardPointerType->getPointerElementType()->isTypeStruct()); - // We first have to map the pointed-to-struct to an opaque struct so we can have a forward reference to the struct. - StructType *const pointeeType = StructType::create(*m_context); - - // Then we need to map our forward pointer itself, because the struct we are pointing to could use the pointer. 
const unsigned addrSpace = SPIRSPIRVAddrSpaceMap::rmap(storageClass); - Type *const type = mapType(spvType, PointerType::get(pointeeType, addrSpace)); - - const bool isBufferBlockPointer = storageClass == StorageClassStorageBuffer || storageClass == StorageClassUniform || - storageClass == StorageClassPushConstant || - storageClass == StorageClassShaderRecordBufferKHR || - storageClass == StorageClassPhysicalStorageBufferEXT; - LayoutMode structLayout = isBufferBlockPointer ? LayoutMode::Explicit : LayoutMode::Native; - - // Finally we translate the struct we are pointing to create it. - StructType *const structType = - cast(transType(spvType->getPointerElementType(), matrixStride, isColumnMajor, true, structLayout)); - - pointeeType->setBody(structType->elements(), structType->isPacked()); - - return type; + return PointerType::get(*m_context, addrSpace); } // ===================================================================================================================== @@ -558,12 +624,10 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const sp // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. 
template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { + const bool isColumnMajor, LayoutMode layout) { const auto spvColumnType = spvType->getMatrixColumnType(); const auto spvElementType = spvColumnType->getVectorComponentType(); const unsigned spvColumnCount = spvType->getMatrixColumnCount(); @@ -571,13 +635,13 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, u Type *columnType = nullptr; unsigned columnCount = 0; - if (!isParentPointer || isColumnMajor) { + if (layout == LayoutMode::None || isColumnMajor) { // If the matrix is not explicitly laid out or is column major, just translate the column type. - columnType = transType(spvColumnType, matrixStride, isColumnMajor, isParentPointer, layout); + columnType = transType(spvColumnType, matrixStride, isColumnMajor, layout); columnCount = spvColumnCount; } else { // We need to transpose the matrix type to represent its layout in memory. - Type *const elementType = transType(spvElementType, matrixStride, isColumnMajor, isParentPointer, layout); + Type *const elementType = transType(spvElementType, matrixStride, isColumnMajor, layout); // NOTE: The new column after transposition is actually the original SPIR-V row vector and the column count is the // original SPIR-V row vector count. @@ -626,7 +690,9 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, u } Type *const matrixType = ArrayType::get(columnType, columnCount); - return usePadding ? recordTypeWithPad(matrixType, !isColumnMajor) : matrixType; + if (usePadding) + recordTypeWithPad(matrixType, !isColumnMajor); + return matrixType; } // ===================================================================================================================== @@ -636,15 +702,11 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, u // @param spvType : The type. 
// @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { + const bool isColumnMajor, LayoutMode layout) { SPIRVStorageClassKind storageClass = spvType->getPointerStorageClass(); - LayoutMode pointeeLayout = - isStorageClassExplicitlyLaidOut(m_bm, storageClass) ? LayoutMode::Explicit : LayoutMode::Native; auto addrSpace = SPIRSPIRVAddrSpaceMap::rmap(storageClass); // Handle image etc types first, if in UniformConstant memory. @@ -657,122 +719,18 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, } if (spvElementType->getOpCode() == OpTypeImage || spvElementType->getOpCode() == OpTypeSampler || - spvElementType->getOpCode() == OpTypeSampledImage) { - // Pointer to image/sampler/sampledimage type. - Type *imagePtrTy = nullptr; - SPIRVTypeImage *spvImageTy = nullptr; - - if (spvElementType->getOpCode() != OpTypeSampler) { - // Image or sampledimage: get the image pointer type. - if (spvElementType->getOpCode() == OpTypeSampledImage) - spvImageTy = static_cast(spvElementType)->getImageType(); - else - spvImageTy = static_cast(spvElementType); - if (spvImageTy->getDescriptor().Dim == DimBuffer) { - // Texel buffer. - imagePtrTy = getBuilder()->getDescPtrTy(ResourceNodeType::DescriptorTexelBuffer); - } else { - // Image descriptor. - imagePtrTy = getBuilder()->getDescPtrTy(ResourceNodeType::DescriptorResource); - } - // Pointer to image is represented as a struct containing {pointer, stride, planeStride, isResource}. 
- imagePtrTy = StructType::get(*m_context, {imagePtrTy, getBuilder()->getInt32Ty(), getBuilder()->getInt32Ty(), - getBuilder()->getInt32Ty()}); - - if (spvImageTy->getDescriptor().MS) { - // Pointer to multisampled image is represented as two image pointers, the second one for the fmask. - imagePtrTy = StructType::get(*m_context, {imagePtrTy, imagePtrTy}); - } - } - - // For an image (not sampler or sampledimage), just return the pointer-to-image type. - if (spvElementType->getOpCode() == OpTypeImage) - return imagePtrTy; - - // Sampler or sampledimage: get the sampler pointer type. - Type *samplerPtrTy = getBuilder()->getDescPtrTy(ResourceNodeType::DescriptorSampler); - // Pointer to sampler is represented as a struct containing {pointer,stride,convertingSamplerIdx} - samplerPtrTy = - StructType::get(*m_context, {samplerPtrTy, getBuilder()->getInt32Ty(), getBuilder()->getInt32Ty()}); - - // For a sampler, just return that. For a sampledimage, return a struct type containing both pointers. - if (!imagePtrTy) - return samplerPtrTy; - return StructType::get(*m_context, {imagePtrTy, samplerPtrTy}); - } else { - // Uniform constant variable outside of a block use std430 layout. - pointeeLayout = isAccelerationStructureType(spvElementType) ? LayoutMode::Explicit : LayoutMode::Std430; - // From now on (GPURT major version >= 34), AS header may start at a non-zero offset, GPURT now request base - // offset of the resource, and it will calculate the actual GPUVA, instead of compiler providing one loaded from - // offset 0. Here we use SPIRAS_Constant because later in llpcSpirvLowerGlobal the AS will be lowered to - // get.desc.ptr which returns SPIRAS_Constant ptr. - addrSpace = isAccelerationStructureType(spvElementType) ? 
SPIRAS_Constant : addrSpace; - } - } - - Type *const pointeeType = - transType(spvType->getPointerElementType(), matrixStride, isColumnMajor, true, pointeeLayout); - - return PointerType::get(pointeeType, addrSpace); -} - -// ===================================================================================================================== -// Translate an "OpTypeRuntimeArray". This contains special handling for arrays in interface storage classes which are -// explicitly laid out and may contain manually placed padding bytes. If the array needs padding, we map an array like -// '[length]' -> 'struct { , }[length]'. -// -// @param spvType : The type. -// @param matrixStride : The matrix stride (can be 0). -// @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. -// @param layout : The layout mode will be used for the type translation. -template <> -Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { - Type *elementType = transType(spvType->getArrayElementType(), matrixStride, isColumnMajor, isParentPointer, layout); - - SPIRVWord arrayStride = 0; - const bool hasArrayStride = spvType->hasDecorate(DecorationArrayStride, 0, &arrayStride); - assert(hasArrayStride ^ (arrayStride == 0)); - - const uint64_t storeSize = getTypeStoreSize(elementType); - - // NOTE: Padding isn't allowed for a case that the array element is a structure with array-type member in HLSL. - bool paddedArray = arrayStride > storeSize; - - if (layout == LayoutMode::Explicit && hasArrayStride && paddedArray) { - const unsigned padding = static_cast(arrayStride - storeSize); - - // Record that the array was remapped, even though we don't record a useful mapping for arrays. 
- recordRemappedTypeElements(spvType, 0, 0); - - elementType = StructType::create({elementType, getPadType(padding)}, "llpc.runtime.array.element", true); - } - - Type *const runtimeArrayType = ArrayType::get(elementType, SPIRVWORD_MAX); - - // Setup the replaced array type in case this array is used in default uniform struct: - // If the member type could be found in replaced-type map, insert the replaced-type, - // If the member type is image type, insert an int8 type. This is used for image array of array - SPIRVType *spvElementType = spvType->getArrayElementType(); - SPIRVTypeContext ctxElementType(spvElementType, matrixStride, isColumnMajor, isParentPointer, layout); - Type *imageElementType = nullptr; - auto it = m_imageTypeMap.find(ctxElementType.asTuple()); - if (it != m_imageTypeMap.end()) { - imageElementType = static_cast(it->second); - } else if (spvElementType->getOpCode() == OpTypeImage || spvElementType->getOpCode() == OpTypeSampler || - spvElementType->getOpCode() == OpTypeSampledImage) { - imageElementType = Type::getInt8Ty(*m_context); - } + spvElementType->getOpCode() == OpTypeSampledImage) + return getImageTy(getImageTypeComponents(spvElementType)); - if (imageElementType) { - Type *const imageArrayType = ArrayType::get(imageElementType, SPIRVWORD_MAX); - SPIRVTypeContext ctxArray(spvType, matrixStride, isColumnMajor, isParentPointer, layout); - m_imageTypeMap[ctxArray.asTuple()] = imageArrayType; + // From now on (GPURT major version >= 34), AS header may start at a non-zero offset, GPURT now request base + // offset of the resource, and it will calculate the actual GPUVA, instead of compiler providing one loaded from + // offset 0. Here we use SPIRAS_Constant because later in llpcSpirvLowerGlobal the AS will be lowered to + // get.desc.ptr which returns SPIRAS_Constant ptr. + if (isAccelerationStructureType(spvElementType)) + addrSpace = SPIRAS_Constant; } - return paddedArray ? 
recordTypeWithPad(runtimeArrayType) : runtimeArrayType; + return PointerType::get(*m_context, addrSpace); } // ===================================================================================================================== @@ -783,12 +741,10 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvT // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { + const bool isColumnMajor, LayoutMode layout) { SPIRVTypeStruct *const spvStructType = static_cast(spvType); bool isPacked = false; @@ -896,8 +852,8 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvTy if (isExplicitlyLaidOut && memberMatrixStride > 0) assert(memberIsColumnMajor ^ spvStructType->hasMemberDecorate(index, DecorationRowMajor)); - Type *memberType = transType(spvMemberType, memberMatrixStride, memberIsColumnMajor, isParentPointer, layout); - SPIRVTypeContext ctxMemberType(spvMemberType, matrixStride, isColumnMajor, isParentPointer, layout); + Type *memberType = transType(spvMemberType, memberMatrixStride, memberIsColumnMajor, layout); + SPIRVTypeContext ctxMemberType(spvMemberType, matrixStride, isColumnMajor, layout); // Setup the replaced struct type in case this struct is used as default uniform: // 1. If the member type is sampler: @@ -951,11 +907,13 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvTy } if (hasSamplerOrNested) { - SPIRVTypeContext ctx(spvType, matrixStride, isColumnMajor, isParentPointer, layout); + SPIRVTypeContext ctx(spvType, matrixStride, isColumnMajor, layout); m_imageTypeMap[ctx.asTuple()] = imageStructType; } - return usePadding ? 
recordTypeWithPad(structType) : structType; + if (usePadding) + recordTypeWithPad(structType); + return structType; } // ===================================================================================================================== @@ -966,14 +924,11 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvTy // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { - Type *const compType = - transType(spvType->getVectorComponentType(), matrixStride, isColumnMajor, isParentPointer, layout); + const bool isColumnMajor, LayoutMode layout) { + Type *const compType = transType(spvType->getVectorComponentType(), matrixStride, isColumnMajor, layout); // If the vector needs explicit/std430 layout, we need to use an array to represent it because of LLVM's data layout // rules. @@ -987,13 +942,11 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, c // @param spvType : The type. // @param matrixStride : The matrix stride (can be 0). // @param isColumnMajor : Whether the matrix is column major. -// @param isParentPointer : If the parent is a pointer type. // @param layout : The layout mode will be used for the type translation. 
template <> Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *const spvType, const unsigned matrixStride, - const bool isColumnMajor, const bool isParentPointer, - LayoutMode layout) { + const bool isColumnMajor, LayoutMode layout) { auto elemType = mapToBasicType(spvType->getCooperativeMatrixKHRComponentType()); auto use = spvType->getCooperativeMatrixKHRUse(); unsigned rows = spvType->getCooperativeMatrixKHRRows(); @@ -1008,6 +961,7 @@ Type *SPIRVToLLVM::transTypeWithOpcode(SPIRVType *co // @param v : SPIRV Value // @param layout : The layout mode will be used for the type translation. Type *SPIRVToLLVM::getPointeeType(SPIRVValue *v, LayoutMode layout) { + assert(layout != LayoutMode::None); auto opCode = v->getOpCode(); if (isAccessChainOpCode(opCode)) { // If the Base of the AccessChain is a structure then additional padding may be added (depending on the structure @@ -1035,128 +989,73 @@ Type *SPIRVToLLVM::getPointeeType(SPIRVValue *v, LayoutMode layout) { if (isStorageClassExplicitlyLaidOut(m_bm, v->getType()->getPointerStorageClass())) layout = LayoutMode::Explicit; - return transType(v->getType()->getPointerElementType(), 0, true, true, layout); + return transType(v->getType()->getPointerElementType(), 0, true, layout); } -Type *SPIRVToLLVM::transType(SPIRVType *t, unsigned matrixStride, bool columnMajor, bool parentIsPointer, - LayoutMode layout) { - SPIRVTypeContext ctx(t, matrixStride, columnMajor, parentIsPointer, layout); +Type *SPIRVToLLVM::transType(SPIRVType *t, unsigned matrixStride, bool columnMajor, LayoutMode layout) { + SPIRVTypeContext ctx(t, matrixStride, columnMajor, layout); auto it = m_fullTypeMap.find(ctx.asTuple()); if (it != m_fullTypeMap.end()) return it->second; - auto res = transTypeImpl(t, matrixStride, columnMajor, parentIsPointer, layout); + auto res = transTypeImpl(t, matrixStride, columnMajor, layout); m_fullTypeMap[ctx.asTuple()] = res; return res; } -Type *SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool 
columnMajor, bool parentIsPointer, - LayoutMode layout) { - // If the type is not a sub-part of a pointer or it is a forward pointer, we can look in the map. - if (!parentIsPointer || t->isTypeForwardPointer()) { - auto loc = m_typeMap.find(t); - if (loc != m_typeMap.end()) - return loc->second; - } - +Type *SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool columnMajor, LayoutMode layout) { t->validate(); switch (t->getOpCode()) { case OpTypeVoid: - return mapType(t, Type::getVoidTy(*m_context)); + return Type::getVoidTy(*m_context); case OpTypeInt: - return mapType(t, Type::getIntNTy(*m_context, t->getIntegerBitWidth())); + return Type::getIntNTy(*m_context, t->getIntegerBitWidth()); case OpTypeFloat: - return mapType(t, transFPType(t)); + return transFPType(t); case OpTypeFunction: { auto ft = static_cast(t); auto rt = transType(ft->getReturnType()); std::vector pt; for (size_t i = 0, e = ft->getNumParameters(); i != e; ++i) pt.push_back(transType(ft->getParameterType(i))); - return mapType(t, FunctionType::get(rt, pt, false)); - } - case OpTypeImage: { - if (layout != LayoutMode::Native) - return getBuilder()->getInt8Ty(); - - auto st = static_cast(t); - // A buffer image is represented by a texel buffer descriptor. Any other image is represented by an array - // of three image descriptors, to allow for multi-plane YCbCr conversion. (The f-mask part of a multi-sampled - // image is not an array of three.) - Type *imageTy = nullptr; - if (st->getDescriptor().Dim == DimBuffer) { - imageTy = PointerType::get(*m_context, SPIRAS_Constant); - } else { - Type *singleImageTy = PointerType::get(*m_context, SPIRAS_Constant); - imageTy = ArrayType::get(singleImageTy, 3); - if (st->getDescriptor().MS) { - // A multisampled image is represented by a struct containing both the - // image descriptor and the fmask descriptor. 
- imageTy = StructType::get(*m_context, {imageTy, singleImageTy}); - } - } - return mapType(t, imageTy); + return FunctionType::get(rt, pt, false); } + case OpTypeImage: case OpTypeSampler: - case OpTypeSampledImage: { - if (layout != LayoutMode::Native) + case OpTypeSampledImage: + if (layout != LayoutMode::Native && layout != LayoutMode::None) return getBuilder()->getInt8Ty(); - // Get sampler type. - // A sampler is represented by a struct containing the sampler itself, and the convertingSamplerIdx, an i32 - // that is either 0 or the 1-based index into the converting samplers. - Type *ty = PointerType::get(*m_context, SPIRAS_Constant); - ty = StructType::get(*m_context, {ty, getBuilder()->getInt32Ty()}); - if (t->getOpCode() == OpTypeSampledImage) { - // A sampledimage is represented by a struct containing the image descriptor - // and the sampler descriptor. - Type *imageTy = transType(static_cast(t)->getImageType()); - ty = StructType::get(*m_context, {imageTy, ty}); - } - return mapType(t, ty); - } + return getImageTy(getImageTypeComponents(t)); case OpTypeAccelerationStructureKHR: { auto int32x2Ty = FixedVectorType::get(Type::getInt32Ty(*m_context), 2); - return mapType(t, int32x2Ty); + return int32x2Ty; } case OpTypeRayQueryKHR: - return mapType(t, getRayQueryInternalTy(m_builder)); - case OpTypeArray: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); - } + return rtq::getRayQueryType(*m_context); + case OpTypeArray: + case OpTypeRuntimeArray: + return transTypeArray(t, matrixStride, columnMajor, layout); case OpTypeBool: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? 
newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } case OpTypeForwardPointer: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } case OpTypeMatrix: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } case OpTypePointer: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); - } - case OpTypeRuntimeArray: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } case OpTypeStruct: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } case OpTypeVector: { - Type *newTy = transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } case OpTypeCooperativeMatrixKHR: { - Type *newTy = - transTypeWithOpcode(t, matrixStride, columnMajor, parentIsPointer, layout); - return parentIsPointer ? 
newTy : mapType(t, newTy); + return transTypeWithOpcode(t, matrixStride, columnMajor, layout); } default: { llvm_unreachable("Not implemented"); @@ -2115,15 +2014,11 @@ Value *SPIRVToLLVM::addLoadInstRecursively(SPIRVType *const spvType, Value *load Type *alignmentType = loadType; // Vectors are represented as arrays in memory, so we need to cast the array to a vector before loading. if (spvType->isTypeVector()) { - Type *const vectorType = transType(spvType, 0, false, true, LayoutMode::Native); - Type *const castType = vectorType->getPointerTo(loadPointer->getType()->getPointerAddressSpace()); - loadPointer = getBuilder()->CreateBitCast(loadPointer, castType); - loadType = vectorType; + loadType = transType(spvType, 0, false, LayoutMode::Native); const bool scalarBlockLayout = getPipelineOptions()->scalarBlockLayout; - if (!scalarBlockLayout) - alignmentType = vectorType; + alignmentType = loadType; } LoadInst *load = getBuilder()->CreateAlignedLoad(loadType, loadPointer, @@ -2240,14 +2135,11 @@ void SPIRVToLLVM::addStoreInstRecursively(SPIRVType *const spvType, Value *store storeValue = getBuilder()->CreateZExtOrBitCast(storeValue, alignmentType); storeType = storeValue->getType(); } else { - storeType = transType(spvType, 0, false, true, LayoutMode::Native); + storeType = transType(spvType, 0, false, LayoutMode::Native); } // Vectors are represented as arrays in memory, so we need to cast the array to a vector before storing. if (spvType->isTypeVector()) { - Type *const castType = storeType->getPointerTo(storePointer->getType()->getPointerAddressSpace()); - storePointer = getBuilder()->CreateBitCast(storePointer, castType); - const bool scalarBlockLayout = getPipelineOptions()->scalarBlockLayout; if (!scalarBlockLayout) alignmentType = storeType; @@ -2834,7 +2726,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *c LayoutMode loadLayout = isStorageClassExplicitlyLaidOut(m_bm, spvLoadType->getPointerStorageClass()) ? 
LayoutMode::Explicit : LayoutMode::Native; - Type *const loadType = transType(spvCopyMemLoadType, 0, true, true, loadLayout); + Type *const loadType = transType(spvCopyMemLoadType, 0, true, loadLayout); bool isNonTemporal = spvCopyMemory->SPIRVMemoryAccess::isNonTemporal(true); Value *const load = addLoadInstRecursively(spvCopyMemLoadType, loadPointer, loadType, isSrcVolatile, isCoherent, isNonTemporal); @@ -2846,7 +2738,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *c ? LayoutMode::Explicit : LayoutMode::Native; - Type *const storeType = transType(spvCopyMemStoreType, 0, true, true, storeLayout); + Type *const storeType = transType(spvCopyMemStoreType, 0, true, storeLayout); isNonTemporal = spvCopyMemory->SPIRVMemoryAccess::isNonTemporal(false); addStoreInstRecursively(spvCopyMemStoreType, storePointer, storeType, load, isDestVolatile, isCoherent, @@ -2872,6 +2764,9 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s SPIRVLoad *const spvLoad = static_cast(spvValue); LayoutMode layout = LayoutMode::Native; + Value *const loadPointer = + transValue(spvLoad->getSrc(), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); + const auto storageClassKind = spvLoad->getSrc()->getType()->getPointerStorageClass(); // Handle UniformConstant image/sampler/sampledimage load. if (storageClassKind == StorageClassUniformConstant) { @@ -2879,7 +2774,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s case OpTypeImage: case OpTypeSampler: case OpTypeSampledImage: - return transLoadImage(spvLoad->getSrc()); + return loadPointer; case OpTypeAccelerationStructureKHR: { if (getPipelineContext()->getRayTracingState()->forceInvalidAccelStruct) { // Always return invalid AS address (0x0, 0x0) if the option is set. 
@@ -2896,9 +2791,6 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s } } - Value *const loadPointer = - transValue(spvLoad->getSrc(), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - bool isVolatile = spvLoad->SPIRVMemoryAccess::isVolatile(true); const Vkgc::ExtendedRobustness &extendedRobustness = getPipelineOptions()->extendedRobustness; if (extendedRobustness.nullDescriptor || extendedRobustness.robustBufferAccess) @@ -2984,11 +2876,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const s // // @param spvImageLoadPtr : The image/sampler/sampledimage pointer Value *SPIRVToLLVM::transLoadImage(SPIRVValue *spvImageLoadPtr) { - SPIRVType *spvElementTy = spvImageLoadPtr->getType()->getPointerElementType(); - Type *elementTy = transType(spvElementTy, 0, false, false, LayoutMode::Native); BasicBlock *bb = getBuilder()->GetInsertBlock(); - Value *base = transValueMulti(spvImageLoadPtr, bb->getParent(), bb)[0]; - return loadImageSampler(elementTy, base); + return transValueMulti(spvImageLoadPtr, bb->getParent(), bb)[0]; } // ===================================================================================================================== @@ -2998,142 +2887,40 @@ Value *SPIRVToLLVM::transLoadImage(SPIRVValue *spvImageLoadPtr) { // @param imgDescGpuAddress : image descriptor's gpu memory address // @param bindlessTexture : true is bindless texture, false is bindless image Value *SPIRVToLLVM::transLoadBindlessImage(SPIRVType *spvElementTy, Value *imgDescGpuAddress, bool bindlessTexture) { + unsigned components = getImageTypeComponents(spvElementTy); + auto idxs = getImageTypeIndices(components); + Type *descPtrTy = getBuilder()->getDescPtrTy(); + Type *elementTy = getImageTy(components); + auto imageDescAddr = getBuilder()->CreateIntToPtr(imgDescGpuAddress, descPtrTy); - Type *elementTy = transType(spvElementTy, 0, false, false, LayoutMode::Native); - Type *gpuAddrAsPtrTy = 
getBuilder()->getPtrTy(SPIRAS_Constant); - auto imageDescAddr = getBuilder()->CreateIntToPtr(imgDescGpuAddress, gpuAddrAsPtrTy); + assert(bindlessTexture == ((components & ImageComponentSampler) != 0)); - SPIRVTypeImage *spvImageTy = nullptr; - if (spvElementTy->getOpCode() == OpTypeSampledImage) { - spvImageTy = static_cast(spvElementTy)->getImageType(); - } else { - spvImageTy = static_cast(spvElementTy); - } + // Fill in the pointer components. Strides are generally left as poison because GEP cannot be used on the result of + // "loading" a bindless image/texture. + Value *result = PoisonValue::get(elementTy); - auto desc = spvImageTy->getDescriptor(); - Value *imageDescPtr = nullptr; + if (components & ImageComponentImage) { + result = getBuilder()->CreateInsertValue(result, imageDescAddr, idxs.imagePointer); - // Handle samplerBuffer or imageBuffer - if (desc.Dim == DimBuffer) { - auto bufferDescStride = getBuilder()->getInt32(DescriptorSizeBuffer); - imageDescPtr = getBuilder()->CreateInsertValue( - PoisonValue::get(StructType::get(*m_context, {imageDescAddr->getType(), bufferDescStride->getType(), - bufferDescStride->getType(), getBuilder()->getInt32Ty()})), - imageDescAddr, 0); - imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, bufferDescStride, 1); - } else { - // The descriptor stride is unimportant for bindless texture/image, just use it as a placeholder - auto imageDescStride = getBuilder()->getInt32(DescriptorSizeResource); - imageDescPtr = getBuilder()->CreateInsertValue( - PoisonValue::get(StructType::get(*m_context, {imageDescAddr->getType(), imageDescStride->getType(), - imageDescStride->getType(), getBuilder()->getInt32Ty()})), - imageDescAddr, 0); - - imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, imageDescStride, 1); - imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, getBuilder()->getInt32(DescriptorSizeResource), 2); - imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, getBuilder()->getInt32(1), 
3); - } - - // Insert fmask descriptor address into structure - if (desc.MS) { - auto fMaskOffset = getBuilder()->getInt64(DescriptorSizeResource + DescriptorSizeSampler); - constexpr unsigned descriptorSizeFmask = 8 * sizeof(uint32_t); - auto fmaskDescStride = getBuilder()->getInt32(descriptorSizeFmask); - Value *fMaskDescAddr = - getBuilder()->CreateIntToPtr(getBuilder()->CreateAdd(imgDescGpuAddress, fMaskOffset), gpuAddrAsPtrTy); - - auto fmaskDescPtr = getBuilder()->CreateInsertValue( - PoisonValue::get(StructType::get(*m_context, {fMaskDescAddr->getType(), fmaskDescStride->getType(), - fmaskDescStride->getType(), getBuilder()->getInt32Ty()})), - fMaskDescAddr, 0); - fmaskDescPtr = getBuilder()->CreateInsertValue(fmaskDescPtr, fmaskDescStride, 1); - imageDescPtr = getBuilder()->CreateInsertValue( - PoisonValue::get(StructType::get(*m_context, {imageDescPtr->getType(), fmaskDescPtr->getType()})), imageDescPtr, - 0); - imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, fmaskDescPtr, 1); - } - - // True for bindless texture, otherwise is bindless image - if (bindlessTexture) { - auto samplerOffset = getBuilder()->getInt64(DescriptorSizeResource); - auto samplerDescStride = getBuilder()->getInt32(DescriptorSizeSampler); - - Value *samplerDescAddr = - getBuilder()->CreateIntToPtr(getBuilder()->CreateAdd(imgDescGpuAddress, samplerOffset), gpuAddrAsPtrTy); - - Type *samplerPtrTy = StructType::get( - *m_context, {samplerDescAddr->getType(), getBuilder()->getInt32Ty(), getBuilder()->getInt32Ty()}); - Value *samplerDescPtr = Constant::getNullValue(samplerPtrTy); - - samplerDescPtr = getBuilder()->CreateInsertValue(samplerDescPtr, samplerDescAddr, 0); - samplerDescPtr = getBuilder()->CreateInsertValue(samplerDescPtr, samplerDescStride, 1); - - Value *descPtr = - PoisonValue::get(StructType::get(*m_context, {imageDescPtr->getType(), samplerDescPtr->getType()})); - descPtr = getBuilder()->CreateInsertValue(descPtr, imageDescPtr, 0); - descPtr = 
getBuilder()->CreateInsertValue(descPtr, samplerDescPtr, 1); - - return loadImageSampler(elementTy, descPtr); - } - - return loadImageSampler(elementTy, imageDescPtr); -} - -// ===================================================================================================================== -// Generate a load of an image, sampler or sampledimage -// -// @param elementTy : Element type being loaded -// @param base : Pointer to load from -Value *SPIRVToLLVM::loadImageSampler(Type *elementTy, Value *base) { - if (auto structTy = dyn_cast(elementTy)) { - if (!structTy->getElementType(1)->isIntegerTy()) { - // The item being loaded is a struct of two items that need loading separately (excluding the case below that - // is it a struct with an i32, which is a sampler with its convertingSamplerIdx). There are two cases - // of that: - // 1. A sampledimage is an image plus a sampler. - // 2. An image that is multisampled is an image plus an fmask. - Value *ptr1 = getBuilder()->CreateExtractValue(base, 1); - Value *element1 = loadImageSampler(structTy->getElementType(1), ptr1); - Value *ptr0 = getBuilder()->CreateExtractValue(base, 0); - Value *element0 = loadImageSampler(structTy->getElementType(0), ptr0); - Value *result = getBuilder()->CreateInsertValue(PoisonValue::get(structTy), element0, 0); - result = getBuilder()->CreateInsertValue(result, element1, 1); - return result; - } + // TODO: Planes may be loaded opportunistically, so need a valid plane stride to avoid UB. + result = getBuilder()->CreateInsertValue(result, getBuilder()->getInt32(0), idxs.imagePlaneStride); - // The item being loaded is a struct where element 1 is integer. That must be a sampler with its i32 - // convertingSamplerIdx. The loaded value inherits the convertingSamplerIdx from the - // {pointer,stride,convertingSamplerIdx} struct that represents the descriptor pointer. 
- Value *convertingSamplerIdx = getBuilder()->CreateExtractValue(base, 2); - Value *loadedVal = loadImageSampler(structTy->getElementType(0), base); - loadedVal = getBuilder()->CreateInsertValue(PoisonValue::get(structTy), loadedVal, 0); - return getBuilder()->CreateInsertValue(loadedVal, convertingSamplerIdx, 1); - } - - // The image or sampler "descriptor" is in fact a struct containing the pointer and stride. We only - // need the pointer here. - Value *ptr = getBuilder()->CreateExtractValue(base, 0); - - if (auto arrayTy = dyn_cast(elementTy)) { - // The element type being loaded is an array. That must be where a non-texel-buffer image is represented as - // an array of three image descriptors, to allow for multiple planes in YCbCr conversion. Normally we only - // load one descriptor; if there are any converting samplers, we load all three, and rely on later optimizations - // to remove the unused ones (and thus stop us reading off the end of the descriptor table). - Value *result = getBuilder()->CreateInsertValue(PoisonValue::get(arrayTy), ptr, 0); - // Pointer to image is represented as a struct containing {pointer, stride, planeStride, isResource}. 
- if (!m_convertingSamplers.empty() && base->getType()->getStructNumElements() >= 4) { - Value *planeStride = getBuilder()->CreateExtractValue(base, 2); - Type *ptrTy = ptr->getType(); - - for (unsigned planeIdx = 1; planeIdx != arrayTy->getNumElements(); ++planeIdx) { - ptr = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), ptr, planeStride); - ptr = getBuilder()->CreateBitCast(ptr, ptrTy); - result = getBuilder()->CreateInsertValue(result, ptr, planeIdx); - } + if (components & ImageComponentFMask) { + auto fmaskOffset = DescriptorSizeResource + DescriptorSizeSampler; + Value *fmaskDescAddr = getBuilder()->CreateConstGEP1_32(getBuilder()->getInt8Ty(), imageDescAddr, fmaskOffset); + result = getBuilder()->CreateInsertValue(result, fmaskDescAddr, idxs.fmaskPointer); } - return result; } - return ptr; + + if (components & ImageComponentSampler) { + auto samplerOffset = DescriptorSizeResource; + Value *samplerDescAddr = getBuilder()->CreateConstGEP1_32(getBuilder()->getInt8Ty(), imageDescAddr, samplerOffset); + + result = getBuilder()->CreateInsertValue(result, samplerDescAddr, idxs.samplerPointer); + result = getBuilder()->CreateInsertValue(result, getBuilder()->getInt32(0), idxs.convertingSamplerIdx); + } + + return result; } // ===================================================================================================================== @@ -3180,12 +2967,48 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy elementWorklist.push_back(spvTy->getStructMemberType(i)); } - Value *imageDescPtr = nullptr; - Value *samplerDescPtr = nullptr; + unsigned components = getImageTypeComponents(spvTy); + auto idxs = getImageTypeIndices(components); + Value *result = PoisonValue::get(getImageTy(components)); + + unsigned imageDescSet = descriptorSet; + unsigned fmaskDescSet = descriptorSet; + unsigned samplerDescSet = descriptorSet; - if (getPipelineOptions()->getGlState().replaceSetWithResourceType) + if 
(getPipelineOptions()->getGlState().replaceSetWithResourceType) { assert(spvTy->getOpCode() != OpTypeSampler); + if (spvTy->getOpCode() == OpTypeImage) { + imageDescSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorImage); + } else if (spvTy->getOpCode() == OpTypeSampledImage) { + if (getPipelineOptions()->getGlState().enableCombinedTexture) { + imageDescSet = + PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorCombinedTexture); + } else { + imageDescSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource); + } + } + + fmaskDescSet = imageDescSet; + samplerDescSet = imageDescSet; + + if (spvTy->getOpCode() != OpTypeImage) + fmaskDescSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorFmask); + + if (!getPipelineOptions()->getGlState().enableCombinedTexture) + samplerDescSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorSampler); + } + + unsigned convertingSamplerIdx = 0; + unsigned nextIdx = 1; + for (const ConvertingSampler &convertingSampler : m_convertingSamplers) { + if (convertingSampler.set == descriptorSet && convertingSampler.binding == binding) { + convertingSamplerIdx = nextIdx; + break; + } + nextIdx += convertingSampler.values.size() / ConvertingSamplerDwordCount; + } + if (spvTy->getOpCode() != OpTypeSampler) { // Image or sampledimage -- need to get the image pointer-and-stride. SPIRVType *spvImageTy = spvTy; @@ -3197,143 +3020,46 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy auto resType = desc->Dim == DimBuffer ? 
ResourceNodeType::DescriptorTexelBuffer : ResourceNodeType::DescriptorResource; - if (getPipelineOptions()->getGlState().replaceSetWithResourceType) { - if (spvTy->getOpCode() == OpTypeImage) { - descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorImage); - } else if (spvTy->getOpCode() == OpTypeSampledImage) { - if (getPipelineOptions()->getGlState().enableCombinedTexture) { - descriptorSet = - PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorCombinedTexture); - } else { - descriptorSet = - PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource); - } - } - } + Value *imagePointer = getBuilder()->CreateGetDescPtr(resType, resType, imageDescSet, binding); + Value *imageStride = getBuilder()->CreateGetDescStride(resType, resType, imageDescSet, binding); + result = getBuilder()->CreateInsertValue(result, imagePointer, idxs.imagePointer); + result = getBuilder()->CreateInsertValue(result, imageStride, idxs.imageStride); - imageDescPtr = getDescPointerAndStride(resType, descriptorSet, binding, resType); + if (convertingSamplerIdx == 0) { + result = getBuilder()->CreateInsertValue(result, getBuilder()->getInt32(0), idxs.imagePlaneStride); + } else { + auto samplerMetadata = + m_convertingSamplers[convertingSamplerIdx - 1].values.data() + DescriptorSizeSamplerInDwords; + Value *planes = getBuilder()->getInt32( + reinterpret_cast(samplerMetadata)->word1.planes); + Value *planeStride = getBuilder()->CreateUDiv(imageStride, planes); + result = getBuilder()->CreateInsertValue(result, planeStride, idxs.imagePlaneStride); + } if (desc->MS) { - if (getPipelineOptions()->getGlState().replaceSetWithResourceType && spvTy->getOpCode() != OpTypeImage) - descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorFmask); - // A multisampled image pointer is a struct containing an image desc pointer and an fmask 
desc pointer. - Value *fmaskDescPtr = getDescPointerAndStride(ResourceNodeType::DescriptorFmask, descriptorSet, binding, - ResourceNodeType::DescriptorFmask); - imageDescPtr = getBuilder()->CreateInsertValue( - PoisonValue::get(StructType::get(*m_context, {imageDescPtr->getType(), fmaskDescPtr->getType()})), - imageDescPtr, 0); - imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, fmaskDescPtr, 1); + Value *fmaskPointer = getBuilder()->CreateGetDescPtr(ResourceNodeType::DescriptorFmask, + ResourceNodeType::DescriptorFmask, fmaskDescSet, binding); + Value *fmaskStride = getBuilder()->CreateGetDescStride(ResourceNodeType::DescriptorFmask, + ResourceNodeType::DescriptorFmask, fmaskDescSet, binding); + result = getBuilder()->CreateInsertValue(result, fmaskPointer, idxs.fmaskPointer); + result = getBuilder()->CreateInsertValue(result, fmaskStride, idxs.fmaskStride); } } if (spvTy->getOpCode() != OpTypeImage) { - if (getPipelineOptions()->getGlState().replaceSetWithResourceType && - !getPipelineOptions()->getGlState().enableCombinedTexture) - descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorSampler); - // Sampler or sampledimage -- need to get the sampler {pointer,stride,convertingSamplerIdx} - samplerDescPtr = getDescPointerAndStride(ResourceNodeType::DescriptorSampler, descriptorSet, binding, - ResourceNodeType::DescriptorSampler); - - if (spvTy->getOpCode() == OpTypeSampler) - return samplerDescPtr; - } - - if (imageDescPtr) { - if (samplerDescPtr) { - Value *descPtr = - PoisonValue::get(StructType::get(*m_context, {imageDescPtr->getType(), samplerDescPtr->getType()})); - descPtr = getBuilder()->CreateInsertValue(descPtr, imageDescPtr, 0); - descPtr = getBuilder()->CreateInsertValue(descPtr, samplerDescPtr, 1); - return descPtr; - } - return imageDescPtr; - } - return samplerDescPtr; -} - -// 
===================================================================================================================== -// Get an image/sampler descriptor pointer-and-stride struct -// -// @param resType : ResourceNodeType value -// @param descriptorSet : Descriptor set -// @param binding : Binding -// @param searchType : ResourceNodeType to find user resource node -Value *SPIRVToLLVM::getDescPointerAndStride(ResourceNodeType resType, unsigned descriptorSet, unsigned binding, - ResourceNodeType searchType) { - if (resType != ResourceNodeType::DescriptorSampler) { - // f-mask/texel buffer, where a pointer is represented by a struct {pointer,stride}. - Value *descPtr = getBuilder()->CreateGetDescPtr(resType, searchType, descriptorSet, binding); - Value *descStride = getBuilder()->CreateGetDescStride(resType, searchType, descriptorSet, binding); - descPtr = getBuilder()->CreateInsertValue( - PoisonValue::get(StructType::get(*m_context, {descPtr->getType(), descStride->getType(), descStride->getType(), - getBuilder()->getInt32Ty()})), - descPtr, 0); - descPtr = getBuilder()->CreateInsertValue(descPtr, descStride, 1); - - if (resType == ResourceNodeType::DescriptorResource) { - // Image, where a pointer is represented by a struct {pointer, stride, planeStride, isResource} - unsigned convertingSamplerIdx = 0; - unsigned nextIdx = 1; - for (const ConvertingSampler &convertingSampler : m_convertingSamplers) { - if (convertingSampler.set == descriptorSet && convertingSampler.binding == binding) { - convertingSamplerIdx = nextIdx; - break; - } - nextIdx += convertingSampler.values.size() / ConvertingSamplerDwordCount; - } - if (convertingSamplerIdx == 0) { - descPtr = getBuilder()->CreateInsertValue(descPtr, getBuilder()->getInt32(DescriptorSizeResource), 2); - } else { - // Sampler Descriptor includes {sampler, YCbCrMetaDta} - auto samplerMetadata = - m_convertingSamplers[convertingSamplerIdx - 1].values.data() + DescriptorSizeSamplerInDwords; - Value *planes = 
getBuilder()->getInt32( - reinterpret_cast(samplerMetadata)->word1.planes); - Value *planeStride = getBuilder()->CreateUDiv(descStride, planes); - descPtr = getBuilder()->CreateInsertValue(descPtr, planeStride, 2); - } - descPtr = getBuilder()->CreateInsertValue(descPtr, getBuilder()->getInt32(1), 3); - } - return descPtr; - } - - // A sampler pointer is represented by a struct {pointer,stride,convertingSamplerIdx}, where - // convertingSamplerIdx is 0 or the 1-based converting sampler index. Here we use descriptorSet and binding - // to detect whether it is a converting sampler, and set up the converting sampler index. - unsigned convertingSamplerIdx = 0; - unsigned nextIdx = 1; - unsigned convertingSamplerDescriptorSet = descriptorSet; - if (getPipelineOptions()->getGlState().replaceSetWithResourceType && - descriptorSet == - PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorSampler)) { - // When using 'replaceSetWithResourceType' option (OGL default) it's not possible to match converting samplers - // for 'DescriptorResource' and 'DescriptorSampler' at the same time, which is needed to handle YCbCr formats. - // Converting sampler with YCbCr metadata has 'DescriptorResource' set assigned, hence looking for it instead. 
- convertingSamplerDescriptorSet = - PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource); - } - for (const ConvertingSampler &convertingSampler : m_convertingSamplers) { - if (convertingSampler.set == convertingSamplerDescriptorSet && convertingSampler.binding == binding) { - convertingSamplerIdx = nextIdx; - break; + if (convertingSamplerIdx == 0) { + Value *samplerPointer = getBuilder()->CreateGetDescPtr( + ResourceNodeType::DescriptorSampler, ResourceNodeType::DescriptorSampler, samplerDescSet, binding); + Value *samplerStride = getBuilder()->CreateGetDescStride( + ResourceNodeType::DescriptorSampler, ResourceNodeType::DescriptorSampler, samplerDescSet, binding); + result = getBuilder()->CreateInsertValue(result, samplerPointer, idxs.samplerPointer); + result = getBuilder()->CreateInsertValue(result, samplerStride, idxs.samplerStride); } - nextIdx += convertingSampler.values.size() / ConvertingSamplerDwordCount; + result = getBuilder()->CreateInsertValue(result, getBuilder()->getInt32(convertingSamplerIdx), + idxs.convertingSamplerIdx); } - Type *samplerPtrTy = StructType::get(*m_context, {getBuilder()->getDescPtrTy(ResourceNodeType::DescriptorSampler), - getBuilder()->getInt32Ty(), getBuilder()->getInt32Ty()}); - Value *samplerDescPtr = Constant::getNullValue(samplerPtrTy); - if (convertingSamplerIdx == 0) { - // Not a converting sampler. Get a normal sampler pointer and stride and put it in the struct. - samplerDescPtr = getBuilder()->CreateInsertValue( - samplerDescPtr, getBuilder()->CreateGetDescPtr(resType, searchType, descriptorSet, binding), 0); - samplerDescPtr = getBuilder()->CreateInsertValue( - samplerDescPtr, getBuilder()->CreateGetDescStride(resType, searchType, descriptorSet, binding), 1); - } else { - // It is a converting sampler. Return the struct with just the converting sampler index. 
- samplerDescPtr = getBuilder()->CreateInsertValue(samplerDescPtr, getBuilder()->getInt32(convertingSamplerIdx), 2); - } - return samplerDescPtr; + return result; } // ===================================================================================================================== @@ -3725,6 +3451,17 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { llvm_unreachable("unhandled type in access chain"); } } + // Process GEP last access chain type + switch (spvAccessElementType->getOpCode()) { + case OpTypeRayQueryKHR: { + SmallVector args; + args.insert(args.end(), gepIndices.begin(), gepIndices.end()); + base = getBuilder()->create(basePointeeType, inBound, base, args); + gepIndices.erase(gepIndices.begin() + 1, gepIndices.end()); + basePointeeType = transType(spvAccessElementType); + break; + } + } Type *finalPointeeType = GetElementPtrInst::getIndexedType(basePointeeType, gepIndices); flushGep(); @@ -3745,7 +3482,7 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { // 'proxyType' is the replaced type for struct/array type with image/sampler member. // In which, image/sampler member is replaced by int8 type, and non-image member is replaced by empty sturct. 
Type *proxyType = nullptr; - SPIRVTypeContext ctx(spvAccessType, 0, true, true, layout); + SPIRVTypeContext ctx(spvAccessType, 0, true, layout); auto it = m_imageTypeMap.find(ctx.asTuple()); if (it != m_imageTypeMap.end()) proxyType = it->second; @@ -3773,8 +3510,7 @@ SmallVector SPIRVToLLVM::transAccessChain(SPIRVValue *const spvValue) { elementWorklist.push_back(spvElementType->getStructMemberType(i)); } - Type *imageSamplerType = transType(spvElementType); - result.push_back(indexDescPtr(imageSamplerType, base, offset)); + result.push_back(indexDescPtr(spvElementType, base, offset)); } } @@ -3790,54 +3526,49 @@ template <> SmallVector SPIRVToLLVM::transValueMultiWithOpcode(elementTy); - if (structTy && !structTy->getElementType(structTy->getNumElements() - 1)->isIntegerTy()) { - // The element type is a struct containing two image/sampler elements. The cases where this happens are: - // 1. A sampledimage is a struct containing image and sampler. - // 2. An image that is multisampled is a struct containing image and fmask. - // In both cases, the pointer type is also a struct containing the corresponding two pointer-and-samples. - // Index them separately. - assert(structTy->getNumElements() == 2); - Value *ptr0 = getBuilder()->CreateExtractValue(base, 0); - Value *ptr1 = getBuilder()->CreateExtractValue(base, 1); - ptr0 = indexDescPtr(structTy->getElementType(0), ptr0, index); - ptr1 = indexDescPtr(structTy->getElementType(1), ptr1, index); - base = getBuilder()->CreateInsertValue(PoisonValue::get(base->getType()), ptr0, 0); - base = getBuilder()->CreateInsertValue(base, ptr1, 1); - return base; - } - - // A sampler pointer is represented by a {pointer,stride,convertingSamplerIdx} struct. If the converting sampler - // index is non-zero (i.e. it is actually a converting sampler), we also want to modify that index. That can only - // happen if there are any converting samplers at all. 
- if (!m_convertingSamplers.empty() && base->getType()->getStructNumElements() == 3) { - Value *convertingSamplerIdx = getBuilder()->CreateExtractValue(base, 2); - Value *modifiedIdx = getBuilder()->CreateAdd(convertingSamplerIdx, index); - Value *isConvertingSampler = getBuilder()->CreateICmpNE(convertingSamplerIdx, getBuilder()->getInt32(0)); - modifiedIdx = getBuilder()->CreateSelect(isConvertingSampler, modifiedIdx, getBuilder()->getInt32(0)); - base = getBuilder()->CreateInsertValue(base, modifiedIdx, 2); - } - - // The descriptor "pointer" is in fact a struct containing the pointer and stride. - Value *ptr = getBuilder()->CreateExtractValue(base, 0); - Value *stride = getBuilder()->CreateExtractValue(base, 1); +Value *SPIRVToLLVM::indexDescPtr(SPIRVType *spvElementTy, Value *base, Value *index) { + unsigned components = getImageTypeComponents(spvElementTy); + auto idxs = getImageTypeIndices(components); + index = getBuilder()->CreateZExtOrTrunc(index, getBuilder()->getInt32Ty()); - index = getBuilder()->CreateMul(index, stride); - // Do the indexing operation by GEPping as a byte pointer. 
- Type *ptrTy = ptr->getType(); - ptr = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), ptr, index); - ptr = getBuilder()->CreateBitCast(ptr, ptrTy); - base = getBuilder()->CreateInsertValue(base, ptr, 0); + if (components & ImageComponentImage) { + Value *pointer = getBuilder()->CreateExtractValue(base, idxs.imagePointer); + Value *stride = getBuilder()->CreateExtractValue(base, idxs.imageStride); + Value *offset = getBuilder()->CreateMul(index, stride); + pointer = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), pointer, offset); + base = getBuilder()->CreateInsertValue(base, pointer, idxs.imagePointer); + } + + if (components & ImageComponentFMask) { + Value *pointer = getBuilder()->CreateExtractValue(base, idxs.fmaskPointer); + Value *stride = getBuilder()->CreateExtractValue(base, idxs.fmaskStride); + Value *offset = getBuilder()->CreateMul(index, stride); + pointer = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), pointer, offset); + base = getBuilder()->CreateInsertValue(base, pointer, idxs.fmaskPointer); + } + + if (components & ImageComponentSampler) { + Value *pointer = getBuilder()->CreateExtractValue(base, idxs.samplerPointer); + Value *stride = getBuilder()->CreateExtractValue(base, idxs.samplerStride); + Value *offset = getBuilder()->CreateMul(index, stride); + pointer = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), pointer, offset); + base = getBuilder()->CreateInsertValue(base, pointer, idxs.samplerPointer); + + if (!m_convertingSamplers.empty()) { + Value *convertingSamplerIdx = getBuilder()->CreateExtractValue(base, idxs.convertingSamplerIdx); + Value *updated = getBuilder()->CreateAdd(convertingSamplerIdx, index); + Value *isConverting = getBuilder()->CreateICmpNE(convertingSamplerIdx, getBuilder()->getInt32(0)); + convertingSamplerIdx = getBuilder()->CreateSelect(isConverting, updated, getBuilder()->getInt32(0)); + base = getBuilder()->CreateInsertValue(base, convertingSamplerIdx, idxs.convertingSamplerIdx); + } + } return base; } 
@@ -3873,9 +3604,25 @@ SmallVector SPIRVToLLVM::transValueMultiWithOpcode Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { - Value *sampledImage = transValue(static_cast(spvValue)->getOpValue(0), - getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - return getBuilder()->CreateExtractValue(sampledImage, uint64_t(0)); + SPIRVValue *spvSrc = static_cast(spvValue)->getOpValue(0); + Value *src = transValue(spvSrc, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); + auto srcIdxs = getImageTypeIndices(getImageTypeComponents(spvSrc->getType())); + unsigned dstComponents = getImageTypeComponents(spvValue->getType()); + auto dstIdxs = getImageTypeIndices(dstComponents); + Value *result = PoisonValue::get(getImageTy(dstComponents)); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(src, srcIdxs.imagePointer), + dstIdxs.imagePointer); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(src, srcIdxs.imageStride), + dstIdxs.imageStride); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(src, srcIdxs.imagePlaneStride), + dstIdxs.imagePlaneStride); + if (dstComponents & ImageComponentFMask) { + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(src, srcIdxs.fmaskPointer), + dstIdxs.fmaskPointer); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(src, srcIdxs.fmaskStride), + dstIdxs.fmaskStride); + } + return result; } // ===================================================================================================================== @@ -3883,14 +3630,31 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const // // @param spvValue : A SPIR-V value. 
template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { - Value *image = transValue(static_cast(spvValue)->getOpValue(0), - getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - Value *sampler = transValue(static_cast(spvValue)->getOpValue(1), - getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - - Value *result = PoisonValue::get(StructType::get(*m_context, {image->getType(), sampler->getType()})); - result = getBuilder()->CreateInsertValue(result, image, uint64_t(0)); - result = getBuilder()->CreateInsertValue(result, sampler, 1); + SPIRVValue *spvImage = static_cast(spvValue)->getOpValue(0); + SPIRVValue *spvSampler = static_cast(spvValue)->getOpValue(1); + Value *image = transValue(spvImage, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); + Value *sampler = transValue(spvSampler, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); + auto imageIdxs = getImageTypeIndices(getImageTypeComponents(spvImage->getType())); + auto samplerIdxs = getImageTypeIndices(getImageTypeComponents(spvSampler->getType())); + unsigned dstComponents = getImageTypeComponents(spvValue->getType()); + auto dstIdxs = getImageTypeIndices(dstComponents); + Value *result = PoisonValue::get(getImageTy(dstComponents)); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(image, imageIdxs.imagePointer), + dstIdxs.imagePointer); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(image, imageIdxs.imageStride), + dstIdxs.imageStride); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(image, imageIdxs.imagePlaneStride), + dstIdxs.imagePlaneStride); + if (dstComponents & ImageComponentFMask) { + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(image, imageIdxs.fmaskPointer), + dstIdxs.fmaskPointer); + result = 
getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(image, imageIdxs.fmaskStride), + dstIdxs.fmaskStride); + } + result = getBuilder()->CreateInsertValue( + result, getBuilder()->CreateExtractValue(sampler, samplerIdxs.samplerPointer), dstIdxs.samplerPointer); + result = getBuilder()->CreateInsertValue(result, getBuilder()->CreateExtractValue(sampler, samplerIdxs.samplerStride), + dstIdxs.samplerStride); return result; } @@ -3931,6 +3695,22 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SP return getBuilder()->CreateIsHelperInvocation(); } +// ===================================================================================================================== +// Handle OpBeginInvocationInterlockEXT. +// +// @param spvValue : A SPIR-V value. +template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + return getBuilder()->create(); +} + +// ===================================================================================================================== +// Handle OpEndInvocationInterlockEXT. +// +// @param spvValue : A SPIR-V value. +template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + return getBuilder()->create(); +} + // ===================================================================================================================== // Handle OpReadClockKHR. // @@ -4366,13 +4146,15 @@ Value *SPIRV::SPIRVToLLVM::createTraceRayDialectOp(SPIRVValue *const spvValue) { Value *const rayTMax = transValue(spvOperands[9], func, block); Value *const payload = transValue(spvOperands[10], func, block); + getRaytracingContext()->updateRayFlagsKnownBits(computeKnownBits(rayFlags, m_m->getDataLayout())); + auto accelStructAsI64 = getBuilder()->CreateBitCast(accelStruct, getBuilder()->getInt64Ty()); Type *payloadTy = transType(spvOperands[10]->getType()->getPointerElementType()); // Wrap payload with struct, PAQ handling expects a struct type. 
// FIXME: We should support non-struct types for PAQ - if (getRaytracingContext()->isContinuationsMode() && !payloadTy->isStructTy()) + if (!payloadTy->isStructTy()) payloadTy = StructType::get(*m_context, {payloadTy}, ""); auto paq = getPaqFromSize(getBuilder()->getContext(), alignTo(m_m->getDataLayout().getTypeAllocSize(payloadTy), 4)); @@ -4406,7 +4188,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRV // Wrap payload with struct, PAQ handling expects a struct type. // FIXME: We should support non-struct types for PAQ - if (getRaytracingContext()->isContinuationsMode() && !callableDataTy->isStructTy()) + if (!callableDataTy->isStructTy()) callableDataTy = StructType::get(*m_context, {callableDataTy}, ""); auto *call = getBuilder()->create(shaderIndex, callableData, dataByteSize); @@ -5109,7 +4891,7 @@ Value *SPIRVToLLVM::transVariableNonImage(SPIRVValue *const spvValue) { case OpTypeSampler: case OpTypeSampledImage: // Only translate image/sampler array type to record the m_imageTypeMap - transType(spvVarType, 0, true, true, layout); + transType(spvVarType, 0, true, layout); return nullptr; default: if (!isAccelerationStructureType(spvElementType)) @@ -5121,7 +4903,7 @@ Value *SPIRVToLLVM::transVariableNonImage(SPIRVValue *const spvValue) { Type *const ptrType = transType(spvVar->getType()); unsigned addrSpace = ptrType->getPointerAddressSpace(); - Type *const varType = transType(spvVarType, 0, true, true, layout); + Type *const varType = transType(spvVarType, 0, true, layout); SPIRVValue *const spvInitializer = spvVar->getInitializer(); Constant *initializer = nullptr; @@ -5219,8 +5001,8 @@ Value *SPIRVToLLVM::transVariableNonImage(SPIRVValue *const spvValue) { assert(bb->isEntryBlock()); getBuilder()->SetInsertPoint(bb, bb->getFirstInsertionPt()); auto allocAddr = m_m->getDataLayout().getAllocaAddrSpace(); - Value *const var = getBuilder()->CreateAlloca(varType, allocAddr, nullptr, spvVar->getName()); + Value *const var = 
getBuilder()->CreateAlloca(varType, allocAddr, nullptr, spvVar->getName()); getBuilder()->restoreIP(insertPoint); if (initializer) @@ -5284,6 +5066,26 @@ Value *SPIRVToLLVM::transVariableNonImage(SPIRVValue *const spvValue) { return globalVar; } +// ===================================================================================================================== +// Recursively check whether a SPIR-V type is, or contains (via struct members or array elements), a type with the given opcode +// +// @param spvTy : A SPIR-V type to search +// @param ty : SPIR-V type opcode to look for +bool SPIRVToLLVM::hasSpirvType(SPIRVType *spvTy, Op ty) { + if (spvTy->getOpCode() == ty) + return true; + else if (spvTy->isTypeStruct()) { + for (unsigned i = 0; i < spvTy->getStructMemberCount(); ++i) { + if (hasSpirvType(spvTy->getStructMemberType(i), ty)) + return true; + } + return false; + } else if (spvTy->isTypeArray()) { + return hasSpirvType(spvTy->getArrayElementType(), ty); + } + return false; +}; + // ===================================================================================================================== // Handle OpTranspose. 
// @@ -5730,6 +5532,26 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *const spvValue) { + SPIRVInstruction *const spvInst = static_cast(spvValue); + std::vector spvOperands = spvInst->getOperands(); + BasicBlock *const block = getBuilder()->GetInsertBlock(); + Function *const func = getBuilder()->GetInsertBlock()->getParent(); + Value *rayQuery = transValue(spvOperands[0], func, block); + Value *accStru = transValue(spvOperands[1], func, block); + accStru = getBuilder()->CreateBitCast(accStru, getBuilder()->getInt64Ty()); + Value *rayFlags = transValue(spvOperands[2], func, block); + Value *mask = transValue(spvOperands[3], func, block); + Value *origin = transValue(spvOperands[4], func, block); + Value *tmin = transValue(spvOperands[5], func, block); + Value *dir = transValue(spvOperands[6], func, block); + Value *tmax = transValue(spvOperands[7], func, block); + return getBuilder()->create(rayQuery, accStru, rayFlags, mask, origin, tmin, dir, tmax); +} + /// For instructions, this function assumes they are created in order /// and appended to the given basic block. An instruction may use a /// instruction from another BB which has not been translated. 
Such @@ -6356,7 +6178,8 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu case OpControlBarrier: case OpMemoryBarrier: - return mapValue(bv, transBarrierFence(static_cast(bv), bb)); + transBarrierFence(static_cast(bv), bb); + return {}; case OpSNegate: { if (bv->getType()->isTypeCooperativeMatrixKHR()) { @@ -6914,6 +6737,10 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu return mapValue(bv, transValueWithOpcode(bv)); case OpIsHelperInvocationEXT: return mapValue(bv, transValueWithOpcode(bv)); + case OpBeginInvocationInterlockEXT: + return mapValue(bv, transValueWithOpcode(bv)); + case OpEndInvocationInterlockEXT: + return mapValue(bv, transValueWithOpcode(bv)); case OpTraceRayKHR: return mapValue(bv, createTraceRayDialectOp(bv)); case OpExecuteCallableKHR: @@ -6945,6 +6772,127 @@ SmallVector SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *bv, Fu return mapValue(bv, transValueWithOpcode(bv)); case OpCooperativeMatrixMulAddKHR: return mapValue(bv, transValueWithOpcode(bv)); + case OpRayQueryInitializeKHR: + return mapValue(bv, transValueWithOpcode(bv)); + case OpRayQueryTerminateKHR: { + auto *bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGenerateIntersectionKHR: { + auto *bi = static_cast(bv); + auto query = transValue(bi->getOperands()[0], f, bb); + auto hitT = transValue(bi->getOperands()[1], f, bb); + return mapValue(bv, m_builder->create(query, hitT)); + } + case OpRayQueryConfirmIntersectionKHR: { + auto *bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryProceedKHR: { + auto *bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGetIntersectionTypeKHR: { + auto bi = static_cast(bv); + bool committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, 
m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetRayTMinKHR: { + auto bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGetRayFlagsKHR: { + auto bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGetIntersectionTKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, + m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionInstanceIdKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, + m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, m_builder->create( + transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionGeometryIndexKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, + m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionPrimitiveIndexKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue( + bv, m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionBarycentricsKHR: { + auto bi = 
static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, + m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionFrontFaceKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: { + auto bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGetIntersectionObjectRayDirectionKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue( + bv, m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionObjectRayOriginKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue( + bv, m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetWorldRayDirectionKHR: { + auto bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGetWorldRayOriginKHR: { + auto bi = static_cast(bv); + return mapValue(bv, m_builder->create(transValue(bi->getOperands()[0], f, bb))); + } + case OpRayQueryGetIntersectionObjectToWorldKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, + m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case OpRayQueryGetIntersectionWorldToObjectKHR: { + auto bi = static_cast(bv); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, + m_builder->create(transValue(bi->getOperands()[0], f, bb), committed)); + } + case 
OpRayQueryGetIntersectionTriangleVertexPositionsKHR: { + auto bi = static_cast(bv); + auto rayquery = transValue(bi->getOperands()[0], f, bb); + auto committed = isRayQueryCommittedIntersection(bi->getOperands()[1]); + return mapValue(bv, m_builder->create(rayquery, committed)); + } default: { auto oc = bv->getOpCode(); if (isCmpOpCode(oc)) @@ -7028,6 +6976,7 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *bf) { if (isFuncNoUnwind()) f->addFnAttr(Attribute::NoUnwind); foreachFuncCtlMask(bf, [&](Attribute::AttrKind attr) { f->addFnAttr(attr); }); + f->addFnAttr(Attribute::AlwaysInline); } for (Function::arg_iterator i = f->arg_begin(), e = f->arg_end(); i != e; ++i) { @@ -7082,23 +7031,22 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *bf) { m_blockPredecessorToCount.erase(f); - auto getContArgTy = [&](SPIRVType *argTy) { - if (argTy->isTypePointer()) { - auto storageClass = argTy->getPointerStorageClass(); - const unsigned addrSpace = SPIRSPIRVAddrSpaceMap::rmap(storageClass); + auto getContArgTy = [&](Type *irTy, SPIRVType *argTy) { + if (isa(irTy)) { Type *pointeeType = transType(argTy->getPointerElementType()); - Type *ptrTy = PointerType::get(*m_context, addrSpace); - return ContArgTy(ptrTy, pointeeType); + return TypedArgTy(irTy, pointeeType); } - return ContArgTy(transType(argTy)); + return TypedArgTy(irTy); }; - SmallVector argTys; + SmallVector argTys; for (unsigned i = 0; i < bf->getNumArguments(); ++i) { + Type *irArgTy = f->getArg(i)->getType(); auto argTy = bf->getArgument(i)->getType(); - argTys.push_back(getContArgTy(argTy)); + argTys.push_back(getContArgTy(irArgTy, argTy)); } - ContFuncTy funcTys(getContArgTy(bf->getType()), argTys); + Type *irRetTy = f->getFunctionType()->getReturnType(); + TypedFuncTy funcTys(getContArgTy(irRetTy, bf->getType()), argTys); funcTys.writeMetadata(f); return f; @@ -7251,149 +7199,197 @@ static unsigned convertDimension(const SPIRVTypeImageDescriptor *desc) { } // 
============================================================================= -// Get image and/or sampler descriptors, and get information from the image -// type. -void SPIRVToLLVM::getImageDesc(SPIRVValue *bImageInst, ExtractedImageInfo *info) { - auto setEnforceReadFirstLaneFlag = [&](SPIRVValue *accessChainInst, bool isImage) { - if (!accessChainInst || !isAccessChainOpCode(accessChainInst->getOpCode())) - return; - SPIRVAccessChainBase *const spvAccessChain = static_cast(accessChainInst); - std::vector spvIndicesVec = spvAccessChain->getIndices(); - // Check if any index is not a constant, set the flag true - bool enforceReadFirstlane = false; - for (auto idxIt : spvIndicesVec) { - if (idxIt->getOpCode() != OpConstant) { - enforceReadFirstlane = true; - break; - } +// Scan backwards from an image/sampler or pointer-to-image/sampler value and set non-uniform/coherent/volatile flags. +static void scanImageDescNonUniformCV(SPIRVToLLVM::ExtractedImageInfo *info, SPIRVValue *spvValue, bool image, + bool sampler) { + for (;;) { + if (image) { + if (spvValue->hasDecorate(DecorationCoherent)) + info->flags |= lgc::Builder::ImageFlagCoherent; + if (spvValue->hasDecorate(DecorationVolatile)) + info->flags |= lgc::Builder::ImageFlagVolatile; } - if (enforceReadFirstlane) - info->flags |= isImage ? 
lgc::Builder::ImageFlagEnforceReadFirstLaneImage - : lgc::Builder::ImageFlagEnforceReadFirstLaneSampler; - }; - bool forceNonUniform = isShaderStageInMask(convertToShaderStage(m_execModule), - getPipelineOptions()->forceNonUniformResourceIndexStageMask); + const auto opcode = spvValue->getOpCode(); - if (forceNonUniform || bImageInst->hasDecorate(DecorationNonUniformEXT)) { - info->flags |= lgc::Builder::ImageFlagNonUniformImage; - if (bImageInst->getType()->getOpCode() == OpTypeSampledImage) - info->flags |= lgc::Builder::ImageFlagNonUniformSampler; + // Section 2.16.1 ("Universal Validation Rules") of the SPIR-V specification (version 1.6) says: + // + // "Image, sampler, and sampled image objects must not appear as operands to OpPhi instructions, or OpSelect + // instructions, or any instructions other than the image or sampler instructions specified to operate on them." + // + // However, we have some legacy workloads in our database which break this rule. We're doing a best-effort + // treatment here because it is easy to do so in our design. 
+ bool isPhiOrSelect = opcode == OpPhi || opcode == OpSelect; + if (spvValue->hasDecorate(DecorationNonUniformEXT) || isPhiOrSelect) { + if (image) + info->flags |= lgc::Builder::ImageFlagNonUniformImage; + if (sampler) + info->flags |= lgc::Builder::ImageFlagNonUniformSampler; + } + if (isPhiOrSelect) + break; + + if (opcode == OpCopyObject || opcode == OpCopyLogical) { + spvValue = static_cast(spvValue)->getOperand(); + continue; + } + + if (opcode == OpImage) { + assert(!sampler); + spvValue = static_cast(spvValue)->getOperand(0); + continue; + } + if (opcode == OpSampledImage) { + auto *sampledImage = static_cast(spvValue); + if (image) + scanImageDescNonUniformCV(info, sampledImage->getOperands()[0], true, false); + if (sampler) + scanImageDescNonUniformCV(info, sampledImage->getOperands()[1], false, true); + break; + } + + if (opcode == OpLoad) { + spvValue = static_cast(spvValue)->getSrc(); + continue; + } + + bool isAccessChain = opcode == OpAccessChain || opcode == OpInBoundsAccessChain; + if (isAccessChain) { + spvValue = static_cast(spvValue)->getOperands()[0]; + continue; + } + + bool isObject = opcode == OpVariable || opcode == OpFunctionParameter; + if (isObject) { + if (image && !spvValue->hasDecorate(DecorationAliased)) + info->flags |= lgc::Builder::ImageFlagNotAliased; + break; + } + + if (opcode == OpBitcast || opcode == OpUndef) + break; + + llvm_unreachable("unhandled image/sampler definition"); } +} - if (bImageInst->getOpCode() == OpImageTexelPointer) { - // We are looking at the OpImageTexelPointer for an image atomic. Load the - // image descriptor from its image pointer. - SPIRVValue *bImagePtr = static_cast(bImageInst)->getImage(); - info->desc = &static_cast(bImagePtr->getType()->getPointerElementType())->getDescriptor(); - info->dim = convertDimension(info->desc); - info->imageDesc = transLoadImage(bImagePtr); - if (isa(info->imageDesc->getType())) { - // Extract image descriptor from struct containing image+fmask descs. 
- info->imageDesc = getBuilder()->CreateExtractValue(info->imageDesc, uint64_t(0)); - } - if (isa(info->imageDesc->getType())) { - // Extract image descriptor from possible array of multi-plane image descriptors. - info->imageDesc = getBuilder()->CreateExtractValue(info->imageDesc, 0); - } - // We also need to trace back to the OpVariable/OpUntypedVariableKHR or OpFunctionParam to find - // the coherent and volatile decorations. - SPIRVValue *imageAccessChain = nullptr; - while (bImagePtr->getOpCode() == OpAccessChain || bImagePtr->getOpCode() == OpInBoundsAccessChain) { - std::vector operands = static_cast(bImagePtr)->getOperands(); - for (SPIRVValue *operand : operands) { - if (forceNonUniform || operand->hasDecorate(DecorationNonUniformEXT)) - info->flags |= lgc::Builder::ImageFlagNonUniformImage; +// ============================================================================= +// Scan backwards from an image/sampler or pointer-to-image/sampler value and set the force readfirstlane flag +// if a non-constant index is found. 
+static void scanImageDescForceReadFirstLane(SPIRVToLLVM::ExtractedImageInfo *info, SPIRVValue *spvValue, bool image, + bool sampler) { + for (;;) { + const auto opcode = spvValue->getOpCode(); + + if (opcode == OpCopyObject || opcode == OpCopyLogical) { + spvValue = static_cast(spvValue)->getOperand(); + continue; + } + + if (opcode == OpImage) { + assert(!sampler); + spvValue = static_cast(spvValue)->getOperand(0); + continue; + } + if (opcode == OpSampledImage) { + auto *sampledImage = static_cast(spvValue); + if (image) + scanImageDescForceReadFirstLane(info, sampledImage->getOperands()[0], true, false); + if (sampler) + scanImageDescForceReadFirstLane(info, sampledImage->getOperands()[1], false, true); + break; + } + + if (opcode == OpLoad) { + spvValue = static_cast(spvValue)->getSrc(); + continue; + } + + bool isAccessChain = opcode == OpAccessChain || opcode == OpInBoundsAccessChain; + if (isAccessChain) { + auto *spvAccessChain = static_cast(spvValue); + std::vector spvIndicesVec = spvAccessChain->getIndices(); + + // Check if any index is not a constant, set the flag true + for (auto idxIt : spvIndicesVec) { + if (idxIt->getOpCode() != OpConstant) { + if (image) + info->flags |= lgc::Builder::ImageFlagEnforceReadFirstLaneImage; + if (sampler) + info->flags |= lgc::Builder::ImageFlagEnforceReadFirstLaneSampler; + return; + } } - imageAccessChain = bImagePtr; - bImagePtr = operands[0]; + + spvValue = static_cast(spvValue)->getOperands()[0]; + continue; } - assert(bImagePtr->getOpCode() == OpVariable || bImagePtr->getOpCode() == OpFunctionParameter); - if (bImageInst->hasDecorate(DecorationCoherent)) - info->flags |= lgc::Builder::ImageFlagCoherent; - if (bImageInst->hasDecorate(DecorationVolatile)) - info->flags |= lgc::Builder::ImageFlagVolatile; - // Set enforce readfirstlane flag for accessing image array - if ((info->flags & lgc::Builder::ImageFlagNonUniformImage) == 0) - setEnforceReadFirstLaneFlag(imageAccessChain, true); + bool isObject = opcode == 
OpVariable || opcode == OpFunctionParameter; + if (isObject) + break; - return; + if (opcode == OpBitcast || opcode == OpUndef) + break; + + llvm_unreachable("unhandled image/sampler definition"); } +} - SPIRVValue *imageLoadSrc = nullptr; - SPIRVValue *samplerLoadSrc = nullptr; - if (bImageInst->getOpCode() == OpLoad) { - SPIRVLoad *load = static_cast(bImageInst); - SPIRVValue *const loadSrc = load->getSrc(); +// ============================================================================= +// Get image and/or sampler descriptors, and get information from the image +// type. +void SPIRVToLLVM::getImageDesc(SPIRVValue *bImageInst, ExtractedImageInfo *info) { + // Get the descriptor(s). + SPIRVType *spvImageType = nullptr; - if (loadSrc->isCoherent()) - info->flags |= lgc::Builder::ImageFlagCoherent; - if (loadSrc->isVolatile()) - info->flags |= lgc::Builder::ImageFlagVolatile; - if (load->getType()->getOpCode() == OpTypeSampledImage) { - imageLoadSrc = loadSrc; - samplerLoadSrc = loadSrc; - } else { - imageLoadSrc = loadSrc; - } + if (bImageInst->getOpCode() == OpImageTexelPointer) { + bImageInst = static_cast(bImageInst)->getImage(); + spvImageType = static_cast(bImageInst->getType()->getPointerElementType()); + } else { + spvImageType = static_cast(bImageInst->getType()); } - // We need to scan back through OpImage/OpSampledImage just to find any - // NonUniform decoration. 
- SPIRVValue *scanBackInst = bImageInst; - while (scanBackInst->getOpCode() == OpImage || scanBackInst->getOpCode() == OpSampledImage) { - if (scanBackInst->getOpCode() == OpSampledImage) { - auto sampler = static_cast(scanBackInst)->getOpValue(1); - if (forceNonUniform || sampler->hasDecorate(DecorationNonUniformEXT)) - info->flags |= lgc::Builder::ImageFlagNonUniformSampler; - if (sampler->getOpCode() == OpLoad) - samplerLoadSrc = static_cast(sampler)->getSrc(); - } - scanBackInst = static_cast(scanBackInst)->getOpValue(0); - if (forceNonUniform || scanBackInst->hasDecorate(DecorationNonUniformEXT)) - info->flags |= lgc::Builder::ImageFlagNonUniformImage; - if (scanBackInst->getOpCode() == OpLoad) - imageLoadSrc = static_cast(scanBackInst)->getSrc(); - } - // Set enforce readfirstlane flag for accessing image or sampled image array - if ((info->flags & lgc::Builder::ImageFlagNonUniformImage) == 0) - setEnforceReadFirstLaneFlag(imageLoadSrc, true); - if ((info->flags & lgc::Builder::ImageFlagNonUniformSampler) == 0) - setEnforceReadFirstLaneFlag(samplerLoadSrc, false); - - if (imageLoadSrc && (imageLoadSrc->getOpCode() == OpVariable || imageLoadSrc->getOpCode() == OpFunctionParameter) && - !imageLoadSrc->hasDecorate(DecorationAliased)) - info->flags |= lgc::Builder::ImageFlagNotAliased; - - // Get the IR value for the image/sampledimage. - Value *desc = transValue(bImageInst, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); - - SPIRVType *bImageTy = bImageInst->getType(); - if (bImageTy->getOpCode() == OpTypeSampledImage) { - // For a sampledimage, the IR value is a struct containing the image and the - // sampler. 
- info->samplerDesc = getBuilder()->CreateExtractValue(desc, 1); - desc = getBuilder()->CreateExtractValue(desc, uint64_t(0)); - bImageTy = static_cast(bImageTy)->getImageType(); - } - assert(bImageTy->getOpCode() == OpTypeImage); - info->desc = &static_cast(bImageTy)->getDescriptor(); + if (spvImageType->getOpCode() == OpTypeImage) { + info->desc = &static_cast(spvImageType)->getDescriptor(); + } else { + assert(spvImageType->getOpCode() == OpTypeSampledImage); + info->desc = &static_cast(spvImageType)->getImageType()->getDescriptor(); + } info->dim = convertDimension(info->desc); - if (info->desc->MS) { - // For a multisampled image, the IR value is a struct containing the image - // descriptor and the fmask descriptor. - info->fmaskDesc = getBuilder()->CreateExtractValue(desc, 1); - desc = getBuilder()->CreateExtractValue(desc, uint64_t(0)); + unsigned components = getImageTypeComponents(spvImageType); + const auto idxs = getImageTypeIndices(components); + Value *image = transValue(bImageInst, getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); + + if (components & ImageComponentImage) { + info->imagePointer = getBuilder()->CreateExtractValue(image, idxs.imagePointer); + info->imagePlaneStride = getBuilder()->CreateExtractValue(image, idxs.imagePlaneStride); } - // desc might be an array of multi-plane descriptors (for YCbCrSampler conversion). - info->imageDescArray = desc; - if (isa(desc->getType())) - desc = getBuilder()->CreateExtractValue(desc, 0); + if (components & ImageComponentFMask) + info->fmaskPointer = getBuilder()->CreateExtractValue(image, idxs.fmaskPointer); + + if (components & ImageComponentSampler) { + info->samplerPointer = getBuilder()->CreateExtractValue(image, idxs.samplerPointer); + info->convertingSamplerIdx = getBuilder()->CreateExtractValue(image, idxs.convertingSamplerIdx); + } - info->imageDesc = desc; + // Analyze the data flow for coheren/volatile/(non-)uniformness. 
+ bool forceNonUniform = isShaderStageInMask(convertToShaderStage(m_execModule), + getPipelineOptions()->forceNonUniformResourceIndexStageMask); + if (forceNonUniform) { + info->flags |= lgc::Builder::ImageFlagNonUniformImage; + if (components & ImageComponentSampler) + info->flags |= lgc::Builder::ImageFlagNonUniformSampler; + } + + scanImageDescNonUniformCV(info, bImageInst, components & ImageComponentImage, components & ImageComponentSampler); + bool imageUniform = (components & ImageComponentImage) && !(info->flags & lgc::Builder::ImageFlagNonUniformImage); + bool samplerUniform = + (components & ImageComponentSampler) && !(info->flags & lgc::Builder::ImageFlagNonUniformSampler); + if (imageUniform || samplerUniform) + scanImageDescForceReadFirstLane(info, bImageInst, imageUniform, samplerUniform); } // ============================================================================= @@ -7656,7 +7652,7 @@ void SPIRVToLLVM::handleImageFetchReadWriteCoord(SPIRVInstruction *bi, Extracted Value *SPIRVToLLVM::transSPIRVFragmentFetchFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource descriptor. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -7687,7 +7683,8 @@ Value *SPIRVToLLVM::transSPIRVFragmentFetchFromInst(SPIRVInstruction *bi, BasicB Type *resultTy = transType(bii->getType()); // Create the image load. 
- return getBuilder()->CreateImageLoad(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, coord, nullptr); + return getBuilder()->CreateImageLoad(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, coord, + nullptr); } // ============================================================================= @@ -7696,7 +7693,7 @@ Value *SPIRVToLLVM::transSPIRVFragmentMaskFetchFromInst(SPIRVInstruction *bi, Ba if (getPipelineOptions()->shadowDescriptorTableUsage != Vkgc::ShadowDescriptorTableUsage::Disable) { // Get image type descriptor and fmask descriptor. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -7720,7 +7717,7 @@ Value *SPIRVToLLVM::transSPIRVFragmentMaskFetchFromInst(SPIRVInstruction *bi, Ba // Create the image load. Value *result = - getBuilder()->CreateImageLoad(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.fmaskDesc, coord, nullptr); + getBuilder()->CreateImageLoad(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.fmaskPointer, coord, nullptr); return getBuilder()->CreateExtractElement(result, uint64_t(0)); } @@ -7756,7 +7753,7 @@ Value *SPIRVToLLVM::transSPIRVImageAtomicOpFromInst(SPIRVInstruction *bi, BasicB comparator = transValue(bit->getOpValue(opndIdx++), bb->getParent(), bb); // Get image type descriptor and load resource descriptor. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo = {}; getImageDesc(pointerBi, &imageInfo); // Set up address arguments. 
@@ -7809,8 +7806,8 @@ Value *SPIRVToLLVM::transSPIRVImageAtomicOpFromInst(SPIRVInstruction *bi, BasicB Value *result = nullptr; switch (bi->getOpCode()) { case OpAtomicCompareExchange: - result = getBuilder()->CreateImageAtomicCompareSwap(imageInfo.dim, imageInfo.flags, ordering, imageInfo.imageDesc, - coord, inputData, comparator); + result = getBuilder()->CreateImageAtomicCompareSwap(imageInfo.dim, imageInfo.flags, ordering, + imageInfo.imagePointer, coord, inputData, comparator); break; case OpAtomicStore: @@ -7871,7 +7868,7 @@ Value *SPIRVToLLVM::transSPIRVImageAtomicOpFromInst(SPIRVInstruction *bi, BasicB } if (!result) { - result = getBuilder()->CreateImageAtomic(atomicOp, imageInfo.dim, imageInfo.flags, ordering, imageInfo.imageDesc, + result = getBuilder()->CreateImageAtomic(atomicOp, imageInfo.dim, imageInfo.flags, ordering, imageInfo.imagePointer, coord, inputData); } if (bi->getOpCode() == OpAtomicLoad && bi->getType()->isTypeFloat()) @@ -7920,7 +7917,7 @@ Value *SPIRVToLLVM::ConvertingSamplerSelectLadderHelper(Value *result, Value *co // Translate image sample to LLVM IR Value *SPIRVToLLVM::transSPIRVImageSampleFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource and sampler descriptors. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -7975,17 +7972,22 @@ Value *SPIRVToLLVM::transSPIRVImageSampleFromInst(SPIRVInstruction *bi, BasicBlo setupImageAddressOperands(bii, opndIdx, hasProj, addr, &imageInfo, nullptr); // First do a normal image sample, extracting the sampler from the {sampler,convertingSamplerIdx} struct. 
- Value *samplerDesc = getBuilder()->CreateExtractValue(imageInfo.samplerDesc, 0); - Value *result = - getBuilder()->CreateImageSample(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, samplerDesc, addr); + Value *result = getBuilder()->CreateImageSample(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, + imageInfo.samplerPointer, addr); if (!m_convertingSamplers.empty()) { + Value *planes = PoisonValue::get(ArrayType::get(getBuilder()->getDescPtrTy(), 3)); + for (unsigned i = 0; i < 3; ++i) { + Value *offset = getBuilder()->CreateMul(imageInfo.imagePlaneStride, getBuilder()->getInt32(i)); + Value *plane = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), imageInfo.imagePointer, offset); + planes = getBuilder()->CreateInsertValue(planes, plane, i); + } + auto createImageSampleConvert = [&](Value *samplerDescIn) -> Value * { - return getBuilder()->CreateImageSampleConvert(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imageDescArray, - samplerDescIn, addr); + return getBuilder()->CreateImageSampleConvert(resultTy, imageInfo.dim, imageInfo.flags, planes, samplerDescIn, + addr); }; - Value *convertingSamplerIdx = getBuilder()->CreateExtractValue(imageInfo.samplerDesc, 1); - result = ConvertingSamplerSelectLadderHelper(result, convertingSamplerIdx, createImageSampleConvert); + result = ConvertingSamplerSelectLadderHelper(result, imageInfo.convertingSamplerIdx, createImageSampleConvert); } // For a sparse sample, swap the struct elements back again. @@ -8003,7 +8005,7 @@ Value *SPIRVToLLVM::transSPIRVImageSampleFromInst(SPIRVInstruction *bi, BasicBlo // Translate image gather to LLVM IR Value *SPIRVToLLVM::transSPIRVImageGatherFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource and sampler descriptors. 
- ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -8060,9 +8062,6 @@ Value *SPIRVToLLVM::transSPIRVImageGatherFromInst(SPIRVInstruction *bi, BasicBlo } } - // A sampler descriptor is encoded as {desc,convertingSamplerIdx}. Extract the actual sampler. - Value *samplerDesc = getBuilder()->CreateExtractValue(imageInfo.samplerDesc, 0); - Value *result = nullptr; if (constOffsets) { // A gather with non-standard offsets is done as four separate gathers. If @@ -8074,7 +8073,7 @@ Value *SPIRVToLLVM::transSPIRVImageGatherFromInst(SPIRVInstruction *bi, BasicBlo for (int idx = 3; idx >= 0; --idx) { addr[lgc::Builder::ImageAddressIdxOffset] = getBuilder()->CreateExtractValue(constOffsets, idx); Value *singleResult = getBuilder()->CreateImageGather(resultTy, imageInfo.dim, imageInfo.flags, - imageInfo.imageDesc, samplerDesc, addr); + imageInfo.imagePointer, imageInfo.samplerPointer, addr); if (resultTy != origResultTy) { // Handle sparse. residency = getBuilder()->CreateExtractValue(singleResult, 1); @@ -8091,8 +8090,8 @@ Value *SPIRVToLLVM::transSPIRVImageGatherFromInst(SPIRVInstruction *bi, BasicBlo } // Create the image gather call. - result = - getBuilder()->CreateImageGather(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, samplerDesc, addr); + result = getBuilder()->CreateImageGather(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, + imageInfo.samplerPointer, addr); // For a sparse gather, swap the struct elements back again. if (resultTy != origResultTy) { @@ -8109,7 +8108,7 @@ Value *SPIRVToLLVM::transSPIRVImageGatherFromInst(SPIRVInstruction *bi, BasicBlo // Translate image fetch/read to LLVM IR Value *SPIRVToLLVM::transSPIRVImageFetchReadFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource descriptor. 
- ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -8148,8 +8147,8 @@ Value *SPIRVToLLVM::transSPIRVImageFetchReadFromInst(SPIRVInstruction *bi, Basic // This is an OpImageFetch with sample, or an OpImageRead with sample and // subpass data dimension. We need to use the fmask variant of the builder // method. First we need to get the fmask descriptor. - result = getBuilder()->CreateImageLoadWithFmask(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, - imageInfo.fmaskDesc, coord, sampleNum); + result = getBuilder()->CreateImageLoadWithFmask(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, + imageInfo.fmaskPointer, coord, sampleNum); } else { // This is an OpImageRead with sample but not subpass data dimension. // Append the sample onto the coordinate. @@ -8164,7 +8163,8 @@ Value *SPIRVToLLVM::transSPIRVImageFetchReadFromInst(SPIRVInstruction *bi, Basic if (!result) { // We did not do the "load with fmask" above. Do the normal image load now. Value *lod = addr[lgc::Builder::ImageAddressIdxLod]; - result = getBuilder()->CreateImageLoad(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, coord, lod); + result = + getBuilder()->CreateImageLoad(resultTy, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, coord, lod); } // For a sparse read/fetch, swap the struct elements back again. @@ -8182,7 +8182,7 @@ Value *SPIRVToLLVM::transSPIRVImageFetchReadFromInst(SPIRVInstruction *bi, Basic // Translate image write to LLVM IR Value *SPIRVToLLVM::transSPIRVImageWriteFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource descriptor. 
- ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -8230,38 +8230,38 @@ Value *SPIRVToLLVM::transSPIRVImageWriteFromInst(SPIRVInstruction *bi, BasicBloc // Do the image store. Value *lod = addr[lgc::Builder::ImageAddressIdxLod]; - return getBuilder()->CreateImageStore(texel, imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, coord, lod); + return getBuilder()->CreateImageStore(texel, imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, coord, lod); } // ============================================================================= // Translate OpImageQueryLevels to LLVM IR Value *SPIRVToLLVM::transSPIRVImageQueryLevelsFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource descriptor. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); // Generate the operation. - return getBuilder()->CreateImageQueryLevels(imageInfo.dim, imageInfo.flags, imageInfo.imageDesc); + return getBuilder()->CreateImageQueryLevels(imageInfo.dim, imageInfo.flags, imageInfo.imagePointer); } // ============================================================================= // Translate OpImageQuerySamples to LLVM IR Value *SPIRVToLLVM::transSPIRVImageQuerySamplesFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource descriptor. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); // Generate the operation. 
- return getBuilder()->CreateImageQuerySamples(imageInfo.dim, imageInfo.flags, imageInfo.imageDesc); + return getBuilder()->CreateImageQuerySamples(imageInfo.dim, imageInfo.flags, imageInfo.imagePointer); } // ============================================================================= // Translate OpImageQuerySize/OpImageQuerySizeLod to LLVM IR Value *SPIRVToLLVM::transSPIRVImageQuerySizeFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource descriptor. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); @@ -8269,31 +8269,28 @@ Value *SPIRVToLLVM::transSPIRVImageQuerySizeFromInst(SPIRVInstruction *bi, Basic Value *lod = getBuilder()->getInt32(0); if (bii->getOpCode() == OpImageQuerySizeLod) lod = transValue(bii->getOpValue(1), bb->getParent(), bb); - return getBuilder()->CreateImageQuerySize(imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, lod); + return getBuilder()->CreateImageQuerySize(imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, lod); } // ============================================================================= // Translate OpImageQueryLod to LLVM IR Value *SPIRVToLLVM::transSPIRVImageQueryLodFromInst(SPIRVInstruction *bi, BasicBlock *bb) { // Get image type descriptor and load resource and sampler descriptors. - ExtractedImageInfo imageInfo = {bb}; + ExtractedImageInfo imageInfo; auto bii = static_cast(bi); getImageDesc(bii->getOpValue(0), &imageInfo); - // A sampler descriptor is encoded as {desc,convertingSamplerIdx}. Extract the actual sampler. - Value *samplerDesc = getBuilder()->CreateExtractValue(imageInfo.samplerDesc, 0); - // Generate the operation for normal image get lod. 
Value *coord = transValue(bii->getOpValue(1), bb->getParent(), bb); - Value *result = - getBuilder()->CreateImageGetLod(imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, samplerDesc, coord); + Value *result = getBuilder()->CreateImageGetLod(imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, + imageInfo.samplerPointer, coord); if (!m_convertingSamplers.empty()) { auto createImageGetLod = [&](Value *samplerDescIn) -> Value * { - return getBuilder()->CreateImageGetLod(imageInfo.dim, imageInfo.flags, imageInfo.imageDesc, samplerDescIn, coord); + return getBuilder()->CreateImageGetLod(imageInfo.dim, imageInfo.flags, imageInfo.imagePointer, samplerDescIn, + coord); }; - Value *convertingSamplerIdx = getBuilder()->CreateExtractValue(imageInfo.samplerDesc, 1); - result = ConvertingSamplerSelectLadderHelper(result, convertingSamplerIdx, createImageGetLod); + result = ConvertingSamplerSelectLadderHelper(result, imageInfo.convertingSamplerIdx, createImageGetLod); } // NOTE: This is a workaround. When UV width equals 0, the result return 0, but we expect the value is @@ -8421,8 +8418,6 @@ bool SPIRVToLLVM::translate(ExecutionModel entryExecModel, const char *entryName m_requireFullQuads = m_entryTarget->getExecutionMode(ExecutionModeRequireFullQuadsKHR) != nullptr; m_maximallyReconverges = m_entryTarget->getExecutionMode(ExecutionModeMaximallyReconvergesKHR) != nullptr; - } else { - createLibraryEntryFunc(); } // Determine any denormal overrides to be applied. @@ -8571,8 +8566,13 @@ bool SPIRVToLLVM::translate(ExecutionModel entryExecModel, const char *entryName // Set DLLExport on targeted entry-point so we can find it later. 
if (!m_bm->getEntryPoint(bf->getId()) || bf == m_entryTarget) { auto f = transFunction(bf); - if (bf == m_entryTarget) - f->setDLLStorageClass(GlobalValue::DLLExportStorageClass); + if (bf == m_entryTarget) { + Vkgc::ShaderStage stage = convertToShaderStage(m_execModule); + if (stage > ShaderStageCompute) + lgc::rt::setLgcRtShaderStage(f, getLgcRtShaderStage(stage)); + else + lgc::Pipeline::markShaderEntryPoint(f, getLgcShaderStage(stage)); + } } } @@ -8816,6 +8816,12 @@ bool SPIRVToLLVM::transMetadata() { fragmentMode.earlyFragmentTests = true; } + if (bf->getExecutionMode(ExecutionModePixelInterlockOrderedEXT) || + bf->getExecutionMode(ExecutionModePixelInterlockUnorderedEXT) || + bf->getExecutionMode(ExecutionModeSampleInterlockOrderedEXT) || + bf->getExecutionMode(ExecutionModeSampleInterlockUnorderedEXT)) + fragmentMode.enablePops = true; + fragmentMode.waveOpsRequireHelperLanes = m_maximallyReconverges && m_hasDemoteToHelper; Pipeline::setFragmentShaderMode(*m_m, fragmentMode); @@ -9797,7 +9803,7 @@ Constant *SPIRVToLLVM::buildShaderBlockMetadata(SPIRVType *bt, ShaderBlockDecora const unsigned remappedIdx = isRemappedTypeElements(bt) ? 
lookupRemappedTypeElements(bt, memberIdx) : memberIdx; const DataLayout &dl = m_m->getDataLayout(); - Type *const ty = transType(bt, 0, false, true, LayoutMode::Explicit); + Type *const ty = transType(bt, 0, false, LayoutMode::Explicit); assert(ty->isStructTy()); const StructLayout *const sl = dl.getStructLayout(static_cast(ty)); @@ -10486,12 +10492,7 @@ Value *SPIRVToLLVM::transGLSLBuiltinFromExtInst(SPIRVExtInst *bc, BasicBlock *bb return call; } -Instruction *SPIRVToLLVM::transBarrier(BasicBlock *bb, SPIRVWord execScope, SPIRVWord memSema, SPIRVWord memScope) { - transMemFence(bb, memSema, memScope); - return getBuilder()->CreateBarrier(); -} - -Instruction *SPIRVToLLVM::transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRVWord memScope) { +void SPIRVToLLVM::transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRVWord memScope) { AtomicOrdering ordering = AtomicOrdering::NotAtomic; // We are safe to downgrade the SequentiallyConsistent to AcquireRelease based on Vulkan validation rules within a @@ -10513,7 +10514,7 @@ Instruction *SPIRVToLLVM::transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRV } if (ordering == AtomicOrdering::NotAtomic) - return nullptr; + return; SyncScope::ID scope = SyncScope::System; @@ -10539,42 +10540,72 @@ Instruction *SPIRVToLLVM::transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRV llvm_unreachable("Invalid scope"); } - return new FenceInst(*m_context, ordering, scope, bb); + getBuilder()->CreateFence(ordering, scope); } -Instruction *SPIRVToLLVM::transBarrierFence(SPIRVInstruction *mb, BasicBlock *bb) { +void SPIRVToLLVM::transBarrierFence(SPIRVInstruction *mb, BasicBlock *bb) { assert(bb && "Invalid BB"); - std::string funcName; auto getIntVal = [](SPIRVValue *value) { return static_cast(value)->getZExtIntValue(); }; - Instruction *barrier = nullptr; - if (mb->getOpCode() == OpMemoryBarrier) { auto memB = static_cast(mb); SPIRVWord memScope = getIntVal(memB->getOpValue(0)); SPIRVWord memSema = getIntVal(memB->getOpValue(1)); - 
barrier = transMemFence(bb, memSema, memScope); - } else if (mb->getOpCode() == OpControlBarrier) { + transMemFence(bb, memSema, memScope); + return; + } + + if (mb->getOpCode() == OpControlBarrier) { auto ctlB = static_cast(mb); SPIRVWord execScope = getIntVal(ctlB->getExecScope()); SPIRVWord memSema = getIntVal(ctlB->getMemSemantic()); SPIRVWord memScope = getIntVal(ctlB->getMemScope()); - barrier = transBarrier(bb, execScope, memSema, memScope); - } else - llvm_unreachable("Invalid instruction"); + // Normalize the ordering semantics. Section 9.6 ("Shader Memory Access + // Ordering") of the Vulkan 1.3.285 specification says: + // + // "Sequentially consistent atomics and barriers are not supported and SequentiallyConsistent is treated as + // AcquireRelease. SequentiallyConsistent should not be used." + // + // Release semantics are handled by a fence before the barrier, acquire + // semantics are handled by a fence after the barrier. + if (memSema & (MemorySemanticsAcquireReleaseMask | MemorySemanticsSequentiallyConsistentMask)) + memSema |= MemorySemanticsReleaseMask | MemorySemanticsAcquireMask; + memSema &= ~(MemorySemanticsAcquireReleaseMask | MemorySemanticsSequentiallyConsistentMask); + + transMemFence(bb, memSema & ~MemorySemanticsAcquireMask, memScope); + + switch (execScope) { + case ScopeCrossDevice: + case ScopeQueueFamilyKHR: + case ScopeDevice: + // We cannot implement control barriers at these scopes, but apparently a spec oversight left them as valid + // and at least some version(s) of Doom4 actually had them. We'll just treat them like workgroup barriers. 
+ case ScopeWorkgroup: + getBuilder()->CreateBarrier(); + break; - if (barrier) { - setName(barrier, mb); + case ScopeSubgroup: + getBuilder()->CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {}); + break; + + case ScopeShaderCallKHR: + case ScopeInvocation: + /* Noop */ + break; - if (CallInst *call = dyn_cast(barrier)) - setAttrByCalledFunc(call); + default: + llvm_unreachable("unsupported execution scope on OpControlBarrier"); + } + + transMemFence(bb, memSema & ~MemorySemanticsReleaseMask, memScope); + return; } - return barrier; + llvm_unreachable("Invalid instruction"); } llvm::GlobalValue::LinkageTypes SPIRVToLLVM::transLinkageType(const SPIRVValue *v) { @@ -10594,12 +10625,12 @@ llvm::GlobalValue::LinkageTypes SPIRVToLLVM::transLinkageType(const SPIRVValue * // Function declaration if (v->getOpCode() == OpFunction) { if (static_cast(v)->getNumBasicBlock() == 0) - return GlobalValue::ExternalLinkage; + return GlobalValue::WeakAnyLinkage; } // Variable declaration if (v->getOpCode() == OpVariable) { if (static_cast(v)->getInitializer() == 0) - return GlobalValue::ExternalLinkage; + return GlobalValue::WeakAnyLinkage; } // Definition return GlobalValue::AvailableExternallyLinkage; @@ -10610,22 +10641,7 @@ llvm::GlobalValue::LinkageTypes SPIRVToLLVM::transLinkageType(const SPIRVValue * // Tentative definition return GlobalValue::CommonLinkage; } - return GlobalValue::ExternalLinkage; -} - -llvm::Function *SPIRVToLLVM::createLibraryEntryFunc() { - auto builder = getBuilder(); - FunctionType *funcTy = FunctionType::get(builder->getVoidTy(), {}, false); - auto func = Function::Create(funcTy, GlobalValue::ExternalLinkage, "libraryEntry", m_m); - BasicBlock *entryBlock = BasicBlock::Create(*m_context, "", func); - builder->SetInsertPoint(entryBlock); - builder->CreateRetVoid(); - std::vector execModelMDs; - execModelMDs.push_back(ConstantAsMetadata::get(ConstantInt::get(builder->getInt32Ty(), ExecutionModelGLCompute))); - auto execModelMdNode = 
MDNode::get(*m_context, execModelMDs); - func->addMetadata(gSPIRVMD::ExecutionModel, *execModelMdNode); - func->setDLLStorageClass(GlobalValue::DLLExportStorageClass); - return func; + return GlobalValue::WeakAnyLinkage; } PipelineContext *SPIRVToLLVM::getPipelineContext() const { @@ -10879,6 +10895,11 @@ void SPIRVToLLVM::insertScratchBoundsChecks(SPIRVValue *memOp, const ScratchBoun } } +bool SPIRVToLLVM::isRayQueryCommittedIntersection(SPIRVValue *bv) { + auto spvInterSect = static_cast(bv); + return spvInterSect->getZExtIntValue() == RayQueryIntersectionRayQueryCommittedIntersectionKHR; +} + void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { auto llpcContext = static_cast(m_context); auto pipelineBuildInfo = static_cast(llpcContext->getPipelineBuildInfo()); @@ -10968,7 +10989,7 @@ void SPIRVToLLVM::createXfbMetadata(bool hasXfbOuts) { auto output = cast(getTranslatedValue(bv, nullptr, nullptr)); MDNode *metaNode = output->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto elemMeta = mdconst::dyn_extract(metaNode->getOperand(0)); + auto elemMeta = mdconst::extract(metaNode->getOperand(0)); // Find the innermost array-element auto elemTy = bt; uint64_t elemCount = 0; diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.h b/llpc/translator/lib/SPIRV/SPIRVReader.h index aadf063459..df5a3c0527 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.h +++ b/llpc/translator/lib/SPIRV/SPIRVReader.h @@ -73,9 +73,29 @@ class SPIRVLoopMerge; class SPIRVToLLVMDbgTran; enum class LayoutMode : uint8_t { - Native = 0, ///< Using native LLVM layout rule - Explicit = 1, ///< Using layout decorations(like offset) from SPIRV - Std430 = 2, ///< Using std430 layout rule + None = 0, ///< SSA value -- has no memory layout + Native = 1, ///< Using native LLVM rules for in-memory layout + Explicit = 2, ///< Using layout decorations(like offset) from SPIRV + Std430 = 3, ///< Using std430 layout rule +}; + +// Describe what parts of image/sampler descriptors are present. 
+enum ImageComponent { + ImageComponentImage = 0x1, + ImageComponentFMask = 0x2, + ImageComponentSampler = 0x4, +}; + +// Holds indices into a struct describing an image/sampler (pointer) value. +struct ImageTypeIndices { + unsigned imagePointer = ~0; + unsigned imageStride = ~0; + unsigned imagePlaneStride = ~0; + unsigned fmaskPointer = ~0; + unsigned fmaskStride = ~0; + unsigned samplerPointer = ~0; + unsigned samplerStride = ~0; + unsigned convertingSamplerIdx = ~0; }; class SPIRVToLLVM { @@ -90,11 +110,15 @@ class SPIRVToLLVM { void updateDebugLoc(SPIRVValue *bv, Function *f); - Type *transType(SPIRVType *bt, unsigned matrixStride = 0, bool columnMajor = true, bool parentIsPointer = false, - LayoutMode layout = LayoutMode::Native); + unsigned getImageTypeComponents(SPIRVType *t) const; + ImageTypeIndices getImageTypeIndices(unsigned imageComponents) const; + Type *getImageTy(unsigned imageComponents) const; + + Type *transType(SPIRVType *bt, unsigned matrixStride = 0, bool columnMajor = true, + LayoutMode layout = LayoutMode::None); template - Type *transTypeWithOpcode(SPIRVType *bt, unsigned matrixStride, bool columnMajor, bool parentIsPointer, - LayoutMode layout); + Type *transTypeWithOpcode(SPIRVType *bt, unsigned matrixStride, bool columnMajor, LayoutMode layout); + Type *transTypeArray(SPIRVType *bt, unsigned matrixStride, bool columnMajor, LayoutMode layout); std::vector transTypeVector(const std::vector &); bool translate(ExecutionModel entryExecModel, const char *entryName); bool transAddressingModel(); @@ -111,11 +135,8 @@ class SPIRVToLLVM { template SmallVector transValueMultiWithOpcode(SPIRVValue *, Function *f, BasicBlock *bb); Value *transLoadImage(SPIRVValue *spvImageLoadPtr); Value *transLoadBindlessImage(SPIRVType *spvElementTy, Value *imgDescGpuAddress, bool bindlessTexture); - Value *loadImageSampler(Type *elementTy, Value *base); Value *transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *elementTy = nullptr); - Value 
*getDescPointerAndStride(lgc::ResourceNodeType resType, unsigned descriptorSet, unsigned binding, - lgc::ResourceNodeType searchType); - Value *indexDescPtr(Type *elementTy, Value *base, Value *index); + Value *indexDescPtr(SPIRVType *spvElementTy, Value *base, Value *index); Value *transGroupArithOp(lgc::Builder::GroupArithOp, SPIRVValue *); bool transDecoration(SPIRVValue *, ArrayRef); @@ -135,7 +156,7 @@ class SPIRVToLLVM { Value *transConvertInst(SPIRVValue *bv, Function *f, BasicBlock *bb); Instruction *transBuiltinFromInst(const std::string &funcName, SPIRVInstruction *bi, BasicBlock *bb); Instruction *transSPIRVBuiltinFromInst(SPIRVInstruction *bi, BasicBlock *bb); - Instruction *transBarrierFence(SPIRVInstruction *bi, BasicBlock *bb); + void transBarrierFence(SPIRVInstruction *bi, BasicBlock *bb); Value *transString(const SPIRVString *spvValue); Value *transDebugPrintf(SPIRVInstruction *bi, const ArrayRef spvValues, Function *func, BasicBlock *bb); Value *transVariableNonImage(SPIRVValue *const spvValue); @@ -143,14 +164,14 @@ class SPIRVToLLVM { Value *transArrayLength(SPIRVValue *const spvValue); // Struct used to pass information in and out of getImageDesc. 
struct ExtractedImageInfo { - BasicBlock *bb; - const SPIRVTypeImageDescriptor *desc; - unsigned dim; // lgc::Builder dimension - unsigned flags; // lgc::Builder image call flags - Value *imageDesc; // Image descriptor (first plane if multi-plane) - Value *imageDescArray; // Array of image descriptors for multi-plane - Value *fmaskDesc; - Value *samplerDesc; + const SPIRVTypeImageDescriptor *desc = nullptr; + unsigned dim = ~0; // lgc::Builder dimension + unsigned flags = 0; // lgc::Builder image call flags + Value *imagePointer = nullptr; + Value *imagePlaneStride = nullptr; + Value *fmaskPointer = nullptr; + Value *samplerPointer = nullptr; + Value *convertingSamplerIdx = nullptr; }; // Load image and/or sampler descriptors, and get information from the image @@ -218,6 +239,7 @@ class SPIRVToLLVM { // Create !lgc.xfb.state metadata void createXfbMetadata(bool hasXfbOuts); + bool isRayQueryCommittedIntersection(SPIRVValue *bv); private: class SPIRVTypeContext { @@ -226,11 +248,10 @@ class SPIRVToLLVM { uint8_t m_predicates; public: - SPIRVTypeContext(SPIRVType *type, uint32_t matrixStride, bool columnMajor, bool isParentPointer, LayoutMode layout) + SPIRVTypeContext(SPIRVType *type, uint32_t matrixStride, bool columnMajor, LayoutMode layout) : m_typeId(type->getId()), m_matrixStride(matrixStride), m_predicates(0) { m_predicates |= uint8_t(columnMajor); - m_predicates |= uint8_t(isParentPointer << 1); - m_predicates |= uint8_t((uint8_t)layout << 2); + m_predicates |= uint8_t((uint8_t)layout << 1); } // Tuple representation to make it easily hashable. 
@@ -239,8 +260,7 @@ class SPIRVToLLVM { }; typedef DenseMap SPIRVToLLVMFullTypeMap; - typedef DenseMap SPIRVToLLVMTypeMap; - typedef compilerutils::LoweringPointerTupleMap SPIRVToLLVMValueMap; + typedef CompilerUtils::LoweringPointerTupleMap SPIRVToLLVMValueMap; typedef DenseMap SPIRVBlockToLLVMStructMap; typedef DenseMap SPIRVToLLVMFunctionMap; typedef DenseMap BuiltinVarMap; @@ -266,7 +286,6 @@ class SPIRVToLLVM { SPIRVFunction *m_entryTarget; const SPIRVSpecConstMap &m_specConstMap; llvm::ArrayRef m_convertingSamplers; - SPIRVToLLVMTypeMap m_typeMap; SPIRVToLLVMFullTypeMap m_fullTypeMap; SPIRVToLLVMFullTypeMap m_imageTypeMap; // Map to store struct/array with sampler type SPIRVToLLVMValueMap m_valueMap; @@ -334,12 +353,7 @@ class SPIRVToLLVM { lgc::Builder *getBuilder() const { return m_builder; } // Perform type translation for uncached types. Used in `transType`. Returns the new LLVM type. - Type *transTypeImpl(SPIRVType *bt, unsigned matrixStride, bool columnMajor, bool parentIsPointer, LayoutMode layout); - - Type *mapType(SPIRVType *bt, Type *t) { - m_typeMap[bt] = t; - return t; - } + Type *transTypeImpl(SPIRVType *bt, unsigned matrixStride, bool columnMajor, LayoutMode layout); Type *getPointeeType(SPIRVValue *v, LayoutMode layout = LayoutMode::Native); @@ -368,10 +382,7 @@ class SPIRVToLLVM { Type *getPadType(unsigned bytes) { return ArrayType::get(getBuilder()->getInt8Ty(), bytes); } - Type *recordTypeWithPad(Type *const t, bool isMatrixRow = false) { - m_typesWithPadMap[t] = isMatrixRow; - return t; - } + void recordTypeWithPad(Type *const t, bool isMatrixRow = false) { m_typesWithPadMap[t] = isMatrixRow; } bool isTypeWithPad(Type *const t) const { return m_typesWithPadMap.count(t) > 0; } @@ -431,16 +442,13 @@ class SPIRVToLLVM { template bool foreachFuncCtlMask(Source, Func); llvm::GlobalValue::LinkageTypes transLinkageType(const SPIRVValue *v); - Instruction *transBarrier(BasicBlock *bb, SPIRVWord execScope, SPIRVWord memSema, SPIRVWord memScope); - 
- Instruction *transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRVWord memScope); + void transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRVWord memScope); void truncConstantIndex(std::vector &indices, BasicBlock *bb); Value *ConvertingSamplerSelectLadderHelper(Value *result, Value *convertingSamplerIdx, const std::function &createImageOp); - Function *createLibraryEntryFunc(); - + bool hasSpirvType(SPIRVType *spvTy, spv::Op ty); Value *createTraceRayDialectOp(SPIRVValue *const spvValue); // ======================================================================================================================== diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h index 2c54985436..4d7fbd0288 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -273,6 +273,12 @@ template <> inline void SPIRVMap::init() { ADD_VEC_INIT(ExecutionModeStencilRefLessBackAMD, {CapabilityStencilExportEXT}); ADD_VEC_INIT(ExecutionModeRequireFullQuadsKHR, {CapabilityQuadControlKHR}); ADD_VEC_INIT(ExecutionModeQuadDerivativesKHR, {CapabilityQuadControlKHR}); + ADD_VEC_INIT(ExecutionModePixelInterlockOrderedEXT, {CapabilityFragmentShaderPixelInterlockEXT}); + ADD_VEC_INIT(ExecutionModePixelInterlockUnorderedEXT, {CapabilityFragmentShaderPixelInterlockEXT}); + ADD_VEC_INIT(ExecutionModeSampleInterlockOrderedEXT, {CapabilityFragmentShaderSampleInterlockEXT}); + ADD_VEC_INIT(ExecutionModeSampleInterlockUnorderedEXT, {CapabilityFragmentShaderSampleInterlockEXT}); + ADD_VEC_INIT(ExecutionModeShadingRateInterlockOrderedEXT, {CapabilityFragmentShaderShadingRateInterlockEXT}); + ADD_VEC_INIT(ExecutionModeShadingRateInterlockUnorderedEXT, {CapabilityFragmentShaderShadingRateInterlockEXT}); } template <> inline void SPIRVMap::init() { diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 
b144cebd59..90ab0639be 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -791,6 +791,8 @@ typedef SPIRVInstNoOperand SPIRVUnreachable; typedef SPIRVInstNoOperand SPIRVKill; typedef SPIRVInstNoOperand SPIRVDemoteToHelperInvocationEXT; typedef SPIRVInstNoOperand SPIRVTerminateInvocation; +typedef SPIRVInstNoOperand SPIRVBeginInvocationInterlockEXT; +typedef SPIRVInstNoOperand SPIRVEndInvocationInterlockEXT; class SPIRVReturnValue : public SPIRVInstruction { public: diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h index bf92863f6b..5a82f5cd8c 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h @@ -165,6 +165,12 @@ inline bool isValid(spv::ExecutionMode V) { case ExecutionModeQuadDerivativesKHR: case ExecutionModeRequireFullQuadsKHR: case ExecutionModeFPFastMathDefault: + case ExecutionModePixelInterlockOrderedEXT: + case ExecutionModePixelInterlockUnorderedEXT: + case ExecutionModeSampleInterlockOrderedEXT: + case ExecutionModeSampleInterlockUnorderedEXT: + case ExecutionModeShadingRateInterlockOrderedEXT: + case ExecutionModeShadingRateInterlockUnorderedEXT: return true; default: return false; @@ -569,6 +575,9 @@ inline bool isValid(spv::Capability V) { case CapabilityGroupNonUniformRotateKHR: case CapabilityQuadControlKHR: case CapabilityFloatControls2: + case CapabilityFragmentShaderSampleInterlockEXT: + case CapabilityFragmentShaderShadingRateInterlockEXT: + case CapabilityFragmentShaderPixelInterlockEXT: return true; default: return false; diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index cd04bb2760..4a01d3ac88 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -142,6 +142,12 @@ 
template <> inline void SPIRVMap::init() { add(ExecutionModeRequireFullQuadsKHR, "RequireFullQuadsKHR"); add(ExecutionModeFPFastMathDefault, "FPFastMathDefault"); add(ExecutionModeMaximallyReconvergesKHR, "MaximallyReconvergesKHR"); + add(ExecutionModePixelInterlockOrderedEXT, "PixelInterlockOrderedEXT"); + add(ExecutionModePixelInterlockUnorderedEXT, "PixelInterlockUnorderedEXT"); + add(ExecutionModeSampleInterlockOrderedEXT, "SampleInterlockOrderedEXT"); + add(ExecutionModeSampleInterlockUnorderedEXT, "SampleInterlockUnorderedEXT"); + add(ExecutionModeShadingRateInterlockOrderedEXT, "ShadingRateInterlockOrderedEXT"); + add(ExecutionModeShadingRateInterlockUnorderedEXT, "ShadingRateInterlockUnorderedEXT"); } SPIRV_DEF_NAMEMAP(ExecutionMode, SPIRVExecutionModeNameMap) @@ -510,6 +516,9 @@ template <> inline void SPIRVMap::init() { add(CapabilityGroupNonUniformRotateKHR, "GroupNonUniformRotateKHR"); add(CapabilityQuadControlKHR, "QuadControlKHR"); add(CapabilityFloatControls2, "FloatControls2"); + add(CapabilityFragmentShaderSampleInterlockEXT, "FragmentShaderSampleInterlockEXT"); + add(CapabilityFragmentShaderShadingRateInterlockEXT, "FragmentShaderShadingRateInterlockEXT"); + add(CapabilityFragmentShaderPixelInterlockEXT, "FragmentShaderPixelInterlockEXT"); } SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) diff --git a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h index 12c649e410..851aea834d 100644 --- a/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +++ b/llpc/translator/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h @@ -334,6 +334,8 @@ _SPIRV_OP(CooperativeMatrixLoadKHR, 4457) _SPIRV_OP(CooperativeMatrixStoreKHR, 4458) _SPIRV_OP(CooperativeMatrixMulAddKHR, 4459) _SPIRV_OP(CooperativeMatrixLengthKHR, 4460) +_SPIRV_OP(BeginInvocationInterlockEXT, 5364) +_SPIRV_OP(EndInvocationInterlockEXT, 5365) _SPIRV_OP(DemoteToHelperInvocationEXT, 5380) _SPIRV_OP(IsHelperInvocationEXT, 5381) 
_SPIRV_OP(SubgroupShuffleINTEL, 5571) diff --git a/llvmraytracing/.clang-format b/llvmraytracing/.clang-format deleted file mode 100644 index 9b3aa8b721..0000000000 --- a/llvmraytracing/.clang-format +++ /dev/null @@ -1 +0,0 @@ -BasedOnStyle: LLVM diff --git a/llvmraytracing/CMakeLists.txt b/llvmraytracing/CMakeLists.txt index db81197eb4..cdc578e061 100644 --- a/llvmraytracing/CMakeLists.txt +++ b/llvmraytracing/CMakeLists.txt @@ -16,6 +16,7 @@ option(LLVMRAYTRACING_BUILD_TESTS "Build raytracing tests") add_llvm_library(LLVMRaytracing lib/CleanupContinuations.cpp lib/Continuations.cpp + lib/ContinuationsLint.cpp lib/CpsStackLowering.cpp lib/DXILContIntrinsicPrepare.cpp lib/DXILContLgcRtOpConverter.cpp @@ -24,17 +25,18 @@ add_llvm_library(LLVMRaytracing lib/GpurtContext.cpp lib/GpurtDialect.cpp lib/LegacyCleanupContinuations.cpp + lib/ContinuationsStatsReport.cpp lib/LgcCpsDialect.cpp + lib/LgcCpsJumpInliner.cpp lib/LgcIlCpsDialect.cpp lib/LgcRtDialect.cpp lib/LgcRtqDialect.cpp lib/LowerAwait.cpp + lib/LowerRayQuery.cpp lib/LowerRaytracingPipeline.cpp lib/PassRegistry.inc lib/PayloadAccessQualifiers.cpp - lib/RegisterBuffer.cpp lib/RemoveTypesMetadata.cpp - lib/TypesMetadata.cpp DEPENDS intrinsics_gen diff --git a/llvmraytracing/include/lgc/GpurtDialect.h b/llvmraytracing/include/lgc/GpurtDialect.h index e25d2dd529..b64e333cc2 100644 --- a/llvmraytracing/include/lgc/GpurtDialect.h +++ b/llvmraytracing/include/lgc/GpurtDialect.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -33,3 +33,14 @@ #define GET_INCLUDES #define GET_DIALECT_DECLS #include "GpurtDialect.h.inc" + +namespace llvm { +class Module; +} // namespace llvm + +namespace lgc::gpurt { +void setKnownSetRayFlags(llvm::Module &module, unsigned flags); +void setKnownUnsetRayFlags(llvm::Module &module, unsigned flags); +unsigned getKnownSetRayFlags(const llvm::Module &module); +unsigned getKnownUnsetRayFlags(const llvm::Module &module); +} // namespace lgc::gpurt diff --git a/llvmraytracing/include/lgc/GpurtDialect.td b/llvmraytracing/include/lgc/GpurtDialect.td index ac76f07dd0..89b966ea5c 100644 --- a/llvmraytracing/include/lgc/GpurtDialect.td +++ b/llvmraytracing/include/lgc/GpurtDialect.td @@ -36,6 +36,7 @@ class GpurtOp traits_ = []> def V2F32 : TgConstant<(FixedVectorType F32, 2)>, Type; def PrivatePointer : TgConstant<(PointerType 5)>, Type; def V2I32 : TgConstant<(FixedVectorType I32, 2)>, Type; +def V3I32 : TgConstant<(FixedVectorType I32, 3)>, Type; def V4I32 : TgConstant<(FixedVectorType I32, 4)>, Type; def PairStructType : BuiltinType { @@ -364,3 +365,18 @@ def GpurtContinuationStackIsGlobalOp : GpurtOp<"continuation.stack.is.global", [ let results = (outs I1:$result); let summary = "Check whether continuation stack is global"; } + +def GpurtGetRayQueryDispatchIdOp : GpurtOp<"get.ray.query.dispatch.id", [Memory<[(read InaccessibleMem)]>, WillReturn]> { + let arguments = (ins); + let results = (outs V3I32:$dispatchId); + + let summary = "Get the rayQuery dispatch ID"; + let description = [{ + This op is not called from GPURT; rather, it is generated by LowerRayQuery to get the dispatch ID to pass + in to the GPURT initialize and proceed ops. + + For compute and ray-tracing, the rayQuery dispatch ID is the global invocation ID. 
For a graphics shader, + the rayQuery dispatch ID is the subgroup local invocation ID in the X component, and 0 in the Y and Z + components. + }]; +} diff --git a/llvmraytracing/include/lgc/LgcCpsDialect.h b/llvmraytracing/include/lgc/LgcCpsDialect.h index 9528c8730c..aa7f450913 100644 --- a/llvmraytracing/include/lgc/LgcCpsDialect.h +++ b/llvmraytracing/include/lgc/LgcCpsDialect.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -68,24 +68,16 @@ constexpr unsigned MaxArgumentDwords = 32; constexpr unsigned CpsPayloadMaxNumVgprs = MaxArgumentDwords; unsigned getArgumentDwordCount(const llvm::DataLayout &DL, llvm::Type *type); -unsigned getArgumentDwordCount(const llvm::DataLayout &DL, - llvm::ArrayRef types); -std::optional -getRemainingArgumentDwords(const llvm::DataLayout &DL, - llvm::ArrayRef arguments); +unsigned getArgumentDwordCount(const llvm::DataLayout &DL, llvm::ArrayRef types); +std::optional getRemainingArgumentDwords(const llvm::DataLayout &DL, llvm::ArrayRef arguments); bool isCpsFunction(const llvm::Function &fn); void setCpsFunctionLevel(llvm::Function &fn, CpsLevel level); CpsLevel getCpsLevelFromFunction(const llvm::Function &fn); CpsLevel getCpsLevelForShaderStage(lgc::rt::RayTracingShaderStage stage); uint8_t getPotentialCpsReturnLevels(lgc::rt::RayTracingShaderStage stage); -void pushStateToCpsStack(llvm_dialects::Builder &builder, - lgc::cps::JumpOp &jumpOp); -llvm::Value *popStateFromCpsStack(llvm_dialects::Builder 
&builder, - const llvm::DataLayout &DL, - llvm::Type *stateType); -llvm::Value * -lowerAsContinuationReference(llvm::IRBuilder<> &Builder, - lgc::cps::AsContinuationReferenceOp &AsCROp, - llvm::Value *Relocation = nullptr); +void pushStateToCpsStack(llvm_dialects::Builder &builder, lgc::cps::JumpOp &jumpOp); +llvm::Value *popStateFromCpsStack(llvm_dialects::Builder &builder, const llvm::DataLayout &DL, llvm::Type *stateType); +llvm::Value *lowerAsContinuationReference(llvm::IRBuilder<> &Builder, lgc::cps::AsContinuationReferenceOp &AsCROp, + llvm::Value *Relocation = nullptr); } // namespace lgc::cps diff --git a/llvmraytracing/include/lgc/LgcCpsDialect.td b/llvmraytracing/include/lgc/LgcCpsDialect.td index ce56e6a3b9..2e796dc9fe 100644 --- a/llvmraytracing/include/lgc/LgcCpsDialect.td +++ b/llvmraytracing/include/lgc/LgcCpsDialect.td @@ -39,7 +39,7 @@ class LgcCpsOp traits_> def StackPointer : TgConstant<(PointerType 32)>, Type; // A pointer to a CPS function, combined with additional metadata -def ContinuationReference : TgConstant<(I32)>, Type; +def ContinuationReference : TgConstant<(or I32, I64)>, Type; // ===================================================================================================================== def JumpOp : LgcCpsOp<"jump", [NoReturn]> { @@ -75,7 +75,7 @@ def AwaitOp : LgcCpsOp<"await", [NoUnwind, WillReturn]> { // ===================================================================================================================== def AsContinuationReferenceOp : LgcCpsOp<"as.continuation.reference", [NoUnwind, WillReturn]> { let arguments = (ins PointerType:$fn); - let results = (outs (or ContinuationReference, I64):$ref); + let results = (outs ContinuationReference:$ref); let defaultBuilderHasExplicitResultType = true; diff --git a/llvmraytracing/include/lgc/LgcIlCpsDialect.h b/llvmraytracing/include/lgc/LgcIlCpsDialect.h index 58af07bb70..205fd5f2f7 100644 --- a/llvmraytracing/include/lgc/LgcIlCpsDialect.h +++ 
b/llvmraytracing/include/lgc/LgcIlCpsDialect.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/llvmraytracing/include/lgc/LgcIlCpsDialect.td b/llvmraytracing/include/lgc/LgcIlCpsDialect.td index 75c9fbf8ed..605a901ed9 100644 --- a/llvmraytracing/include/lgc/LgcIlCpsDialect.td +++ b/llvmraytracing/include/lgc/LgcIlCpsDialect.td @@ -53,6 +53,45 @@ def GetReturnValueOp : LgcIlCpsOp<"getReturnValue", [NoUnwind, WillReturn]> { }]; } +def ContinueOp : LgcIlCpsOp<"continue", [NoReturn]> { + let arguments = (ins I64:$shaderAddr, I32:$csp, I64:$returnAddr, varargs:$tail); + let results = (outs); + + let summary = + "represents the jump to another shader"; + + let description = [{ + Describes the jump to another shader. The arguments are: + + - shaderAddr, the shader the current shader should jump to + - csp, the continuation stack pointer. Whatever is passed here, is going to be + overridden by the compiler. + - returnAddr, the return address the called shader should jump back to, e. g. the + resume function. + - tail, a set of arguments like the system data or hit attributes. + }]; +} + +def WaitContinueOp : LgcIlCpsOp<"waitContinue", [NoReturn]> { + let arguments = (ins I64:$shaderAddr, I64:$waitMask, I32:$csp, I64:$returnAddr, varargs:$tail); + let results = (outs); + + let summary = + "represents the jump to another shader with a wait mask"; + + let description = [{ + Describes the jump to another shader. 
The arguments are: + + - shaderAddr, the shader the current shader should jump to + - waitMask, the bitmask all lanes have to wait for. + - csp, the continuation stack pointer. Whatever is passed here, is going to be + overridden by the compiler. + - returnAddr, the return address the called shader should jump back to, e. g. the + resume function. + - tail, a set of arguments like the system data or hit attributes. + }]; +} + def ReturnOp : LgcIlCpsOp<"return", [NoReturn]> { let arguments = (ins value:$returnAddr, varargs:$args); let results = (outs); diff --git a/llvmraytracing/include/lgc/LgcRtDialect.h b/llvmraytracing/include/lgc/LgcRtDialect.h index 67caf9abaa..3dc2721540 100644 --- a/llvmraytracing/include/lgc/LgcRtDialect.h +++ b/llvmraytracing/include/lgc/LgcRtDialect.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -55,26 +55,41 @@ enum class RayTracingShaderStage { Count }; +// These ray flags correspond to the equivalent flags in GLSL_EXT_ray_tracing +enum class RayFlag { + None = 0x00, + ForceOpaque = 0x01, + ForceNonOpaque = 0x02, + AcceptFirstHitAndEndSearch = 0x04, + SkipClosestHitShader = 0x08, + CullBackFacingTriangles = 0x10, + CullFrontFacingTriangles = 0x20, + CullOpaque = 0x40, + CullNonOpaque = 0x80, + SkipTriangles = 0x100, + SkipProceduralPrimitives = 0x200, +}; + // Set shader stage metadata on a LLVM function and erase it by setting // std::nullopt. 
// func can instead be a GlobalVariable, allowing a front-end to use a // GlobalVariable to represent a shader retrieved from the cache, and wants to // mark it with a shader stage. -void setLgcRtShaderStage(llvm::GlobalObject *func, - std::optional stage); +void setLgcRtShaderStage(llvm::GlobalObject *func, std::optional stage); // Gets the shader stage from the specified LLVM function or std::nullopt // if no metadata is apparent. // func can instead be a GlobalVariable, allowing a front-end to use a // GlobalVariable to represent a shader retrieved from the cache, and wants to // mark it with a shader stage. -std::optional -getLgcRtShaderStage(const llvm::GlobalObject *func); +std::optional getLgcRtShaderStage(const llvm::GlobalObject *func); + +// Get the name string of shader subtype for the specified shader stage. +const char *getShaderSubtypeForRtShaderStage(RayTracingShaderStage stage); // Get the metadata IDs associated with the lgc.rt dialect, so the caller knows // which ones can be removed when the dialect is processed. -void getLgcRtMetadataIds(llvm::LLVMContext &context, - llvm::SmallVectorImpl &ids); +void getLgcRtMetadataIds(llvm::LLVMContext &context, llvm::SmallVectorImpl &ids); // Get PAQ (payload access qualifier) metadata for a ray-tracing shader // function, or nullptr if none. diff --git a/llvmraytracing/include/lgc/LgcRtqDialect.h b/llvmraytracing/include/lgc/LgcRtqDialect.h index 1c342c18f7..983b7c812d 100644 --- a/llvmraytracing/include/lgc/LgcRtqDialect.h +++ b/llvmraytracing/include/lgc/LgcRtqDialect.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/llvmraytracing/include/lgc/LgcRtqDialect.td b/llvmraytracing/include/lgc/LgcRtqDialect.td index 0b0df99a28..9784721cbe 100644 --- a/llvmraytracing/include/lgc/LgcRtqDialect.td +++ b/llvmraytracing/include/lgc/LgcRtqDialect.td @@ -40,7 +40,9 @@ def LgcRtqDialect : Dialect { def V2F32 : TgConstant<(FixedVectorType F32, 2)>, Type; def V3F32 : TgConstant<(FixedVectorType F32, 3)>, Type; +def A3V3F32 : TgConstant<(ArrayType V3F32, 3)>, Type; def A4V3F32 : TgConstant<(ArrayType V3F32, 4)>, Type; +def PrivatePointer : TgConstant<(PointerType 5)>, Type; class LgcRtqOp traits_ = []> : Op; @@ -370,3 +372,24 @@ def IntersectionWorldToObjectOp : LgcRtqOp<"intersection.world.to.object", [Memo If `committed` is true, behavior is undefined if there is no currently committed intersection. }]; } + +def GepOpaqueOp: LgcRtqOp<"gep.opaque", [Memory<[(read ArgMem)]>]> { + let arguments = (ins type:$base_type, AttrI1:$inbound, PointerType:$base_pointer, varargs:$offsets); + let results = (outs PrivatePointer:$result); + + let summary = "GEP a rayquery pointer from a base object pointer"; + let description = [{ + Returns a opaque rayquery pointer from a base object pointer through gep array of indices + }]; +} + +// ===================================================================================================================== +def IntersectionTriangleVertexPositionsOp : LgcRtqOp<"intersection.triangle.vertex.position", [Memory<[(read ArgMem)]>]> { + let arguments = (ins PointerType:$ray_query, AttrI1:$committed); + let results = (outs A3V3F32:$result); + + let summary = "Fetch the triangle vertex points"; + let description = [{ + Returns the intersected vec3[3] triangle vertices from the ray query. 
+ }]; +} diff --git a/llvmraytracing/include/llvmraytracing/Continuations.h b/llvmraytracing/include/llvmraytracing/Continuations.h index 624ec64ced..d4f3733991 100644 --- a/llvmraytracing/include/llvmraytracing/Continuations.h +++ b/llvmraytracing/include/llvmraytracing/Continuations.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -61,22 +61,19 @@ // // Rematerializable intrinsics like DispatchRaysIndex are left in their lgc.rt // form and don't access system data until the DXILContPostProcess pass. There, -// a new alloca is added, SetupRayGen is called to create the initial system -// data and the rematerializable intrinsics get the new alloca as their -// argument. All these intrinsics cannot modify system data, otherwise we could -// not rematerialize them. +// a new alloca is added, and the rematerializable intrinsics get the new alloca +// as their argument. All these intrinsics cannot modify system data, otherwise +// we could not rematerialize them. // -// At the start of a function, the alloca is initialized from an argument. In -// the case of RayGen, this argument is removed and replaced with a proper call -// to SetupRayGen in the DXILContPostProcess pass. +// At the start of a function, the alloca is initialized from an argument. 
#pragma once -#include "TypesMetadata.h" #include "compilerutils/CompilerUtils.h" -#include "llvm-dialects/Dialect/Builder.h" +#include "compilerutils/TypesMetadata.h" #include "llvmraytracing/ContinuationsUtil.h" #include "llvmraytracing/PayloadAccessQualifiers.h" +#include "llvm-dialects/Dialect/Builder.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/Bitcode/BitcodeReader.h" @@ -85,7 +82,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" #include -#include #include #include @@ -103,42 +99,19 @@ struct CoroSplitPass; // Returns the PAQShaderStage corresponding to the given RayTracingShaderStage, // if there is any. -std::optional -rtShaderStageToPAQShaderStage(lgc::rt::RayTracingShaderStage ShaderKind); +std::optional rtShaderStageToPAQShaderStage(lgc::rt::RayTracingShaderStage ShaderKind); -/// Create a new function, as cloneFunctionHeader, but include types metadata. -Function *cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, - ArrayRef ArgAttrs); - -Function *cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, - AttributeList FnAttr); /// Remove bitcasts of function pointers in metadata. /// This also removes the DXIL payload metadata from functions. /// Returns true if something changed. bool fixupDxilMetadata(Module &M); -/// Get intrinsic that forms a barrier with some arguments. -/// This is used to connect storing the memory pointer of a register buffer to -/// accessing the buffer and prevent reordering. -Function *getRegisterBufferSetPointerBarrier(Module &M); - -/// Create the metadata for a register buffer global. -MDTuple *createRegisterBufferMetadata(LLVMContext &Context, - const RegisterBufferMD &MD); - -/// Extract the metadata for a register buffer global. -RegisterBufferMD getRegisterBufferMetadata(const MDNode *MD); - /// Get intrinsic to set the local root signature index. 
Function *getSetLocalRootIndex(Module &M); /// Get intrinsic to convert a dx handle to an acceleration struct address. Function *getAccelStructAddr(Module &M, Type *HandleTy); -/// Get the continuation.continue intrinsic. -Function *getContinuationContinue(Module &M); -/// Get the continuation.waitContinue intrinsic. -Function *getContinuationWaitContinue(Module &M); /// Get the await intrinsic. Function *getContinuationAwait(Module &M, Type *TokenTy, StructType *RetTy); @@ -154,18 +127,18 @@ uint64_t getInlineHitAttrsBytes(Module &M); /// Extract a function from a constant metadata node, ignoring any bitcasts. Function *extractFunctionOrNull(Metadata *N); +/// Based on the metadata of a function, check if this is a start function of a shader. +bool isStartFunc(Function *Func); + /// Recurse into the first member of the given SystemData to find an object of /// the wanted type. /// See also the system data documentation at the top of Continuations.h. -Value *getDXILSystemData(IRBuilder<> &B, Value *SystemData, Type *SystemDataTy, - Type *Ty); +Value *getDXILSystemData(IRBuilder<> &B, Value *SystemData, Type *SystemDataTy, Type *Ty); /// Replace call to intrinsic (lgc.rt.*) with a call to the driver /// implementation (_cont_*). -CallInst *replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, - Value *SystemData, - lgc::rt::RayTracingShaderStage Kind, - CallInst *Call, Module *GpurtLibrary, +CallInst *replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, Value *SystemData, + lgc::rt::RayTracingShaderStage Kind, CallInst *Call, Module *GpurtLibrary, CompilerUtils::CrossModuleInliner &Inliner); /// Terminate a shader by inserting a return instruction and taking care of @@ -178,13 +151,10 @@ void terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall); /// Returns whether something changed. 
bool earlyDriverTransform(Module &M); -/// Buffered pointers use a fixed number of registers, and fall back to an -/// allocation if the registers to not suffice to contain the content. Given a -/// number NumI32s of 4-byte values and the number of reserved registers, return -/// the amount of dynamic storage required to store that many 4-byte values, in -/// bytes. Returns 0 if the reserved registers suffice. -uint64_t computeNeededStackSizeForRegisterBuffer(uint64_t NumI32s, - uint64_t NumReservedRegisters); +/// Given a number NumI32s of 4-byte values and the number of reserved +/// registers, return the amount of dynamic storage required to store that many +/// 4-byte values, in bytes. Returns 0 if the reserved registers suffice. +uint64_t computePayloadSpillSize(uint64_t NumI32s, uint64_t NumReservedRegisters); // Given two I32 pointers, copy NumBytes many bytes from Src to Dst. // The implementation performs I32 copies, plus a copy @@ -195,16 +165,14 @@ class DialectContextAnalysisResult { public: DialectContextAnalysisResult() {} - bool invalidate(llvm::Module &, const llvm::PreservedAnalyses &, - llvm::ModuleAnalysisManager::Invalidator &) { + bool invalidate(llvm::Module &, const llvm::PreservedAnalyses &, llvm::ModuleAnalysisManager::Invalidator &) { return false; } }; /// An analysis to run with dialects, even if the running tool does not have /// explicit support for it. This will create a dialect context on-demand. 
-class DialectContextAnalysis - : public llvm::AnalysisInfoMixin { +class DialectContextAnalysis : public llvm::AnalysisInfoMixin { public: using Result = DialectContextAnalysisResult; DialectContextAnalysis(bool NeedDialectContext = true); @@ -218,23 +186,19 @@ class DialectContextAnalysis bool NeedDialectContext; }; -class LegacyCleanupContinuationsPass - : public llvm::PassInfoMixin { +class LegacyCleanupContinuationsPass : public llvm::PassInfoMixin { public: LegacyCleanupContinuationsPass() {} - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "legacy continuation cleanup"; } }; -class CleanupContinuationsPass - : public llvm::PassInfoMixin { +class CleanupContinuationsPass : public llvm::PassInfoMixin { public: CleanupContinuationsPass(bool Use64BitContinuationReferences = false) : Use64BitContinuationReferences{Use64BitContinuationReferences} {} - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "continuation cleanup"; } @@ -251,18 +215,14 @@ class CleanupContinuationsPass }; void removeContFreeCall(Function *F, Function *ContFree); - Value * - getContinuationFramePtr(Function *F, bool IsStart, - const ContinuationData &ContinuationInfo, - SmallVector *InstsToRemove = nullptr); + Value *getContinuationFramePtr(Function *F, bool IsStart, const ContinuationData &ContinuationInfo, + SmallVector *InstsToRemove = nullptr); void freeCpsStack(Function *F, ContinuationData &CpsInfo); - void updateCpsStack(Function *F, Function *NewFunc, bool IsStart, - ContinuationData &CpsInfo); + void updateCpsStack(Function *F, Function *NewFunc, bool IsStart, ContinuationData &CpsInfo); void 
analyzeContinuation(Function &F, MDNode *MD); void processContinuations(); void handleContinue(ContinuationData &Data, Instruction *Ret); - void handleSingleContinue(ContinuationData &Data, CallInst *Call, - Value *ResumeFun); + void handleSingleContinue(ContinuationData &Data, CallInst *Call, Value *ResumeFun); void lowerIntrinsicCall(Module &Mod); void lowerGetResumePoint(Module &Mod); @@ -282,90 +242,71 @@ class DXILCleanupContinuationsPass : public CleanupContinuationsPass { public: DXILCleanupContinuationsPass() : CleanupContinuationsPass(true) {} - static llvm::StringRef name() { - return "DXIL cleanup continuations pass wrapper"; - } + static llvm::StringRef name() { return "DXIL cleanup continuations pass wrapper"; } }; -class LowerRaytracingPipelinePass - : public llvm::PassInfoMixin { +// A pass that reports statistics from the continuations module. +class ContinuationsStatsReportPass : public llvm::PassInfoMixin { +public: + ContinuationsStatsReportPass() = default; + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); + + static llvm::StringRef name() { return "Continuations statistics reporting pass"; } +}; + +class LowerRaytracingPipelinePass : public llvm::PassInfoMixin { public: LowerRaytracingPipelinePass() {} - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "Lower raytracing pipeline pass"; } }; -class DXILContIntrinsicPreparePass - : public llvm::PassInfoMixin { +class LgcCpsJumpInlinerPass : public llvm::PassInfoMixin { +public: + LgcCpsJumpInlinerPass() {} + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); + + static llvm::StringRef name() { return "lgc.cps jump inliner pass"; } +}; + +class DXILContIntrinsicPreparePass : public llvm::PassInfoMixin { public: 
DXILContIntrinsicPreparePass(); - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); - static llvm::StringRef name() { - return "DXIL continuation intrinsic preparation"; - } + static llvm::StringRef name() { return "DXIL continuation intrinsic preparation"; } }; -class DXILContPostProcessPass - : public llvm::PassInfoMixin { +class DXILContPostProcessPass : public llvm::PassInfoMixin { public: DXILContPostProcessPass() {} - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "DXIL continuation post processing"; } }; -class LowerAwaitPass : public llvm::PassInfoMixin { +class ContinuationsLintPass : public llvm::PassInfoMixin { public: - LowerAwaitPass(); - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + ContinuationsLintPass() {} + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); - static llvm::StringRef name() { return "continuation point lowering"; } + static llvm::StringRef name() { return "Continuations lint pass"; } }; -class RegisterBufferPass : public llvm::PassInfoMixin { +class LowerAwaitPass : public llvm::PassInfoMixin { public: - RegisterBufferPass(); - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); - - static llvm::StringRef name() { return "register buffer lowering"; } - - /// Handle a load/store that accesses a single register only. 
- Value *handleSingleLoadStore(IRBuilder<> &Builder, Type *Ty, Value *StoreVal, - Value *Address, Align Alignment, - AAMDNodes AATags, bool IsLoad); + LowerAwaitPass(); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); -private: - /// Convert Address into an address that accesses the memory base address - /// instead of the register global. - Value *computeMemAddr(IRBuilder<> &Builder, Value *Address); - - void handleLoadStore(IRBuilder<> &Builder, Instruction *I, Value *Address, - bool IsLoad); - - /// Maps a Value that accesses the register part of the global to a Value that - /// accesses the memory part. - DenseMap MemAccessors; - - // Properties of the current item that is worked on - GlobalVariable *Global; - IntegerType *ElementType; - RegisterBufferMD Data; - uint32_t TotalElementCount; + static llvm::StringRef name() { return "continuation point lowering"; } }; // No-op pass running before the DXIL continuations pipeline, e.g. for usage // with -print-after class DXILContPreHookPass : public llvm::PassInfoMixin { public: - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager) { + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager) { return PreservedAnalyses::all(); } static llvm::StringRef name() { return "DXIL continuation pre hook pass"; } @@ -375,8 +316,7 @@ class DXILContPreHookPass : public llvm::PassInfoMixin { // -print-after class DXILContPostHookPass : public llvm::PassInfoMixin { public: - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager) { + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager) { return PreservedAnalyses::all(); } static llvm::StringRef name() { return "DXIL continuation post hook pass"; } @@ -390,13 +330,9 @@ bool DXILMaterializable(Instruction &I); // coro-split) class DXILCoroSplitPass : public 
CoroSplitPass { public: - DXILCoroSplitPass() - : CoroSplitPass(std::function(&DXILMaterializable), - true) {} + DXILCoroSplitPass() : CoroSplitPass(std::function(&DXILMaterializable), true) {} - static llvm::StringRef name() { - return "DXIL continuations coro split pass wrapper"; - } + static llvm::StringRef name() { return "DXIL continuations coro split pass wrapper"; } }; // Helper function to query whether an instruction is rematerializable, which is @@ -411,31 +347,23 @@ bool LgcMaterializable(Instruction &I); // coro-split) class LgcCoroSplitPass : public CoroSplitPass { public: - LgcCoroSplitPass() - : CoroSplitPass(std::function(&LgcMaterializable), - true) {} + LgcCoroSplitPass() : CoroSplitPass(std::function(&LgcMaterializable), true) {} - static llvm::StringRef name() { - return "Lgc continuations coro split pass wrapper"; - } + static llvm::StringRef name() { return "Lgc continuations coro split pass wrapper"; } }; -// Pass to remove !types metadata from function definitions and declarations -class RemoveTypesMetadataPass - : public llvm::PassInfoMixin { +// Pass to remove !pointeetys metadata from function definitions and declarations +class RemoveTypesMetadataPass : public llvm::PassInfoMixin { public: - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "Remove types metadata"; } }; -class DXILContLgcRtOpConverterPass - : public llvm::PassInfoMixin { +class DXILContLgcRtOpConverterPass : public llvm::PassInfoMixin { public: DXILContLgcRtOpConverterPass() = default; - llvm::PreservedAnalyses run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager); + llvm::PreservedAnalyses run(llvm::Module &Module, llvm::ModuleAnalysisManager &AnalysisManager); static llvm::StringRef name() { return "Convert DXIL ops into lgc.rt ops"; } @@ -445,32 +373,21 @@ class 
DXILContLgcRtOpConverterPass const llvm::DataLayout *DL = nullptr; bool convertDxOp(llvm::Function &Func); - using OpCallbackType = std::function; + using OpCallbackType = std::function; std::optional getCallbackByOpName(StringRef OpName); template Value *handleSimpleCall(CallInst &CI); Value *handleTraceRayOp(CallInst &CI); Value *handleReportHitOp(CallInst &CI); Value *handleCallShaderOp(CallInst &CI); - template - Value *handleVecResult(CallInst &CI); - template - Value *handleMatrixResult(CallInst &CI); + template Value *handleVecResult(CallInst &CI); + template Value *handleMatrixResult(CallInst &CI); Value *createVec3(Value *X, Value *Y, Value *Z); void addDXILPayloadTypeToCall(Function &DXILFunc, CallInst &CI); bool prepareEntryPointShaders(); void setupLocalRootIndex(Function *F); }; -Function *promotePointerArguments(Function *Fn, - const SmallBitVector &PromotionMask); - -/// Replace struct return type with it's first element type. -Function *unpackStructReturnType(Function *Fn); -/// Turn StructRet argument into return type. -Function *lowerStructRetArgument(Function *Fn); - /// Add necessary continuation transform passes for LGC. void addLgcContinuationTransform(ModulePassManager &MPM); diff --git a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h index ac1fb42de5..7ff58efa64 100644 --- a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h +++ b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -31,10 +31,10 @@ #pragma once -#include "lgc/LgcCpsDialect.h" -#include "lgc/LgcRtDialect.h" #include "llpc/GpurtEnums.h" #include "llpc/GpurtVersion.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/OpMap.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" @@ -106,6 +106,7 @@ struct GpuRtIntrinsicEntry { extern const llvm_dialects::OpMap LgcRtGpuRtMap; llvm::raw_ostream &operator<<(llvm::raw_ostream &, DXILShaderKind); +llvm::raw_ostream &operator<<(llvm::raw_ostream &, lgc::rt::RayTracingShaderStage); enum class AnyHitExitKind { None, // not an AnyHit shader @@ -117,62 +118,6 @@ enum class AnyHitExitKind { // The address space used for the continuation stack. enum class ContStackAddrspace : uint32_t { Scratch = 21, Global = 22 }; -// Metadata associated with a register buffer. -struct RegisterBufferMD { - /// Number of registers to use. - uint32_t RegisterCount; - /// Address space for the memory part of the buffer. - uint32_t Addrspace; -}; - -// Helper class to abstract over function argument types. -// Derives types from custom metadata when available, allowing pointer -// element types to be derives even with opaque pointers. 
-class ContArgTy { -private: - Type *ArgTy; - Type *ElemTy; - -public: - ContArgTy() : ArgTy(nullptr), ElemTy(nullptr) {} - ContArgTy(Type *Arg, Type *Elem) : ArgTy(Arg), ElemTy(Elem) {} - ContArgTy(Type *Arg); - - static ContArgTy get(const Function *F, const Argument *Arg); - static ContArgTy get(const Function *F, const unsigned ArgNo); - static ContArgTy get(const Metadata *MD, LLVMContext &Context); - - Type *asType(LLVMContext &Context); - Type *getPointerElementType() const; - - bool isPointerTy() const; - bool isVoidTy() const; - Metadata *getTypeMetadata(LLVMContext &Context); - - bool operator==(const ContArgTy &RHS) const { - return (ArgTy == RHS.ArgTy) && (ElemTy == RHS.ElemTy); - } -}; - -// Helper class to abstract over function types. -// Uses ContArgTy to derive types from and encode types to custom metadata. -class ContFuncTy { -public: - ContFuncTy() {} - ContFuncTy(ContArgTy Return) : ReturnTy(Return) {} - ContFuncTy(ContArgTy Return, ArrayRef Args) - : ReturnTy(Return), ArgTys(Args) {} - - ContArgTy ReturnTy; - SmallVector ArgTys; - - static ContFuncTy get(const Function *F); - static ContFuncTy get(const Metadata *MD, LLVMContext &Context); - - FunctionType *asFunctionType(LLVMContext &Context); - void writeMetadata(Function *F); -}; - struct ContSetting { /// A hash value that is used as name. uint64_t NameHash; @@ -201,15 +146,13 @@ class ContHelper { // // The number of registers entering a function (if used as function // metadata), or leaving a function (if used on a continue statement). - static constexpr const char *MDRegisterCountName = - "continuation.registercount"; + static constexpr const char *MDRegisterCountName = "continuation.registercount"; // The number of registers returned by a TraceRay or CallShader call, // annotated to the outgoing continue call. 
For resume functions, we scan // continue calls referencing the resume function, and use their returned // register count annotation as incoming register count for the resume // function. - static constexpr const char *MDReturnedRegisterCountName = - "continuation.returnedRegistercount"; + static constexpr const char *MDReturnedRegisterCountName = "continuation.returnedRegistercount"; // Module-scope *payload* register count metadata // Payload registers are registers used to pass data between RT stages. @@ -232,14 +175,12 @@ class ContHelper { // For intersection, it is not used, because early-compiled intersection // shaders can be used in pipelines with large payload types unknown when // compiling the intersection shader. - static constexpr const char *MDPreservedPayloadRegisterCountName = - "continuation.preservedPayloadRegisterCount"; + static constexpr const char *MDPreservedPayloadRegisterCountName = "continuation.preservedPayloadRegisterCount"; // [in] MaxPayloadRegisterCount // The maximum allowed number of payload registers to be used for payload and // other inter-stage date (e.g. attributes). If state does not fit into this // limit, we spill to the continuation stack. - static constexpr const char *MDMaxPayloadRegisterCountName = - "continuation.maxPayloadRegisterCount"; + static constexpr const char *MDMaxPayloadRegisterCountName = "continuation.maxPayloadRegisterCount"; // [out] MaxUsedPayloadRegisterCount // The maximum number of payload registers written or read by any // shader in the module. This excludes intersection shaders, which @@ -247,12 +188,10 @@ class ContHelper { // This can be used to populate PreservedPayloadRegisterCount when compiling // the driver module in case all modules of the pipeline are known and // have already been processed. 
- static constexpr const char *MDMaxUsedPayloadRegisterCountName = - "continuation.maxUsedPayloadRegisterCount"; + static constexpr const char *MDMaxUsedPayloadRegisterCountName = "continuation.maxUsedPayloadRegisterCount"; // The address space used to store the continuations stack. // The possible values for this metadata are the values of ContStackAddrspace. - static constexpr const char *MDStackAddrspaceName = - "continuation.stackAddrspace"; + static constexpr const char *MDStackAddrspaceName = "continuation.stackAddrspace"; // The raytracing ip level that is available on the target architecture. // This is exposed to gpurt code via the GetRtip intrinsic. static constexpr const char *MDRtipName = "continuation.rtip"; @@ -264,8 +203,7 @@ class ContHelper { static std::optional extractZExtI32Constant(MDNode *Node) { if (Node) { - uint64_t Result = - mdconst::extract(Node->getOperand(0))->getZExtValue(); + uint64_t Result = mdconst::extract(Node->getOperand(0))->getZExtValue(); assert(Result <= std::numeric_limits::max()); return Result; } @@ -274,18 +212,15 @@ class ContHelper { static MDNode *getI32MDConstant(LLVMContext &Context, uint32_t Value) { IntegerType *Int32Ty = Type::getInt32Ty(Context); - MDNode *Result = MDTuple::get( - Context, {ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Value))}); + MDNode *Result = MDTuple::get(Context, {ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Value))}); assert(Result && "Failed to create metadata node!"); - assert(extractZExtI32Constant(Result) == Value && - "Failed to extract value from node!"); + assert(extractZExtI32Constant(Result) == Value && "Failed to extract value from node!"); return Result; } static Type *getPayloadTypeFromMetadata(const MDNode *Node) { auto *MDTup = cast(Node); - if (auto *ExtractedConstant = - mdconst::extract(MDTup->getOperand(0))) { + if (auto *ExtractedConstant = mdconst::extract(MDTup->getOperand(0))) { return ExtractedConstant->getType(); } @@ -298,18 +233,12 @@ class ContHelper 
{ static constexpr const char *MDStackSizeName = "continuation.stacksize"; static constexpr const char *MDStateName = "continuation.state"; static constexpr const char *MDContinuationName = "continuation"; - static constexpr const char *MDTypesName = "types"; - static constexpr const char *MDTypesFunctionName = "function"; - static constexpr const char *MDTypesVoidName = "void"; static constexpr const char *MDContPayloadTyName = "cont.payload.type"; static constexpr const char *MDLgcCpsModuleName = "lgc.cps.module"; static constexpr const char *MDGpurtSettingsName = "gpurt.settings"; + static constexpr const char *MDWaitMaskName = "waitmask"; - // Global variable names - static constexpr const char *GlobalPayloadName = "PAYLOAD"; - static constexpr const char *GlobalRegistersName = "REGISTERS"; - static constexpr ContStackAddrspace DefaultStackAddrspace = - ContStackAddrspace::Scratch; + static constexpr ContStackAddrspace DefaultStackAddrspace = ContStackAddrspace::Scratch; static void RegisterPasses(llvm::PassBuilder &PB, bool NeedDialectContext); @@ -318,8 +247,7 @@ class ContHelper { // Registers the DXIL-specific Continuation pipeline to a LLVM Module Pass // manager. - static void addDxilContinuationPasses(llvm::ModulePassManager &MPM, - llvm::Module *GpurtLibrary = nullptr); + static void addDxilContinuationPasses(llvm::ModulePassManager &MPM, llvm::Module *GpurtLibrary = nullptr); // Registers the DXIL-specific pipeline for the driver library module to a // LLVM Module Pass manager. These passes preprocess the driver library into a @@ -328,8 +256,7 @@ class ContHelper { static void addDxilGpurtLibraryPasses(llvm::ModulePassManager &MPM); // Get gpurt settings from metadata. 
- static void getGpurtSettings(const Module &M, - SmallVectorImpl &Settings) { + static void getGpurtSettings(const Module &M, SmallVectorImpl &Settings) { auto *MD = M.getNamedMetadata(MDGpurtSettingsName); if (!MD) return; @@ -354,137 +281,91 @@ class ContHelper { IntegerType *Int64Ty = Type::getInt64Ty(Context); // Stored as {bitwidth, value, bitwidth, value, ...} for (auto &Setting : Settings) { - Vals.push_back( - ConstantAsMetadata::get(ConstantInt::get(Int64Ty, Setting.NameHash))); - Vals.push_back( - ConstantAsMetadata::get(ConstantInt::get(Int64Ty, Setting.Value))); + Vals.push_back(ConstantAsMetadata::get(ConstantInt::get(Int64Ty, Setting.NameHash))); + Vals.push_back(ConstantAsMetadata::get(ConstantInt::get(Int64Ty, Setting.Value))); } MD->addOperand(MDTuple::get(Context, Vals)); } - // Set metadata specifying the number of outgoing payload registers. - static void setOutgoingRegisterCount(Instruction *I, uint32_t RegisterCount) { - I->setMetadata(MDRegisterCountName, - getI32MDConstant(I->getContext(), RegisterCount)); - } +#define NUMERIC_METADATA_HELPER(SCOPE, NAME, MD_NAME) \ + class NAME final { \ + public: \ + static void setValue(SCOPE *S, uint32_t Val) { S->setMetadata(MD_NAME, getI32MDConstant(S->getContext(), Val)); } \ + static std::optional tryGetValue(const SCOPE *S) { \ + return extractZExtI32Constant(S->getMetadata(MD_NAME)); \ + } \ + static void reset(SCOPE *S) { S->setMetadata(MD_NAME, nullptr); } \ + static void inc(SCOPE *S, uint32_t Value) { \ + auto ExistingSize = tryGetValue(S).value_or(0); \ + S->setMetadata(MD_NAME, getI32MDConstant(S->getContext(), ExistingSize + Value)); \ + } \ + }; - // Get the number of outgoing payload registers if set. - static std::optional - tryGetOutgoingRegisterCount(const Instruction *I) { - return extractZExtI32Constant(I->getMetadata(MDRegisterCountName)); - } + // Handle the number of outgoing payload registers. 
+ NUMERIC_METADATA_HELPER(Instruction, OutgoingRegisterCount, MDRegisterCountName) - // Set metadata specifying the number of incoming payload registers. - static void setIncomingRegisterCount(Function *F, uint32_t RegisterCount) { - F->setMetadata(MDRegisterCountName, - getI32MDConstant(F->getContext(), RegisterCount)); - } + // Handle the number of incoming payload registers. + NUMERIC_METADATA_HELPER(Function, IncomingRegisterCount, MDRegisterCountName) - // Get the number of incoming payload registers if set. - static std::optional - tryGetIncomingRegisterCount(const Function *F) { - return extractZExtI32Constant(F->getMetadata(MDRegisterCountName)); - } + // Handle the number of payload registers returned by a TraceRay or CallShader. See MDReturnedRegisterCountName for + // details. + NUMERIC_METADATA_HELPER(Instruction, ReturnedRegisterCount, MDReturnedRegisterCountName) - // Set metadata specifying the number of payload registers returned by a - // TraceRay or CallShader. See MDReturnedRegisterCountName for details. - static void setReturnedRegisterCount(Instruction *I, uint32_t RegisterCount) { - I->setMetadata(MDReturnedRegisterCountName, - getI32MDConstant(I->getContext(), RegisterCount)); - } + // Handle the continuation state byte count metadata. + NUMERIC_METADATA_HELPER(Function, ContinuationStateByteCount, MDStateName) - // Get the number of payload registers returned by a TraceRay or CallShader - // from metadata if set. See MDReturnedRegisterCountName for details. - static std::optional - tryGetReturnedRegisterCount(const Instruction *I) { - return extractZExtI32Constant(I->getMetadata(MDReturnedRegisterCountName)); - } + // Handle the continuation stack size metadata. + NUMERIC_METADATA_HELPER(Function, StackSize, MDStackSizeName) - // If there is module-level metadata node, return its value. Otherwise, return - // std::nullopt. 
- static std::optional - tryGetPreservedPayloadRegisterCount(const Module &M) { - auto *MD = M.getNamedMetadata(MDPreservedPayloadRegisterCountName); - if (!MD) - return {}; - return extractZExtI32Constant(MD->getOperand(0)); - }; +#undef NUMERIC_METADATA_HELPER - static void - setPreservedPayloadRegisterCount(Module &M, - uint32_t PreservedPayloadRegisterCount) { - auto *MD = M.getOrInsertNamedMetadata(MDPreservedPayloadRegisterCountName); - assert(MD && "Failed to create metadata node!"); - MD->clearOperands(); - MD->addOperand( - getI32MDConstant(M.getContext(), PreservedPayloadRegisterCount)); + static std::optional tryGetIncomingRegisterCount(const Function *F) { + return IncomingRegisterCount::tryGetValue(F); } - // Old alias until clients are migrated to setPreservedPayloadRegisterCount: - static void - setMinPayloadRegisterCount(Module &M, - uint32_t PreservedPayloadRegisterCount) { - setPreservedPayloadRegisterCount(M, PreservedPayloadRegisterCount); - } - - // If there is module-level metadata specifying the maximum number - // of payload registers, return that value. Otherwise, return std::nullopt. 
- static std::optional - tryGetMaxUsedPayloadRegisterCount(const Module &M) { - auto *MD = M.getNamedMetadata(MDMaxUsedPayloadRegisterCountName); - if (!MD) - return {}; - return extractZExtI32Constant(MD->getOperand(0)); - }; + static std::optional tryGetStackSize(const Function *F) { return StackSize::tryGetValue(F); } - static void - setMaxUsedPayloadRegisterCount(Module &M, - uint32_t MaxUsedPayloadRegisterCount) { - auto *MD = M.getOrInsertNamedMetadata(MDMaxUsedPayloadRegisterCountName); - assert(MD && "Failed to create metadata node!"); - MD->clearOperands(); - MD->addOperand( - getI32MDConstant(M.getContext(), MaxUsedPayloadRegisterCount)); + static std::optional tryGetOutgoingRegisterCount(const Instruction *I) { + return OutgoingRegisterCount::tryGetValue(I); } - static std::optional - tryGetMaxPayloadRegisterCount(const Module &M) { - auto *MD = M.getNamedMetadata(MDMaxPayloadRegisterCountName); - if (!MD) - return {}; - return extractZExtI32Constant(MD->getOperand(0)); - }; - - static void setMaxPayloadRegisterCount(Module &M, - uint32_t MaxPayloadRegisterCount) { - auto *MD = M.getOrInsertNamedMetadata(MDMaxPayloadRegisterCountName); - assert(MD && "Failed to create metadata node!"); - MD->clearOperands(); - MD->addOperand(getI32MDConstant(M.getContext(), MaxPayloadRegisterCount)); - } +// A compile-time directive to generate helper classes for accessing module-wide metadata. +// TODO: Remove the generic, non-class scope helper functions once they are not required anymore. 
+#define MODULE_METADATA_HELPER(NAME, MD_NAME) \ + class NAME final { \ + public: \ + static std::optional tryGetValue(const Module *M) { \ + auto *MD = M->getNamedMetadata(MD_NAME); \ + if (!MD) \ + return {}; \ + return extractZExtI32Constant(MD->getOperand(0)); \ + } \ + static void setValue(Module *M, uint32_t Value) { \ + auto *MD = M->getOrInsertNamedMetadata(MD_NAME); \ + assert(MD && "Failed to create metadata node!"); \ + MD->clearOperands(); \ + MD->addOperand(getI32MDConstant(M->getContext(), Value)); \ + } \ + }; \ + static std::optional tryGet##NAME(const Module &M) { return NAME::tryGetValue(&M); } \ + static void set##NAME(Module &M, uint32_t Value) { NAME::setValue(&M, Value); } - static void setStackSize(Function *F, uint32_t StackSize) { - F->setMetadata(MDStackSizeName, - getI32MDConstant(F->getContext(), StackSize)); - } + MODULE_METADATA_HELPER(PreservedPayloadRegisterCount, MDPreservedPayloadRegisterCountName) + MODULE_METADATA_HELPER(MaxUsedPayloadRegisterCount, MDMaxUsedPayloadRegisterCountName) + MODULE_METADATA_HELPER(MaxPayloadRegisterCount, MDMaxPayloadRegisterCountName) + MODULE_METADATA_HELPER(Rtip, MDRtipName) + MODULE_METADATA_HELPER(Flags, MDFlagsName) - // If the function already has stacksize metadata, add the given value. - // Otherwise, assume an existing value of zero, and set the pass value. 
- static void addStackSize(Function *F, uint32_t AddedStackSize) { - auto ExistingSize = tryGetStackSize(F).value_or(0); - F->setMetadata( - MDStackSizeName, - getI32MDConstant(F->getContext(), ExistingSize + AddedStackSize)); - } +#undef MODULE_METADATA_HELPER - static std::optional tryGetStackSize(const Function *F) { - return extractZExtI32Constant(F->getMetadata(MDStackSizeName)); + // Old alias until clients are migrated to setPreservedPayloadRegisterCount: + static void setMinPayloadRegisterCount(Module &M, uint32_t PreservedPayloadRegisterCount) { + PreservedPayloadRegisterCount::setValue(&M, PreservedPayloadRegisterCount); } // If there is module-level metadata specifying the stack addrspace, // return that value. Otherwise, return std::nullopt. - static std::optional - tryGetStackAddrspace(const Module &M) { + static std::optional tryGetStackAddrspace(const Module &M) { auto *MD = M.getNamedMetadata(MDStackAddrspaceName); if (!MD) return {}; @@ -500,114 +381,72 @@ class ContHelper { static void setStackAddrspace(Module &M, ContStackAddrspace StackAddrspace) { auto *MD = M.getOrInsertNamedMetadata(MDStackAddrspaceName); MD->clearOperands(); - MD->addOperand(getI32MDConstant(M.getContext(), - static_cast(StackAddrspace))); - } - - static std::optional tryGetRtip(const Module &M) { - auto *MD = M.getNamedMetadata(MDRtipName); - if (!MD) - return {}; - return extractZExtI32Constant(MD->getOperand(0)); - }; - - static void setRtip(Module &M, uint32_t RtipLevel) { - auto *MD = M.getOrInsertNamedMetadata(MDRtipName); - MD->clearOperands(); - MD->addOperand(getI32MDConstant(M.getContext(), RtipLevel)); - } - - static std::optional tryGetFlags(const Module &M) { - auto *MD = M.getNamedMetadata(MDFlagsName); - if (!MD) - return {}; - return extractZExtI32Constant(MD->getOperand(0)); - }; - - static void setFlags(Module &M, uint32_t Flags) { - auto *MD = M.getOrInsertNamedMetadata(MDFlagsName); - MD->clearOperands(); - MD->addOperand(getI32MDConstant(M.getContext(), 
Flags)); - } - - static void setContinuationStateByteCount(Function &F, uint32_t ByteCount) { - F.setMetadata(MDStateName, getI32MDConstant(F.getContext(), ByteCount)); - } - - static std::optional - tryGetContinuationStateByteCount(const Function &F) { - return extractZExtI32Constant(F.getMetadata(MDStateName)); + MD->addOperand(getI32MDConstant(M.getContext(), static_cast(StackAddrspace))); } static Type *getPayloadTypeFromMetadata(const Function &Func) { if (MDNode *Node = Func.getMetadata(MDContPayloadTyName)) return getPayloadTypeFromMetadata(Node); - report_fatal_error(Twine(MDContPayloadTyName) + - " metadata not found on function " + Func.getName() + - "!"); + report_fatal_error(Twine(MDContPayloadTyName) + " metadata not found on function " + Func.getName() + "!"); } static Type *getPayloadTypeFromMetadata(const CallInst &CI) { if (MDNode *Node = CI.getMetadata(MDContPayloadTyName)) return getPayloadTypeFromMetadata(Node); - report_fatal_error(Twine(MDContPayloadTyName) + - " metadata not found on CallInst!"); + report_fatal_error(Twine(MDContPayloadTyName) + " metadata not found on CallInst!"); } static void setPayloadTypeMetadata(Instruction *I, Type *T) { I->setMetadata(ContHelper::MDContPayloadTyName, - MDNode::get(I->getContext(), - {ConstantAsMetadata::get(PoisonValue::get(T))})); + MDNode::get(I->getContext(), {ConstantAsMetadata::get(PoisonValue::get(T))})); + } + + static std::optional tryGetWaitMask(const CallInst &CI) { + return extractZExtI32Constant(CI.getMetadata(MDWaitMaskName)); } - static bool isLgcCpsModule(Module &Mod) { - return Mod.getNamedMetadata(MDLgcCpsModuleName) != nullptr; + static void setWaitMask(CallInst &CI, int32_t WaitMask) { + CI.setMetadata(MDWaitMaskName, getI32MDConstant(CI.getContext(), WaitMask)); } + static void removeWaitMask(CallInst &CI) { CI.setMetadata(MDWaitMaskName, nullptr); } + + static bool isLgcCpsModule(Module &Mod) { return Mod.getNamedMetadata(MDLgcCpsModuleName) != nullptr; } + // Specifies that an 
awaited call should wait on a wait mask. static void setIsWaitAwaitCall(CallInst &CI) { - CI.setMetadata(ContHelper::MDIsWaitAwaitName, - MDTuple::get(CI.getContext(), {})); + CI.setMetadata(ContHelper::MDIsWaitAwaitName, MDTuple::get(CI.getContext(), {})); } // Queries whether an awaited call should wait on a wait mask. - static bool isWaitAwaitCall(const CallInst &CI) { - return CI.getMetadata(MDIsWaitAwaitName) != nullptr; - } + static bool isWaitAwaitCall(const CallInst &CI) { return CI.getMetadata(MDIsWaitAwaitName) != nullptr; } - static void removeIsWaitAwaitMetadata(CallInst &CI) { - CI.setMetadata(ContHelper::MDIsWaitAwaitName, nullptr); - } + static void removeIsWaitAwaitMetadata(CallInst &CI) { CI.setMetadata(ContHelper::MDIsWaitAwaitName, nullptr); } /// Returns true if a call to the given function should be rematerialized /// in a shader of the specified kind. /// /// If no shader kind is specified, return false. - static bool isRematerializableLgcRtOp( - CallInst &CInst, - std::optional Kind = std::nullopt); + static bool isRematerializableLgcRtOp(CallInst &CInst, + std::optional Kind = std::nullopt); - static bool isLegacyEntryFunction(Function *Func) { - return Func->hasMetadata(MDEntryName); - } + static bool isLegacyEntryFunction(Function *Func) { return Func->hasMetadata(MDEntryName); } // Given a list of types, get a type that makes the list of types // occupy a specific number of dwords including it. - static Type *getPaddingType(const DataLayout &DL, LLVMContext &Context, - ArrayRef Types, unsigned TargetNumDwords); + static Type *getPaddingType(const DataLayout &DL, LLVMContext &Context, ArrayRef Types, + unsigned TargetNumDwords); // Given a list of types, add a type to the list that makes the list of types // occupy a specific number of dwords. 
- static void addPaddingType(const DataLayout &DL, LLVMContext &Context, - SmallVectorImpl &Types, + static void addPaddingType(const DataLayout &DL, LLVMContext &Context, SmallVectorImpl &Types, unsigned TargetNumDwords); // Given a list of values, add a value to the list that makes the list of // values occupy a specific number of dwords. - static void addPaddingValue(const DataLayout &DL, LLVMContext &Context, - SmallVectorImpl &Values, + static void addPaddingValue(const DataLayout &DL, LLVMContext &Context, SmallVectorImpl &Values, unsigned TargetNumDwords); // Returns whether the given flag is enabled in the given GpuRt module, @@ -617,12 +456,23 @@ class ContHelper { // Handles _AmdGetSetting_* intrinsics. static void handleGetSetting(Function &F, ArrayRef Settings); + + // Handles _AmdGetFuncAddr* intrinsics. + static void handleGetFuncAddr(Function &F, llvm_dialects::Builder &Builder); + + // Handles _AmdValueI32Count intrinsics. + static void handleValueI32Count(Function &F, IRBuilder<> &Builder); + + // Handles _AmdValueGetI32 intrinsics. + static void handleValueGetI32(Function &F, IRBuilder<> &Builder); + + // Handles _AmdValueSetI32 intrinsics. 
+ static void handleValueSetI32(Function &F, IRBuilder<> &Builder); }; class ShaderStageHelper final { public: - static DXILShaderKind - rtShaderStageToDxilShaderKind(lgc::rt::RayTracingShaderStage Stage) { + static DXILShaderKind rtShaderStageToDxilShaderKind(lgc::rt::RayTracingShaderStage Stage) { switch (Stage) { case lgc::rt::RayTracingShaderStage::RayGeneration: return DXILShaderKind::RayGeneration; @@ -648,8 +498,7 @@ class ShaderStageHelper final { } } - static std::optional - dxilShaderKindToRtShaderStage(DXILShaderKind Kind) { + static std::optional dxilShaderKindToRtShaderStage(DXILShaderKind Kind) { switch (Kind) { case DXILShaderKind::RayGeneration: return lgc::rt::RayTracingShaderStage::RayGeneration; @@ -677,7 +526,6 @@ DRIVER_FUNC_NAME(SetTriangleHitAttributes) DRIVER_FUNC_NAME(GetCandidateState) DRIVER_FUNC_NAME(GetCommittedState) DRIVER_FUNC_NAME(GetContinuationStackAddr) -DRIVER_FUNC_NAME(SetupRayGen) DRIVER_FUNC_NAME(ExitRayGen) DRIVER_FUNC_NAME(IsEndSearch) DRIVER_FUNC_NAME(GetLocalRootIndex) @@ -699,19 +547,6 @@ DRIVER_FUNC_NAME(ShaderStart) /// Free-standing helpers. -// Helper to visit all calls of a function. -// Expected type for Callback: -// void(CallInst &) -template -void forEachCall(Function &F, CallbackTy Callback) { - static_assert(std::is_invocable_v); - for (auto &Use : make_early_inc_range(F.uses())) { - if (auto *CInst = dyn_cast(Use.getUser())) - if (CInst->isCallee(&Use)) - Callback(*CInst); - } -} - // Replace all calls to a given function with some value. // Removes the original call. void replaceCallsToFunction(llvm::Function &F, llvm::Value &Replacement); @@ -723,8 +558,7 @@ void moveFunctionBody(Function &OldFunc, Function &NewFunc); // From a specific lgc.rt call operation, try to find information about the // corresponding GPURT implementation. 
-std::optional -findIntrImplEntryByIntrinsicCall(CallInst *Call); +std::optional findIntrImplEntryByIntrinsicCall(CallInst *Call); // Collect and remove unused function declarations. // @OnlyIntrinsics is used to differentiate whether all function declarations @@ -736,20 +570,6 @@ findIntrImplEntryByIntrinsicCall(CallInst *Call); // at the end of LowerRaytracingPipeline. bool removeUnusedFunctionDecls(Module *Mod, bool OnlyIntrinsics = true); -// For each basic block in Func, find the terminator. If it is contained in -// TerminatorOpcodes, then apply the callback on the terminator. -template >> -void forEachTerminator(Function *Func, ArrayRef TerminatorOpcodes, - CallbackTy Callback) { - for (auto &BB : *Func) { - auto *Terminator = BB.getTerminator(); - if (llvm::find(TerminatorOpcodes, Terminator->getOpcode()) != - TerminatorOpcodes.end()) - Callback(*Terminator); - } -} - // Do store-to-load forwarding for memory access to continuation stack. This is // helpful to mitigate the issue that coroutine passes in some cases still load // state from the in-memory continuation state when it is still available in SSA diff --git a/llvmraytracing/include/llvmraytracing/CpsStackLowering.h b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h index 53c8b6ebbf..c30b36955a 100644 --- a/llvmraytracing/include/llvmraytracing/CpsStackLowering.h +++ b/llvmraytracing/include/llvmraytracing/CpsStackLowering.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -32,6 +32,7 @@ #include "compilerutils/TypeLowering.h" #include "lgc/LgcCpsDialect.h" +#include "lgc/LgcIlCpsDialect.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/IRBuilder.h" @@ -54,38 +55,29 @@ constexpr unsigned ContinuationStackAlignment = 4; class CpsStackLowering { public: - CpsStackLowering(llvm::LLVMContext &Context, - unsigned LoweredCpsStackAddrSpace) + CpsStackLowering(llvm::LLVMContext &Context, unsigned LoweredCpsStackAddrSpace) : TypeLower(Context), LoweredCpsStackAddrSpace{LoweredCpsStackAddrSpace} { - BasePointer = llvm::ConstantPointerNull::get(llvm::PointerType::get( - llvm::Type::getInt8Ty(Context), LoweredCpsStackAddrSpace)); + BasePointer = llvm::ConstantPointerNull::get( + llvm::PointerType::get(llvm::Type::getInt8Ty(Context), LoweredCpsStackAddrSpace)); } - llvm::Function *lowerCpsStackOps(llvm::Function *Func, - llvm::Function *GetGlobalMemBase, - bool RequiresIncomingCsp, + llvm::Function *lowerCpsStackOps(llvm::Function *Func, llvm::Function *GetGlobalMemBase, bool RequiresIncomingCsp, llvm::Value *CspStorage = nullptr); // Get continuation stack size (in bytes). 
unsigned getStackSizeInBytes() { return StackSizeInBytes; } - inline unsigned getLoweredCpsStackAddrSpace() const { - return LoweredCpsStackAddrSpace; - } + inline unsigned getLoweredCpsStackAddrSpace() const { return LoweredCpsStackAddrSpace; } - inline unsigned - getLoweredCpsStackPointerSize(const llvm::DataLayout &Layout) { + inline unsigned getLoweredCpsStackPointerSize(const llvm::DataLayout &Layout) { return Layout.getPointerSize(LoweredCpsStackAddrSpace); } - static unsigned getContinuationStackAlignment() { - return ContinuationStackAlignment; - } + static unsigned getContinuationStackAlignment() { return ContinuationStackAlignment; } - TypeLowering TypeLower; + CompilerUtils::TypeLowering TypeLower; private: - llvm::SmallVector convertStackPtrToI32(TypeLowering &, - llvm::Type *); + llvm::SmallVector convertStackPtrToI32(CompilerUtils::TypeLowering &, llvm::Type *); void visitCpsAlloc(lgc::cps::AllocOp &); void visitCpsFree(lgc::cps::FreeOp &); void visitCpsPeek(lgc::cps::PeekOp &); @@ -97,12 +89,10 @@ class CpsStackLowering { void visitBitCastInst(llvm::BitCastInst &); void visitLoad(llvm::LoadInst &); void visitStore(llvm::StoreInst &); + void visitContinue(lgc::ilcps::ContinueOp &); + void visitWaitContinue(lgc::ilcps::WaitContinueOp &); llvm::Value *getRealMemoryAddress(llvm::IRBuilder<> &, llvm::Value *); - llvm::Function *addOrInitCsp(llvm::Function *F, - llvm::Function *GetGlobalMemBase, - bool RequiresIncomingCsp); - void visitContinueCalls(llvm::Function *); - void visitContinueCall(llvm::CallInst &); + llvm::Function *addOrInitCsp(llvm::Function *F, llvm::Function *GetGlobalMemBase, bool RequiresIncomingCsp); // Register a base pointer in the CpsStackLowering. // This is used to set the base address when using a stack residing in global @@ -112,9 +102,9 @@ class CpsStackLowering { // corresponding CSP as offset for the source / dest addresses. In case // @setRealBasePointer never was called, this just creates a pointer out of an // offset. 
- void setRealBasePointer(llvm::Value *BasePointer) { - this->BasePointer = BasePointer; - } + void setRealBasePointer(llvm::Value *BasePointer) { this->BasePointer = BasePointer; } + + llvm::Value *loadCsp(llvm::IRBuilder<> &Builder); llvm::Module *Mod; llvm::AllocaInst *CpsStackAlloca = nullptr; diff --git a/llvmraytracing/include/llvmraytracing/GpurtContext.h b/llvmraytracing/include/llvmraytracing/GpurtContext.h index 277c236280..14af64c654 100644 --- a/llvmraytracing/include/llvmraytracing/GpurtContext.h +++ b/llvmraytracing/include/llvmraytracing/GpurtContext.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -31,9 +31,8 @@ #pragma once -#include - #include "llvm-dialects/Dialect/ContextExtension.h" +#include namespace llvm { class Module; diff --git a/llvmraytracing/include/llvmraytracing/LowerRayQuery.h b/llvmraytracing/include/llvmraytracing/LowerRayQuery.h new file mode 100644 index 0000000000..bbaba8793b --- /dev/null +++ b/llvmraytracing/include/llvmraytracing/LowerRayQuery.h @@ -0,0 +1,174 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// LowerRayQuery.h : Pass to lower rayQuery ops by inlining GPURT functions. +// Typically used by running a pass class that derives from this one, setting m_staticFlags and setting up +// a GpurtContext as appropriate. 
+ +#pragma once + +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/IR/PassManager.h" + +namespace CompilerUtils { +class TypeLowering; +} + +namespace llvm_dialects { +class Builder; +} // namespace llvm_dialects + +namespace lgc { +class GpurtGetStaticFlagsOp; +class GpurtStackReadOp; +class GpurtStackWriteOp; +class GpurtLdsStackInitOp; + +namespace rtq { +class InitializeOp; +class TerminateOp; +class ProceedOp; +class IntersectionCommitAabbOp; +class IntersectionCommitTriangleOp; +class IntersectionTypeOp; +class RayTMinOp; +class RayFlagsOp; +class IntersectionTOp; +class IntersectionInstanceIdOp; +class IntersectionInstanceIndexOp; +class IntersectionContributionToHitGroupIndexOp; +class IntersectionGeometryIndexOp; +class IntersectionPrimitiveIndexOp; +class IntersectionBarycentricsOp; +class IntersectionFrontFaceOp; +class IntersectionCandidateAabbOpaqueOp; +class IntersectionObjectRayDirectionOp; +class IntersectionObjectRayOriginOp; +class IntersectionWorldRayDirectionOp; +class IntersectionWorldRayOriginOp; +class IntersectionObjectToWorldOp; +class IntersectionWorldToObjectOp; +class IntersectionTriangleVertexPositionsOp; +class GepOpaqueOp; +} // namespace rtq + +namespace rt { + +class LowerRayQuery : public llvm::PassInfoMixin { +public: + // Enum of GPURT functions. Order must match GpurtFuncNames array in LowerRayTracing.cpp. 
+ enum class GpurtFunc : unsigned { + Abort, // _RayQuery_Abort + Allocate, // _RayQuery_Allocate + CandidateAabbOpaque, // _RayQuery_CandidateAabbOpaque + CommitNonOpaqueTriangleHit, // _RayQuery_CommitNonOpaqueTriangleHit + CommitProceduralPrimitiveHit, // _RayQuery_CommitProceduralPrimitiveHit + EndInterleavedProceed, // _RayQuery_EndInterleavedProceed + FetchTrianglePositionFromRayQuery, // FetchTrianglePositionFromRayQuery + GeometryIndex, // _RayQuery_GeometryIndex + GetObjId, // _RayQuery_GetObjId + InstanceContributionToHitGroupIndex, // _RayQuery_InstanceContributionToHitGroupIndex + InstanceID, // _RayQuery_InstanceID + InstanceIndex, // _RayQuery_InstanceIndex + IntersectionType, // _RayQuery_IntersectionType + LongRayQueryProceed, // LongRayQueryProceedAMD + ObjectRayDirection, // _RayQuery_ObjectRayDirection + ObjectRayOrigin, // _RayQuery_ObjectRayOrigin + ObjectToWorld4x3, // _RayQuery_ObjectToWorld4x3 + PrimitiveIndex, // _RayQuery_PrimitiveIndex + RayFlags, // _RayQuery_RayFlags + RayQueryProceed, // RayQueryProceed + RayT, // _RayQuery_RayT + RayTMin, // _RayQuery_RayTMin + SetObjId, // _RayQuery_SetObjId + TraceRayInline, // TraceRayInline + TriangleBarycentrics, // _RayQuery_TriangleBarycentrics + TriangleFrontFace, // _RayQuery_TriangleFrontFace + WorldRayDirection, // _RayQuery_WorldRayDirection + WorldRayOrigin, // _RayQuery_WorldRayOrigin + WorldToObject4x3, // _RayQuery_WorldToObject4x3 + Count + }; + + llvm::PreservedAnalyses run(llvm::Module &module, llvm::ModuleAnalysisManager &analysisManager); + llvm::Type *replaceRayQueryType(llvm::Type *ty); + bool hasRtqOpaqueType(llvm::Type *ty); + +protected: + unsigned m_staticFlags = 0; + +private: + void visitInitializeOp(lgc::rtq::InitializeOp &inst); + void visitTerminateOp(lgc::rtq::TerminateOp &inst); + void visitProceedOp(lgc::rtq::ProceedOp &inst); + void visitIntersectionCommitAabbOp(lgc::rtq::IntersectionCommitAabbOp &inst); + void 
visitIntersectionCommitTriangleOp(lgc::rtq::IntersectionCommitTriangleOp &inst); + void visitIntersectionTypeOp(lgc::rtq::IntersectionTypeOp &inst); + void visitRayTMinOp(lgc::rtq::RayTMinOp &inst); + void visitRayFlagsOp(lgc::rtq::RayFlagsOp &inst); + void visitIntersectionTOp(lgc::rtq::IntersectionTOp &inst); + void visitIntersectionInstanceIdOp(lgc::rtq::IntersectionInstanceIdOp &inst); + void visitIntersectionInstanceIndexOp(lgc::rtq::IntersectionInstanceIndexOp &inst); + void visitIntersectionContributionToHitGroupIndexOp(lgc::rtq::IntersectionContributionToHitGroupIndexOp &inst); + void visitIntersectionGeometryIndexOp(lgc::rtq::IntersectionGeometryIndexOp &inst); + void visitIntersectionPrimitiveIndexOp(lgc::rtq::IntersectionPrimitiveIndexOp &inst); + void visitIntersectionBarycentricsOp(lgc::rtq::IntersectionBarycentricsOp &inst); + void visitIntersectionFrontFaceOp(lgc::rtq::IntersectionFrontFaceOp &inst); + void visitIntersectionCandidateAabbOpaqueOp(lgc::rtq::IntersectionCandidateAabbOpaqueOp &inst); + void visitIntersectionObjectRayDirectionOp(lgc::rtq::IntersectionObjectRayDirectionOp &inst); + void visitIntersectionObjectRayOriginOp(lgc::rtq::IntersectionObjectRayOriginOp &inst); + void visitIntersectionWorldRayDirectionOp(lgc::rtq::IntersectionWorldRayDirectionOp &inst); + void visitIntersectionWorldRayOriginOp(lgc::rtq::IntersectionWorldRayOriginOp &inst); + void visitIntersectionObjectToWorldOp(lgc::rtq::IntersectionObjectToWorldOp &inst); + void visitIntersectionWorldToObjectOp(lgc::rtq::IntersectionWorldToObjectOp &inst); + void visitIntersectionTriangleVertexPositionsOp(lgc::rtq::IntersectionTriangleVertexPositionsOp &inst); + void visitPtrToInt(llvm::PtrToIntInst &inst); + void visitGepOpaqueOp(lgc::rtq::GepOpaqueOp &inst); + void visitGetStaticFlagsOp(lgc::GpurtGetStaticFlagsOp &inst); + void visitStackReadOp(lgc::GpurtStackReadOp &inst); + void visitStackWriteOp(lgc::GpurtStackWriteOp &inst); + void 
visitLdsStackInitOp(lgc::GpurtLdsStackInitOp &inst); + + void visitHitAccessor(GpurtFunc instType, llvm::Value *rayQuery, bool committed, llvm::CallBase *inst); + void visitAccessor(GpurtFunc instType, llvm::Value *rayQuery, llvm::CallBase *inst); + llvm_dialects::VisitorResult visitAlloca(llvm::AllocaInst &alloca); + llvm_dialects::VisitorResult visitLifetimeIntrinsic(llvm::LifetimeIntrinsic &intrinc); + void initializeAlloc(llvm::Function *func); + void setRtqObjId(lgc::rtq::InitializeOp &inst, llvm::Value *rtq); + llvm::Value *getRayQuery(llvm::Value *rayQuery); + llvm::Function *getGpurtFunc(GpurtFunc gpurtFunc, bool optional = false); + + llvm::Module *m_gpurtModule = nullptr; + llvm::Function **m_gpurtFuncs = nullptr; + llvm::SmallVector m_rtqAlloc; + llvm::SmallSet m_funcsToLower; + llvm_dialects::Builder *m_builder = nullptr; + CompilerUtils::TypeLowering *m_typeLowering = nullptr; + llvm::Type *m_rtqType = nullptr; +}; + +} // namespace rt +} // namespace lgc diff --git a/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h b/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h index 640a9ba5ec..c9cdbc6e12 100644 --- a/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h +++ b/llvmraytracing/include/llvmraytracing/PayloadAccessQualifiers.h @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -337,10 +337,8 @@ enum class PAQShaderStage { llvm::raw_ostream &operator<<(llvm::raw_ostream &, PAQShaderStage); // List of all valid PAQShaderStage values -constexpr std::array(PAQShaderStage::Count)> - PAQShaderStages = {PAQShaderStage::Caller, PAQShaderStage::AnyHit, - PAQShaderStage::ClosestHit, PAQShaderStage::Miss}; +constexpr std::array(PAQShaderStage::Count)> PAQShaderStages = { + PAQShaderStage::Caller, PAQShaderStage::AnyHit, PAQShaderStage::ClosestHit, PAQShaderStage::Miss}; // Prints enum value in lower case (as in HLSL) enum class PAQAccessKind { Read = 0, Write, NumKinds }; @@ -356,8 +354,7 @@ class PAQAccessMask { return AccessMask & getBitmask(Stage, AccessKind); } - constexpr PAQAccessMask &set(PAQShaderStage Stage, PAQAccessKind AccessKind, - bool Value = true) { + constexpr PAQAccessMask &set(PAQShaderStage Stage, PAQAccessKind AccessKind, bool Value = true) { if (Value) { AccessMask |= getBitmask(Stage, AccessKind); } else { @@ -366,25 +363,20 @@ class PAQAccessMask { return *this; } - bool operator==(const PAQAccessMask &RHS) const { - return AccessMask == RHS.AccessMask; - } + bool operator==(const PAQAccessMask &RHS) const { return AccessMask == RHS.AccessMask; } bool operator!=(const PAQAccessMask &RHS) const { return !(*this == RHS); } // Prints HLSL-like qualifier string as in "write(..) : read(..)" // If AccessKind is set, only prints the part corresponding to that kind. 
- void print(llvm::raw_ostream &, - std::optional AccessKind = {}) const; + void print(llvm::raw_ostream &, std::optional AccessKind = {}) const; bool empty() const { return AccessMask == 0u; } private: // Offset of the bit corresponding to (Stage, AccessKind) in AccessMask - static constexpr uint32_t getBitmask(PAQShaderStage Stage, - PAQAccessKind AccessKind) { - uint32_t Offset = static_cast(Stage) * - static_cast(PAQAccessKind::NumKinds) + + static constexpr uint32_t getBitmask(PAQShaderStage Stage, PAQAccessKind AccessKind) { + uint32_t Offset = static_cast(Stage) * static_cast(PAQAccessKind::NumKinds) + static_cast(AccessKind); return 1u << Offset; } @@ -392,13 +384,11 @@ class PAQAccessMask { uint32_t AccessMask = 0u; static_assert(sizeof(AccessMask) * CHAR_BIT >= - static_cast(PAQShaderStage::Count) * - static_cast(PAQAccessKind::NumKinds), + static_cast(PAQShaderStage::Count) * static_cast(PAQAccessKind::NumKinds), "Increase width of AccessMask!"); }; -inline raw_ostream &operator<<(raw_ostream &Stream, - const PAQAccessMask &AccessMask) { +inline raw_ostream &operator<<(raw_ostream &Stream, const PAQAccessMask &AccessMask) { AccessMask.print(Stream); return Stream; } @@ -428,19 +418,18 @@ enum class PAQLifetimeClass : uint32_t { Count }; -constexpr std::array(PAQLifetimeClass::Count)> - PAQLifetimeClasses = {PAQLifetimeClass::Caller_To_Caller, - PAQLifetimeClass::AnyHit_To_Caller, - PAQLifetimeClass::Caller_To_ClosestHitAndMiss, - PAQLifetimeClass::Caller_To_ClosestHit, - PAQLifetimeClass::AnyHit_To_ClosestHitAndMiss, - PAQLifetimeClass::AnyHit_To_ClosestHit, - PAQLifetimeClass::Caller_To_AnyHit, - PAQLifetimeClass::AnyHit_To_AnyHit, - PAQLifetimeClass::ClosestHitAndMiss_To_Caller, - PAQLifetimeClass::ClosestHit_To_Caller, - PAQLifetimeClass::Miss_To_Caller}; +constexpr std::array(PAQLifetimeClass::Count)> PAQLifetimeClasses = { + PAQLifetimeClass::Caller_To_Caller, + PAQLifetimeClass::AnyHit_To_Caller, + PAQLifetimeClass::Caller_To_ClosestHitAndMiss, + 
PAQLifetimeClass::Caller_To_ClosestHit, + PAQLifetimeClass::AnyHit_To_ClosestHitAndMiss, + PAQLifetimeClass::AnyHit_To_ClosestHit, + PAQLifetimeClass::Caller_To_AnyHit, + PAQLifetimeClass::AnyHit_To_AnyHit, + PAQLifetimeClass::ClosestHitAndMiss_To_Caller, + PAQLifetimeClass::ClosestHit_To_Caller, + PAQLifetimeClass::Miss_To_Caller}; llvm::raw_ostream &operator<<(llvm::raw_ostream &, PAQLifetimeClass); @@ -464,17 +453,14 @@ enum class PAQSerializationLayoutKind { Count }; -constexpr std::array( - PAQSerializationLayoutKind::Count)> - PAQSerializationLayoutKinds = { - PAQSerializationLayoutKind::CallerOut, - PAQSerializationLayoutKind::AnyHitIn, - PAQSerializationLayoutKind::AnyHitOutAcceptHit, - PAQSerializationLayoutKind::AnyHitOutAcceptHitAndEndSearch, - PAQSerializationLayoutKind::MissIn, - PAQSerializationLayoutKind::ClosestHitOut, - PAQSerializationLayoutKind::MissOut}; +constexpr std::array(PAQSerializationLayoutKind::Count)> + PAQSerializationLayoutKinds = {PAQSerializationLayoutKind::CallerOut, + PAQSerializationLayoutKind::AnyHitIn, + PAQSerializationLayoutKind::AnyHitOutAcceptHit, + PAQSerializationLayoutKind::AnyHitOutAcceptHitAndEndSearch, + PAQSerializationLayoutKind::MissIn, + PAQSerializationLayoutKind::ClosestHitOut, + PAQSerializationLayoutKind::MissOut}; llvm::raw_ostream &operator<<(llvm::raw_ostream &, PAQSerializationLayoutKind); @@ -493,8 +479,7 @@ enum class PAQLivenessStatus : uint8_t { // This can be computed more efficiently for all combinations together, hence // using a lookup table instead of querying each individual combination. using PAQLivenessStatusTable = llvm::EnumeratedArray< - llvm::EnumeratedArray, + llvm::EnumeratedArray, PAQLifetimeClass, PAQLifetimeClass::Last, std::size_t>; // A permutation of all PAQLifetimeClass values. @@ -514,9 +499,7 @@ using PAQLivenessStatusTable = llvm::EnumeratedArray< // ordering to avoid unnecessary dummy fields. 
In other words, the order should // be a topological order of the lifetime domination graph. For example, // Caller_To_Caller should always come first. -using PAQLifetimeClassPackingOrder = - std::array(PAQLifetimeClass::Count)>; +using PAQLifetimeClassPackingOrder = std::array(PAQLifetimeClass::Count)>; // Determine an ordering of lifetime classes in the TraceRay serialization // layout. Currently, we use a fixed hardcoded order, but we could dynamically @@ -527,8 +510,7 @@ PAQLifetimeClassPackingOrder determineLifetimeClassPackingOrder(); // use a static ordering, this could be done manually in a large switch // statement, and was done so in the past, but that was a huge, error-prone case // distinction. -PAQLivenessStatusTable -computeLivenessStatusTable(const PAQLifetimeClassPackingOrder &Ordering); +PAQLivenessStatusTable computeLivenessStatusTable(const PAQLifetimeClassPackingOrder &Ordering); // Try to determine the unique layout kind for the given shader stage and access // kind. If there are multiple relevant layouts, returns std::nullopt; these @@ -536,8 +518,7 @@ computeLivenessStatusTable(const PAQLifetimeClassPackingOrder &Ordering); // - read(caller): There is no unique layout kind, because we import // multiple layouts (ClosestHitOut, MissOut). // - write(anyhit): There are multiple possible layout kinds. -std::optional -tryDetermineLayoutKind(PAQShaderStage ShaderStage, PAQAccessKind AccessKind); +std::optional tryDetermineLayoutKind(PAQShaderStage ShaderStage, PAQAccessKind AccessKind); // For every payload struct, we store PAQ qualifiers of its possibly nested // fields in a tree whose structure corresponds to the nested fields structure @@ -626,14 +607,12 @@ struct PAQPayloadConfig { // Prefer explicit constructor over aggregate initialization to catch cases // of missing fields in cases we add fields to this struct. 
PAQPayloadConfig(Type *PayloadType, uint32_t MaxHitAttributeByteCnt) - : PayloadTy{PayloadType}, MaxHitAttributeByteCount{ - MaxHitAttributeByteCnt} {} + : PayloadTy{PayloadType}, MaxHitAttributeByteCount{MaxHitAttributeByteCnt} {} Type *PayloadTy = nullptr; // Only relevant for TraceRay: uint32_t MaxHitAttributeByteCount = 0; - friend bool operator==(const PAQPayloadConfig &LHS, - const PAQPayloadConfig &RHS) { + friend bool operator==(const PAQPayloadConfig &LHS, const PAQPayloadConfig &RHS) { return std::tie(LHS.PayloadTy, LHS.MaxHitAttributeByteCount) == std::tie(RHS.PayloadTy, RHS.MaxHitAttributeByteCount); } @@ -643,12 +622,8 @@ template <> struct DenseMapInfo { using T = PAQPayloadConfig; static T getEmptyKey() { return T{DenseMapInfo::getEmptyKey(), 0}; } - static T getTombstoneKey() { - return T{DenseMapInfo::getTombstoneKey(), 0}; - } - static unsigned getHashValue(const T &Val) { - return llvm::hash_combine(Val.PayloadTy, Val.MaxHitAttributeByteCount); - } + static T getTombstoneKey() { return T{DenseMapInfo::getTombstoneKey(), 0}; } + static unsigned getHashValue(const T &Val) { return llvm::hash_combine(Val.PayloadTy, Val.MaxHitAttributeByteCount); } static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; } }; @@ -662,12 +637,8 @@ struct PAQIndexInterval { uint32_t size() const { return End - Begin; } - bool operator==(const PAQIndexInterval &Other) const { - return Begin == Other.Begin && End == Other.End; - } - bool operator!=(const PAQIndexInterval &Other) const { - return !(*this == Other); - } + bool operator==(const PAQIndexInterval &Other) const { return Begin == Other.Begin && End == Other.End; } + bool operator!=(const PAQIndexInterval &Other) const { return !(*this == Other); } // Sort lexicographically by (Begin, End) bool operator<(const PAQIndexInterval &Other) const { return std::tie(Begin, End) < std::tie(Other.Begin, Other.End); @@ -727,8 +698,7 @@ enum class PAQSerializationInfoKind { TraceRay = 1, CallShader }; // 
PAQTraceRaySerializationInfo and PAQCallShaderSerializationInfo inherit from // this. struct PAQSerializationInfoBase { - PAQSerializationInfoBase(PAQSerializationInfoKind InfoKind) - : Kind{InfoKind} {} + PAQSerializationInfoBase(PAQSerializationInfoKind InfoKind) : Kind{InfoKind} {} PAQSerializationInfoBase(PAQSerializationInfoBase &&) = default; PAQSerializationInfoBase &operator=(PAQSerializationInfoBase &&) = default; virtual ~PAQSerializationInfoBase() = default; @@ -793,15 +763,11 @@ struct PAQHitGroupLayoutInfo { // Stores complete serialization info for a particular payload type for the // whole TraceRay pipeline. struct PAQTraceRaySerializationInfo : public PAQSerializationInfoBase { - PAQTraceRaySerializationInfo() - : PAQSerializationInfoBase(PAQSerializationInfoKind::TraceRay) {} + PAQTraceRaySerializationInfo() : PAQSerializationInfoBase(PAQSerializationInfoKind::TraceRay) {} PAQTraceRaySerializationInfo(PAQTraceRaySerializationInfo &&) = default; - PAQTraceRaySerializationInfo & - operator=(PAQTraceRaySerializationInfo &&) = default; + PAQTraceRaySerializationInfo &operator=(PAQTraceRaySerializationInfo &&) = default; - static bool classof(const PAQSerializationInfoBase *IB) { - return IB->Kind == PAQSerializationInfoKind::TraceRay; - } + static bool classof(const PAQSerializationInfoBase *IB) { return IB->Kind == PAQSerializationInfoKind::TraceRay; } PAQPayloadConfig PAQConfig = {nullptr, 0}; @@ -810,8 +776,8 @@ struct PAQTraceRaySerializationInfo : public PAQSerializationInfoBase { // attribute size, required to compute the maximum required payload storage // size. AnyHit and ClosestHit shaders know the exact attribute type and size, // and use specialized layouts in SpecializedHitGroupLayouts. 
- llvm::EnumeratedArray + llvm::EnumeratedArray LayoutsByKind; // Specialized layouts for known attribute size @@ -837,12 +803,10 @@ struct PAQTraceRaySerializationInfo : public PAQSerializationInfoBase { // any PAQ qualifiers, assuming write(all) + read(all). Otherwise, RootNode // must be the root node of a PAQ tree containing PAQ access qualifiers for // PayloadType. Ownership of RootNode is transferred to the returned object. - static std::unique_ptr - create(Module &M, const PAQPayloadConfig &PAQConfig, const PAQNode &RootNode, - uint64_t PayloadRegisterCount); + static std::unique_ptr create(Module &M, const PAQPayloadConfig &PAQConfig, + const PAQNode &RootNode, uint64_t PayloadRegisterCount); - virtual void - collectAllNodes(SmallVectorImpl &Result) const override { + virtual void collectAllNodes(SmallVectorImpl &Result) const override { PAQSerializationInfoBase::collectAllNodes(Result); if (WorstCaseHitAttributesNode) Result.push_back(WorstCaseHitAttributesNode.get()); @@ -850,8 +814,7 @@ struct PAQTraceRaySerializationInfo : public PAQSerializationInfoBase { // Compute a PAQHitGroupLayoutInfo, containing specialized serialization // layouts for a fixed number of required I32s for hit attribute storage - PAQHitGroupLayoutInfo - createHitGroupLayoutInfo(Module &M, uint32_t PayloadHitAttrI32s) const; + PAQHitGroupLayoutInfo createHitGroupLayoutInfo(Module &M, uint32_t PayloadHitAttrI32s) const; }; // Serialization info for CallShader calls. @@ -859,36 +822,30 @@ struct PAQTraceRaySerializationInfo : public PAQSerializationInfoBase { // read/write all payload fields. This class allows a consistent implementation // without special case handling for CallShader. 
struct PAQCallShaderSerializationInfo : public PAQSerializationInfoBase { - PAQCallShaderSerializationInfo() - : PAQSerializationInfoBase(PAQSerializationInfoKind::CallShader) {} + PAQCallShaderSerializationInfo() : PAQSerializationInfoBase(PAQSerializationInfoKind::CallShader) {} - static bool classof(const PAQSerializationInfoBase *IB) { - return IB->Kind == PAQSerializationInfoKind::CallShader; - } + static bool classof(const PAQSerializationInfoBase *IB) { return IB->Kind == PAQSerializationInfoKind::CallShader; } PAQSerializationLayout CallShaderSerializationLayout; // Computes a serialization info for CallShader calls for the given payload // type. Note that CallShader calls are not affected by PAQ access qualifiers. static std::unique_ptr - create(Module &M, const PAQPayloadConfig &PAQConfig, - const PAQNode &PAQRootNode, uint64_t PayloadRegisterCount); + create(Module &M, const PAQPayloadConfig &PAQConfig, const PAQNode &PAQRootNode, uint64_t PayloadRegisterCount); }; // Helper class to obtain serialization infos, importing DXIL PAQ metadata, // and caching already seen serialization infos. class PAQSerializationInfoManager { public: - PAQSerializationInfoManager(Module *M, Module *GpurtLibrary, - uint32_t MaxPayloadRegisterCount); + PAQSerializationInfoManager(Module *M, Module *GpurtLibrary, uint32_t MaxPayloadRegisterCount); PAQSerializationInfoManager(const PAQSerializationInfoManager &) = delete; PAQSerializationInfoManager(PAQSerializationInfoManager &&) = default; // Returns the result of either getOrCreateTraceRaySerializationInfo or // getOrCreateCallShaderSerializationInfo depending on ShaderKind. 
- PAQSerializationInfoBase & - getOrCreateSerializationInfo(const PAQPayloadConfig &PayloadConfig, - lgc::rt::RayTracingShaderStage ShaderKind); + PAQSerializationInfoBase &getOrCreateSerializationInfo(const PAQPayloadConfig &PayloadConfig, + lgc::rt::RayTracingShaderStage ShaderKind); // Check whether a serialization info for the given // payload type has already been computed (or imported from DXIL metadata). @@ -896,34 +853,29 @@ class PAQSerializationInfoManager { // Otherwise, compute a new serialization info with trivial qualifiers // (write+read everything). // Result is non-const to allow adding custom hitgroup layouts later on. - PAQTraceRaySerializationInfo & - getOrCreateTraceRaySerializationInfo(const PAQPayloadConfig &PAQConfig); + PAQTraceRaySerializationInfo &getOrCreateTraceRaySerializationInfo(const PAQPayloadConfig &PAQConfig); // Same as above, but for CallShader. - PAQCallShaderSerializationInfo & - getOrCreateCallShaderSerializationInfo(const PAQPayloadConfig &PAQConfig); + PAQCallShaderSerializationInfo &getOrCreateCallShaderSerializationInfo(const PAQPayloadConfig &PAQConfig); // For ClosestHit and AnyHitOutAcceptHit layouts, the layout depends on the // actually used hit attribute type. In this case, the HitAttributesTy // argument must be non-null. In all other cases, it is ignored. - const PAQSerializationLayout & - getOrCreateTraceRayLayout(PAQTraceRaySerializationInfo &TraceRayInfo, - PAQSerializationLayoutKind LayoutKind, - Type *HitAttributesTy = nullptr); + const PAQSerializationLayout &getOrCreateTraceRayLayout(PAQTraceRaySerializationInfo &TraceRayInfo, + PAQSerializationLayoutKind LayoutKind, + Type *HitAttributesTy = nullptr); // Convenience wrapper that selects the layout to be used for the payload // incoming to a shader on shader entry. 
- const PAQSerializationLayout &getOrCreateShaderStartSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, - lgc::rt::RayTracingShaderStage ShaderKind, - Type *HitAttributesTy = nullptr); + const PAQSerializationLayout &getOrCreateShaderStartSerializationLayout(PAQSerializationInfoBase &SerializationInfo, + lgc::rt::RayTracingShaderStage ShaderKind, + Type *HitAttributesTy = nullptr); // Convenience wrapper that selects the layout to be used for the payload // outgoing of a shader on shader exit. - const PAQSerializationLayout &getOrCreateShaderExitSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, - lgc::rt::RayTracingShaderStage ShaderKind, - Type *HitAttributesTy = nullptr, - AnyHitExitKind AHExitKind = AnyHitExitKind::None); + const PAQSerializationLayout & + getOrCreateShaderExitSerializationLayout(PAQSerializationInfoBase &SerializationInfo, + lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy = nullptr, + AnyHitExitKind AHExitKind = AnyHitExitKind::None); enum class MaxPayloadStorageConsideration : uint8_t { ConsiderOnlyTraceRay, @@ -937,19 +889,14 @@ class PAQSerializationInfoManager { // TraceRay, this takes the maximum over all serialization formats. 
uint32_t getMaxPayloadStorageI32s( const PAQPayloadConfig &PAQConfig, - MaxPayloadStorageConsideration Consideration = - MaxPayloadStorageConsideration::ConsiderTraceRayAndCallShader); + MaxPayloadStorageConsideration Consideration = MaxPayloadStorageConsideration::ConsiderTraceRayAndCallShader); - uint32_t - getMaxPayloadStorageI32sForTraceRayFunc(const PAQPayloadConfig &PAQConfig) { - return getMaxPayloadStorageI32s( - PAQConfig, MaxPayloadStorageConsideration::ConsiderOnlyTraceRay); + uint32_t getMaxPayloadStorageI32sForTraceRayFunc(const PAQPayloadConfig &PAQConfig) { + return getMaxPayloadStorageI32s(PAQConfig, MaxPayloadStorageConsideration::ConsiderOnlyTraceRay); } - uint32_t - getMaxPayloadStorageI32sForCallShaderFunc(const PAQPayloadConfig &PAQConfig) { - return getMaxPayloadStorageI32s( - PAQConfig, MaxPayloadStorageConsideration::ConsiderOnlyCallShader); + uint32_t getMaxPayloadStorageI32sForCallShaderFunc(const PAQPayloadConfig &PAQConfig) { + return getMaxPayloadStorageI32s(PAQConfig, MaxPayloadStorageConsideration::ConsiderOnlyCallShader); } private: @@ -964,12 +911,10 @@ class PAQSerializationInfoManager { // all CallShader payload types, we construct trivial (i.e. always read and // write everything) PAQNodes on demand. 
MapVector> PAQRootNodes; - MapVector> - SerializationInfos; + MapVector> SerializationInfos; - SerializationInfoT & - getOrCreateSerializationInfo(Module &M, uint32_t MaxPayloadRegisterCount, - const PAQPayloadConfig &PAQConfig); + SerializationInfoT &getOrCreateSerializationInfo(Module &M, uint32_t MaxPayloadRegisterCount, + const PAQPayloadConfig &PAQConfig); }; PAQCache TraceRayCache; diff --git a/llvmraytracing/lib/CleanupContinuations.cpp b/llvmraytracing/lib/CleanupContinuations.cpp index 34d7414674..0f604c3310 100644 --- a/llvmraytracing/lib/CleanupContinuations.cpp +++ b/llvmraytracing/lib/CleanupContinuations.cpp @@ -12,8 +12,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -57,12 +57,12 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "lgc/LgcCpsDialect.h" -#include "lgc/LgcIlCpsDialect.h" -#include "llvm-dialects/Dialect/Visitor.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsUtil.h" #include "llvmraytracing/GpurtContext.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcIlCpsDialect.h" +#include "llvm-dialects/Dialect/Visitor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -81,8 +81,7 @@ using namespace lgc; /// Returns a map (origin BB, (call that created the continuation token, resume /// function)). 
static DenseMap> -findTokenOrigin(BasicBlock *BB, Value *V, - SmallVectorImpl &ToRemove) { +findTokenOrigin(BasicBlock *BB, Value *V, SmallVectorImpl &ToRemove) { DenseMap> Result; Value *Call = nullptr; Value *ResumeFun = nullptr; @@ -122,23 +121,17 @@ findTokenOrigin(BasicBlock *BB, Value *V, ToRemove.push_back(CallPhi); ToRemove.push_back(ResumeFunPhi); - for (auto CallEntry : - llvm::zip(CallPhi->blocks(), CallPhi->incoming_values())) { + for (auto CallEntry : llvm::zip(CallPhi->blocks(), CallPhi->incoming_values())) { auto *PhiBB = std::get<0>(CallEntry); auto *ResumeFunEntry = ResumeFunPhi->getIncomingValueForBlock(PhiBB); assert(ResumeFunEntry && "Need a resume fun for each call"); - assert(isa(ResumeFunEntry) && - "Resume function should be a constant function"); - - assert(isa(std::get<1>(CallEntry)) && - "Phi should come from a call"); - Result.insert(std::make_pair( - PhiBB, std::make_pair(cast(std::get<1>(CallEntry)), - ResumeFunEntry))); + assert(isa(ResumeFunEntry) && "Resume function should be a constant function"); + + assert(isa(std::get<1>(CallEntry)) && "Phi should come from a call"); + Result.insert(std::make_pair(PhiBB, std::make_pair(cast(std::get<1>(CallEntry)), ResumeFunEntry))); } } else { - assert(isa(ResumeFun) && - "Resume function should be a constant function"); + assert(isa(ResumeFun) && "Resume function should be a constant function"); assert(isa(Call) && "Call should be a CallInst"); auto *CallI = cast(Call); Result.insert(std::make_pair(BB, std::make_pair(CallI, ResumeFun))); @@ -174,27 +167,21 @@ void CleanupContinuationsPass::analyzeContinuation(Function &F, MDNode *MD) { // Without malloc call, we check later if the continuation state is used if (Data.MallocCall) { - Data.ContStateBytes = - cast(Data.MallocCall->getArgOperand(0))->getSExtValue(); + Data.ContStateBytes = cast(Data.MallocCall->getArgOperand(0))->getSExtValue(); } if (Data.ContStateBytes > MaxContStateBytes) MaxContStateBytes = Data.ContStateBytes; } -void 
CleanupContinuationsPass::updateCpsStack(Function *F, Function *NewFunc, - bool IsStart, - ContinuationData &CpsInfo) { +void CleanupContinuationsPass::updateCpsStack(Function *F, Function *NewFunc, bool IsStart, ContinuationData &CpsInfo) { - Builder->SetInsertPoint( - &*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); + Builder->SetInsertPoint(&*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); Value *CpsStack = nullptr; if (IsStart) { - CpsStack = Builder->create( - Builder->getInt32(CpsInfo.ContStateBytes)); + CpsStack = Builder->create(Builder->getInt32(CpsInfo.ContStateBytes)); CpsStack->setName("cont.state.stack.segment"); } else { - CpsStack = - Builder->create(Builder->getInt32(CpsInfo.ContStateBytes)); + CpsStack = Builder->create(Builder->getInt32(CpsInfo.ContStateBytes)); } SmallVector ToBeRemoved; @@ -205,11 +192,9 @@ void CleanupContinuationsPass::updateCpsStack(Function *F, Function *NewFunc, I->eraseFromParent(); } -static void updateCpsFunctionArgs(Function *OldFunc, Function *NewFunc, - const SmallVector &AllArgValues) { +static void updateCpsFunctionArgs(Function *OldFunc, Function *NewFunc, const SmallVector &AllArgValues) { // Set arg names for new function - for (unsigned Idx = 0; Idx != NewFunc->getFunctionType()->params().size(); - ++Idx) { + for (unsigned Idx = 0; Idx != NewFunc->getFunctionType()->params().size(); ++Idx) { Argument *Arg = NewFunc->getArg(Idx); Value *OldVal = AllArgValues[Idx]; if (OldVal) { @@ -219,10 +204,8 @@ static void updateCpsFunctionArgs(Function *OldFunc, Function *NewFunc, } } -static void buildCpsArgInfos(Function *F, bool IsStart, - SmallVector &AllArgTypes, - SmallVector &AllArgValues, - SmallVector &ParamAttrs, +static void buildCpsArgInfos(Function *F, bool IsStart, SmallVector &AllArgTypes, + SmallVector &AllArgValues, SmallVector &ParamAttrs, SmallVector &InstsToRemove) { auto &Context = F->getContext(); @@ -232,8 +215,7 @@ static void buildCpsArgInfos(Function *F, bool IsStart, 
assert(F->arg_size() >= 1 && "Entry function has at least one argument"); // Use all arguments except the last (pre-allocated buffer for the // coroutine passes) for the continuation start - for (auto Arg = F->arg_begin(), ArgEnd = F->arg_end() - 1; Arg != ArgEnd; - Arg++) { + for (auto Arg = F->arg_begin(), ArgEnd = F->arg_end() - 1; Arg != ArgEnd; Arg++) { AllArgTypes.push_back(Arg->getType()); AllArgValues.push_back(Arg); ParamAttrs.push_back(FAttrs.getParamAttrs(ArgNo)); @@ -263,9 +245,9 @@ static void buildCpsArgInfos(Function *F, bool IsStart, /// Find the continuation state pointer, either returned by the malloc or /// given as an argument -Value *CleanupContinuationsPass::getContinuationFramePtr( - Function *F, bool IsStart, const ContinuationData &ContinuationInfo, - SmallVector *InstsToRemove) { +Value *CleanupContinuationsPass::getContinuationFramePtr(Function *F, bool IsStart, + const ContinuationData &ContinuationInfo, + SmallVector *InstsToRemove) { if (!ContinuationInfo.MallocCall) return IsStart ? F->getArg(F->arg_size() - 1) : F->getArg(0); @@ -275,8 +257,7 @@ Value *CleanupContinuationsPass::getContinuationFramePtr( return ContinuationInfo.MallocCall; } // Look for the load of the allocated pointer - Instruction *Load = - cast(F->getArg(0)->getUniqueUndroppableUser()); + Instruction *Load = cast(F->getArg(0)->getUniqueUndroppableUser()); if (InstsToRemove) InstsToRemove->push_back(Load); // Load needs to be eliminated return Load; @@ -284,8 +265,7 @@ Value *CleanupContinuationsPass::getContinuationFramePtr( /// Remove call to continuation.free() in F, ContFree is the pointer to /// declaration of continuation.free(). 
-void CleanupContinuationsPass::removeContFreeCall(Function *F, - Function *ContFree) { +void CleanupContinuationsPass::removeContFreeCall(Function *F, Function *ContFree) { for (auto *User : make_early_inc_range(ContFree->users())) { if (auto *Call = dyn_cast(User)) { if (Call->getFunction() == F) { @@ -300,8 +280,7 @@ void CleanupContinuationsPass::removeContFreeCall(Function *F, /// Note: we skip the cps.free() insertion before calls to /// @lgc.ilcps.return. Because this is not useful any more as it means the /// thread termination. -void CleanupContinuationsPass::freeCpsStack(Function *F, - ContinuationData &CpsInfo) { +void CleanupContinuationsPass::freeCpsStack(Function *F, ContinuationData &CpsInfo) { struct VisitState { ContinuationData &CpsInfo; llvm_dialects::Builder *Builder; @@ -312,10 +291,9 @@ void CleanupContinuationsPass::freeCpsStack(Function *F, llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) .add([](auto &State, auto &Jump) { - if (Jump.getFunction() == State.F) { + if (Jump.getFunction() == State.F && State.CpsInfo.ContStateBytes) { State.Builder->SetInsertPoint(&Jump); - State.Builder->template create( - State.Builder->getInt32(State.CpsInfo.ContStateBytes)); + State.Builder->template create(State.Builder->getInt32(State.CpsInfo.ContStateBytes)); } }) .build(); @@ -331,8 +309,7 @@ void CleanupContinuationsPass::processContinuations() { // start part. // 3. Edit resume signature to add the state/rcr/shader-indxe/returnvalues. 
for (auto &FuncData : ToProcess) { - LLVM_DEBUG(dbgs() << "Processing function: " << FuncData.first->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "Processing function: " << FuncData.first->getName() << "\n"); for (auto *F : FuncData.second.Functions) { // Set same linkage as for start function if (F != FuncData.first) @@ -342,8 +319,7 @@ void CleanupContinuationsPass::processContinuations() { if (F->empty()) continue; - LLVM_DEBUG(dbgs() << "Processing function part: " << F->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "Processing function part: " << F->getName() << "\n"); // If this is the continuation start bool IsStart = F == FuncData.first; @@ -358,8 +334,7 @@ void CleanupContinuationsPass::processContinuations() { SmallVector ParamAttrs; SmallVector InstsToRemove; - buildCpsArgInfos(F, IsStart, AllArgTypes, AllArgValues, ParamAttrs, - InstsToRemove); + buildCpsArgInfos(F, IsStart, AllArgTypes, AllArgValues, ParamAttrs, InstsToRemove); if (ContFree) removeContFreeCall(F, ContFree); @@ -368,10 +343,8 @@ void CleanupContinuationsPass::processContinuations() { if (FuncData.second.MD) F->eraseMetadata(FuncData.second.MD->getMetadataID()); auto &Context = F->getContext(); - auto *NewFuncTy = - FunctionType::get(Type::getVoidTy(Context), AllArgTypes, false); - Function *NewFunc = - CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ParamAttrs); + auto *NewFuncTy = FunctionType::get(Type::getVoidTy(Context), AllArgTypes, false); + Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ParamAttrs); NewFunc->takeName(F); FuncData.second.NewFunctions.push_back(NewFunc); @@ -394,7 +367,7 @@ void CleanupContinuationsPass::processContinuations() { // We should only possibly have 'lgc.ilcps.return' or // 'lgc.cps.jump' call before unreachable. 
auto *Call = cast(--I->getIterator()); - if (auto *ContRet = dyn_cast(Call)) { + if (isa(Call)) { Builder->SetInsertPoint(Call); Builder->CreateRetVoid(); Call->eraseFromParent(); @@ -442,11 +415,9 @@ void CleanupContinuationsPass::processContinuations() { /// i32 %cr2, ...) /// /// Also handles cases where the token and resume function are behind a phi. -void CleanupContinuationsPass::handleContinue(ContinuationData &Data, - Instruction *Ret) { +void CleanupContinuationsPass::handleContinue(ContinuationData &Data, Instruction *Ret) { // Find the function call that generates the token - LLVM_DEBUG(dbgs() << "Converting ret to continue: " << *Ret - << "\nArgument: " << *Ret->getOperand(0) << "\n"); + LLVM_DEBUG(dbgs() << "Converting ret to continue: " << *Ret << "\nArgument: " << *Ret->getOperand(0) << "\n"); auto *BB = Ret->getParent(); SmallVector ToRemove; ToRemove.push_back(Ret); @@ -456,8 +427,7 @@ void CleanupContinuationsPass::handleContinue(ContinuationData &Data, I->eraseFromParent(); for (auto &Entry : Calls) { - LLVM_DEBUG(dbgs() << "Handling call: " << *Entry.second.first - << " with resume function " << Entry.second.second + LLVM_DEBUG(dbgs() << "Handling call: " << *Entry.second.first << " with resume function " << Entry.second.second << "\n"); auto *Call = Entry.second.first; auto *ResumeFun = Entry.second.second; @@ -465,15 +435,12 @@ void CleanupContinuationsPass::handleContinue(ContinuationData &Data, } if (BB->empty()) { - assert(BB->hasNPredecessorsOrMore(0) && - "Handled all continues but the block still has predecessors left"); + assert(BB->hasNPredecessorsOrMore(0) && "Handled all continues but the block still has predecessors left"); BB->eraseFromParent(); } } -void CleanupContinuationsPass::handleSingleContinue(ContinuationData &Data, - CallInst *Call, - Value *ResumeFun) { +void CleanupContinuationsPass::handleSingleContinue(ContinuationData &Data, CallInst *Call, Value *ResumeFun) { Builder->SetInsertPoint(Call); SmallVector TailArgs; @@ 
-487,8 +454,7 @@ void CleanupContinuationsPass::handleSingleContinue(ContinuationData &Data, ++SkipCount; } - auto *ResumeCR = Builder->create( - ContinuationReferenceType, ResumeFun); + auto *ResumeCR = Builder->create(ContinuationReferenceType, ResumeFun); TailArgs.push_back(ResumeCR); } else { @@ -498,8 +464,7 @@ void CleanupContinuationsPass::handleSingleContinue(ContinuationData &Data, // Skip continuation.reference, levels and potentially the wait mask. TailArgs.append(SmallVector(drop_begin(Call->args(), SkipCount))); auto *CR = Call->getArgOperand(0); - Value *Level = - Call->getArgOperand(ContHelper::isWaitAwaitCall(*Call) ? 2 : 1); + Value *Level = Call->getArgOperand(ContHelper::isWaitAwaitCall(*Call) ? 2 : 1); unsigned LevelImm = cast(Level)->getZExtValue(); // TODO: Continuation state are passed through stack for now. @@ -550,17 +515,14 @@ void CleanupContinuationsPass::lowerIntrinsicCall(Module &Mod) { // Signature of cps function: { state, rcr, shader-index, system-data} auto *SystemDataArg = Caller->getArg(CpsArgIdxSystemData); - assert(SystemDataArg->getType()->isStructTy() && - "SystemData should be struct type"); - auto *AllocaInsertPt = - &*Caller->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); + assert(SystemDataArg->getType()->isStructTy() && "SystemData should be struct type"); + auto *AllocaInsertPt = &*Caller->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); Builder->SetInsertPoint(AllocaInsertPt); auto *SystemData = Builder->CreateAlloca(SystemDataArg->getType()); Builder->CreateStore(SystemDataArg, SystemData); for (auto *Call : IntrinsicCalls) - replaceIntrinsicCall(*Builder, SystemDataArg->getType(), SystemData, - *Stage, Call, GpurtLibrary ? GpurtLibrary : &Mod, - CrossInliner); + replaceIntrinsicCall(*Builder, SystemDataArg->getType(), SystemData, *Stage, Call, + GpurtLibrary ? 
GpurtLibrary : &Mod, CrossInliner); } } @@ -576,8 +538,7 @@ void CleanupContinuationsPass::lowerGetResumePoint(Module &Mod) { auto JumpCall = findDominatedContinueCall(GetResumeCall); assert(JumpCall && "Should find a dominated call to lgc.cps.jump"); // For wait calls, skip the wait mask. - uint32_t SkipCount = - ContHelper::isWaitAwaitCall(*(JumpCall.value())) ? 1 : 0; + uint32_t SkipCount = ContHelper::isWaitAwaitCall(*(JumpCall.value())) ? 1 : 0; lgc::cps::JumpOp *Jump = cast(*JumpCall); Value *ResumeFn = *(Jump->getTail().begin() + SkipCount); @@ -592,13 +553,10 @@ void CleanupContinuationsPass::lowerGetResumePoint(Module &Mod) { } } -llvm::PreservedAnalyses -CleanupContinuationsPass::run(llvm::Module &Mod, - llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses CleanupContinuationsPass::run(llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the lgc-cleanup-continuations pass\n"); AnalysisManager.getResult(Mod); - auto &FAM = AnalysisManager.getResult(Mod) - .getManager(); + auto &FAM = AnalysisManager.getResult(Mod).getManager(); ToProcess.clear(); MaxContStateBytes = 0; diff --git a/llvmraytracing/lib/Continuations.cpp b/llvmraytracing/lib/Continuations.cpp index d314c9eb91..cc152e878d 100644 --- a/llvmraytracing/lib/Continuations.cpp +++ b/llvmraytracing/lib/Continuations.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -31,14 +31,15 @@ #include "llvmraytracing/Continuations.h" #include "compilerutils/CompilerUtils.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcIlCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Dialect.h" #include "llvm-dialects/Dialect/OpSet.h" -#include "llvmraytracing/ContinuationsUtil.h" -#include "llvmraytracing/GpurtContext.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntervalTree.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" @@ -66,11 +67,9 @@ using namespace llvm; -#define GPURTMAP_ENTRY(Op, GpurtName, AccessesHitData) \ - { \ - llvm_dialects::OpDescription::get(), { \ - GpurtName, AccessesHitData \ - } \ +#define GPURTMAP_ENTRY(Op, GpurtName, AccessesHitData) \ + { \ + llvm_dialects::OpDescription::get(), { GpurtName, AccessesHitData } \ } const llvm_dialects::OpMap llvm::LgcRtGpuRtMap = {{ @@ -104,9 +103,7 @@ const llvm_dialects::OpMap llvm::LgcRtGpuRtMap = {{ void llvm::replaceCallsToFunction(Function &F, Value &Replacement) { llvm::forEachCall(F, [&](CallInst &CInst) { // Basic sanity check. We should also check for dominance. 
- assert((!isa(&Replacement) || - cast(&Replacement)->getFunction() == - CInst.getFunction()) && + assert((!isa(&Replacement) || cast(&Replacement)->getFunction() == CInst.getFunction()) && "llvm::replaceCallsToFunction: Replacement should " "reside in the same function as CallInst to replace!"); CInst.replaceAllUsesWith(&Replacement); @@ -115,7 +112,7 @@ void llvm::replaceCallsToFunction(Function &F, Value &Replacement) { } bool llvm::isLgcRtOp(const llvm::Function *F) { - return F && F->getName().starts_with("lgc.rt"); + return F && F->getName().starts_with("lgc.rt."); } void llvm::moveFunctionBody(Function &OldFunc, Function &NewFunc) { @@ -126,8 +123,7 @@ void llvm::moveFunctionBody(Function &OldFunc, Function &NewFunc) { } } -std::optional -llvm::findIntrImplEntryByIntrinsicCall(CallInst *Call) { +std::optional llvm::findIntrImplEntryByIntrinsicCall(CallInst *Call) { if (!isLgcRtOp(Call->getCalledFunction())) return std::nullopt; @@ -143,8 +139,7 @@ bool llvm::removeUnusedFunctionDecls(Module *Mod, bool OnlyIntrinsics) { for (Function &F : make_early_inc_range(*Mod)) { if (F.isDeclaration() && F.user_empty()) { - if (!OnlyIntrinsics || - (isLgcRtOp(&F) || F.getName().starts_with("dx.op."))) { + if (!OnlyIntrinsics || (isLgcRtOp(&F) || F.getName().starts_with("dx.op."))) { F.eraseFromParent(); DidChange = true; } @@ -154,8 +149,7 @@ bool llvm::removeUnusedFunctionDecls(Module *Mod, bool OnlyIntrinsics) { return DidChange; } -bool ContHelper::isRematerializableLgcRtOp( - CallInst &CInst, std::optional Kind) { +bool ContHelper::isRematerializableLgcRtOp(CallInst &CInst, std::optional Kind) { using namespace lgc::rt; Function *Callee = CInst.getCalledFunction(); if (!llvm::isLgcRtOp(Callee)) @@ -163,8 +157,7 @@ bool ContHelper::isRematerializableLgcRtOp( // Always rematerialize static const llvm_dialects::OpSet RematerializableDialectOps = - llvm_dialects::OpSet::get(); + llvm_dialects::OpSet::get(); if (RematerializableDialectOps.contains(*Callee)) return true; 
@@ -175,11 +168,9 @@ bool ContHelper::isRematerializableLgcRtOp( // because ReportHit calls can change that. if (!Kind || *Kind == RayTracingShaderStage::Intersection) { static const llvm_dialects::OpSet RematerializableIntersectionDialectOps = - llvm_dialects::OpSet::get< - InstanceIdOp, InstanceIndexOp, GeometryIndexOp, - ObjectRayDirectionOp, ObjectRayOriginOp, ObjectToWorldOp, - PrimitiveIndexOp, RayFlagsOp, RayTminOp, WorldRayDirectionOp, - WorldRayOriginOp, WorldToObjectOp, InstanceInclusionMaskOp>(); + llvm_dialects::OpSet::get(); if (RematerializableIntersectionDialectOps.contains(*Callee)) return true; } @@ -187,8 +178,7 @@ bool ContHelper::isRematerializableLgcRtOp( return false; } -Type *ContHelper::getPaddingType(const DataLayout &DL, LLVMContext &Context, - ArrayRef Types, +Type *ContHelper::getPaddingType(const DataLayout &DL, LLVMContext &Context, ArrayRef Types, unsigned TargetNumDwords) { unsigned DwordsOccupied = lgc::cps::getArgumentDwordCount(DL, Types); @@ -200,25 +190,21 @@ Type *ContHelper::getPaddingType(const DataLayout &DL, LLVMContext &Context, return StructType::get(Context); } -void ContHelper::addPaddingType(const DataLayout &DL, LLVMContext &Context, - SmallVectorImpl &Types, +void ContHelper::addPaddingType(const DataLayout &DL, LLVMContext &Context, SmallVectorImpl &Types, unsigned TargetNumDwords) { Types.push_back(getPaddingType(DL, Context, Types, TargetNumDwords)); } -void ContHelper::addPaddingValue(const DataLayout &DL, LLVMContext &Context, - SmallVectorImpl &Values, +void ContHelper::addPaddingValue(const DataLayout &DL, LLVMContext &Context, SmallVectorImpl &Values, unsigned TargetNumDwords) { SmallVector Types; for (auto Value : Values) Types.push_back(Value->getType()); - Values.push_back( - PoisonValue::get(getPaddingType(DL, Context, Types, TargetNumDwords))); + Values.push_back(PoisonValue::get(getPaddingType(DL, Context, Types, TargetNumDwords))); } -bool ContHelper::getGpurtVersionFlag(Module &GpurtModule, - 
GpuRtVersionFlag Flag) { +bool ContHelper::getGpurtVersionFlag(Module &GpurtModule, GpuRtVersionFlag Flag) { auto *F = GpurtModule.getFunction(ContDriverFunc::GpurtVersionFlagsName); if (!F) { // If the GpuRt version flags intrinsic is not found, treat flags as set, @@ -233,8 +219,7 @@ bool ContHelper::getGpurtVersionFlag(Module &GpurtModule, return (Flags & static_cast(Flag)) != 0; } -void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, - Value *FramePtr) { +void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, Value *FramePtr) { assert(FramePtr); DenseMap> OffsetLoadMap; @@ -250,8 +235,7 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, // we introduce a sorted array to help detecting if there is conflicting // store within the range (load_begin, load_end). struct OffsetStorePair { - OffsetStorePair(int64_t Offset, StoreInst *Store) - : Offset(Offset), Store(Store) {} + OffsetStorePair(int64_t Offset, StoreInst *Store) : Offset(Offset), Store(Store) {} int64_t Offset; StoreInst *Store; }; @@ -277,15 +261,13 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, const DataLayout &DL = Gep->getModule()->getDataLayout(); unsigned OffsetBitWidth = DL.getIndexSizeInBits(Gep->getAddressSpace()); APInt Offset(OffsetBitWidth, 0); - bool ConstantOffset = Gep->accumulateConstantOffset( - Gep->getModule()->getDataLayout(), Offset); + bool ConstantOffset = Gep->accumulateConstantOffset(Gep->getModule()->getDataLayout(), Offset); // Give up on dynamic indexes for simplicity. 
if (!ConstantOffset) return; for (auto &UU : Gep->uses()) - Worklist.push_back( - PointerUse(&UU, Offset.getSExtValue() + PtrUse.Offset)); + Worklist.push_back(PointerUse(&UU, Offset.getSExtValue() + PtrUse.Offset)); break; } case Instruction::Load: { @@ -303,12 +285,10 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, assert(Store->getPointerOperand() == PtrUse.Ptr->get()); const DataLayout &DL = Store->getModule()->getDataLayout(); - unsigned StoredBytes = - DL.getTypeStoreSize(Store->getValueOperand()->getType()); + unsigned StoredBytes = DL.getTypeStoreSize(Store->getValueOperand()->getType()); SortedStores.push_back(OffsetStorePair(PtrUse.Offset, Store)); - StoreIntervals.insert(PtrUse.Offset, PtrUse.Offset + StoredBytes - 1, - Store); + StoreIntervals.insert(PtrUse.Offset, PtrUse.Offset + StoredBytes - 1, Store); break; } case Instruction::BitCast: @@ -326,17 +306,14 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, } LLVM_FALLTHROUGH; default: - LLVM_DEBUG(dbgs() << "Unhandled user of continuation frame pointer: " - << *U << '\n'); + LLVM_DEBUG(dbgs() << "Unhandled user of continuation frame pointer: " << *U << '\n'); return; } } StoreIntervals.create(); llvm::sort(SortedStores, - [](const OffsetStorePair &Left, const OffsetStorePair &Right) { - return Left.Offset < Right.Offset; - }); + [](const OffsetStorePair &Left, const OffsetStorePair &Right) { return Left.Offset < Right.Offset; }); // Nothing to do if there is no store. 
if (StoreIntervals.empty()) @@ -358,26 +335,21 @@ void llvm::forwardContinuationFrameStoreToLoad(DominatorTree &DT, for (auto *Load : Loads) { const DataLayout &DL = Load->getModule()->getDataLayout(); unsigned LoadBytes = DL.getTypeStoreSize(Load->getType()); - auto IntersectionsRight = - StoreIntervals.getContaining(Offset + LoadBytes - 1); + auto IntersectionsRight = StoreIntervals.getContaining(Offset + LoadBytes - 1); assert(!IntersectionsRight.empty()); // Make sure the store we found fully covers the loaded range and is the // only one. - if (IntersectionsRight.size() != 1 || - IntersectionsRight.front()->value() != StoreInfo.value()) + if (IntersectionsRight.size() != 1 || IntersectionsRight.front()->value() != StoreInfo.value()) continue; StoreInst *Store = StoreInfo.value(); // Get the first iterator pointing to a value that is strictly greater // than Offset. - auto *MaybeConflict = llvm::upper_bound( - SortedStores, Offset, [](int64_t V, const OffsetStorePair &Elem) { - return V < Elem.Offset; - }); + auto *MaybeConflict = llvm::upper_bound(SortedStores, Offset, + [](int64_t V, const OffsetStorePair &Elem) { return V < Elem.Offset; }); // Abort if there is another store which write to the memory region // strictly within the loaded region. - if (MaybeConflict != SortedStores.end() && - MaybeConflict->Offset < StoreInfo.right()) + if (MaybeConflict != SortedStores.end() && MaybeConflict->Offset < StoreInfo.right()) continue; // Currently we only forward if the value types are the same. 
This can @@ -441,48 +413,46 @@ static const char *toString(DXILShaderKind ShaderKind) { report_fatal_error("unexpected shader kind"); } -llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Str, - DXILShaderKind ShaderKind) { +llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Str, DXILShaderKind ShaderKind) { Str << ::toString(ShaderKind); return Str; } +llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Str, lgc::rt::RayTracingShaderStage Stage) { + Str << ::toString(ShaderStageHelper::rtShaderStageToDxilShaderKind(Stage)); + return Str; +} + void ContHelper::RegisterPasses(PassBuilder &PB, bool NeedDialectContext) { -#define HANDLE_PASS(NAME, CREATE_PASS) \ - if (innerPipeline.empty() && name == NAME) { \ - passMgr.addPass(CREATE_PASS); \ - return true; \ +#define HANDLE_PASS(NAME, CREATE_PASS) \ + if (innerPipeline.empty() && name == NAME) { \ + passMgr.addPass(CREATE_PASS); \ + return true; \ } -#define HANDLE_ANALYSIS(NAME, CREATE_PASS, IRUNIT) \ - if (innerPipeline.empty() && name == "require<" NAME ">") { \ - passMgr.addPass( \ - RequireAnalysisPass, \ - IRUNIT>()); \ - return true; \ - } \ - if (innerPipeline.empty() && name == "invalidate<" NAME ">") { \ - passMgr.addPass(InvalidateAnalysisPass< \ - std::remove_reference_t>()); \ - return true; \ +#define HANDLE_ANALYSIS(NAME, CREATE_PASS, IRUNIT) \ + if (innerPipeline.empty() && name == "require<" NAME ">") { \ + passMgr.addPass(RequireAnalysisPass, IRUNIT>()); \ + return true; \ + } \ + if (innerPipeline.empty() && name == "invalidate<" NAME ">") { \ + passMgr.addPass(InvalidateAnalysisPass>()); \ + return true; \ } PB.registerPipelineParsingCallback( - [](StringRef name, ModulePassManager &passMgr, - ArrayRef innerPipeline) { + [](StringRef name, ModulePassManager &passMgr, ArrayRef innerPipeline) { StringRef Params; (void)Params; #define CONT_MODULE_PASS HANDLE_PASS -#define CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) \ - HANDLE_ANALYSIS(NAME, CREATE_PASS, Module) +#define 
CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) HANDLE_ANALYSIS(NAME, CREATE_PASS, Module) #include "PassRegistry.inc" return false; }); PB.registerPipelineParsingCallback( - [](StringRef Name, FunctionPassManager &PassMgr, - ArrayRef InnerPipeline) { + [](StringRef Name, FunctionPassManager &PassMgr, ArrayRef InnerPipeline) { StringRef Params; (void)Params; #define CONT_FUNCTION_PASS HANDLE_PASS @@ -492,8 +462,7 @@ void ContHelper::RegisterPasses(PassBuilder &PB, bool NeedDialectContext) { }); PB.registerPipelineParsingCallback( - [](StringRef Name, LoopPassManager &PassMgr, - ArrayRef InnerPipeline) { + [](StringRef Name, LoopPassManager &PassMgr, ArrayRef InnerPipeline) { StringRef Params; (void)Params; #define CONT_LOOP_PASS HANDLE_PASS @@ -503,14 +472,13 @@ void ContHelper::RegisterPasses(PassBuilder &PB, bool NeedDialectContext) { }); PB.registerPipelineParsingCallback( - [](StringRef name, ModulePassManager &passMgr, - ArrayRef innerPipeline) { + [](StringRef name, ModulePassManager &passMgr, ArrayRef innerPipeline) { StringRef Params; (void)Params; -#define CONT_CGSCC_PASS(NAME, CREATE_PASS) \ - if (innerPipeline.empty() && name == NAME) { \ - passMgr.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \ - return true; \ +#define CONT_CGSCC_PASS(NAME, CREATE_PASS) \ + if (innerPipeline.empty() && name == NAME) { \ + passMgr.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \ + return true; \ } #include "PassRegistry.inc" return false; @@ -519,19 +487,15 @@ void ContHelper::RegisterPasses(PassBuilder &PB, bool NeedDialectContext) { #undef HANDLE_ANALYSIS #undef HANDLE_PASS - PB.registerAnalysisRegistrationCallback( - [=](ModuleAnalysisManager &AnalysisManager) { -#define CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) \ - AnalysisManager.registerPass([&] { return CREATE_PASS; }); + PB.registerAnalysisRegistrationCallback([=](ModuleAnalysisManager &AnalysisManager) { +#define CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) AnalysisManager.registerPass([&] { 
return CREATE_PASS; }); #include "PassRegistry.inc" - }); + }); auto *PIC = PB.getPassInstrumentationCallbacks(); if (PIC) { -#define CONT_PASS(NAME, CREATE_PASS) \ - PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); -#define CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) \ - PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CONT_PASS(NAME, CREATE_PASS) PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "PassRegistry.inc" } } @@ -544,8 +508,7 @@ void ContHelper::addContinuationPasses(ModulePassManager &MPM) { // Convert the system data struct to a value, so it isn't stored in the // continuation state - MPM.addPass(createModuleToFunctionPassAdaptor( - SROAPass(llvm::SROAOptions::ModifyCFG))); + MPM.addPass(createModuleToFunctionPassAdaptor(SROAPass(llvm::SROAOptions::ModifyCFG))); MPM.addPass(LowerAwaitPass()); MPM.addPass(CoroEarlyPass()); @@ -554,9 +517,13 @@ void ContHelper::addContinuationPasses(ModulePassManager &MPM) { MPM.addPass(CoroCleanupPass()); MPM.addPass(LegacyCleanupContinuationsPass()); - MPM.addPass(RegisterBufferPass()); + MPM.addPass(ContinuationsStatsReportPass()); MPM.addPass(DXILContPostProcessPass()); +#ifndef NDEBUG + MPM.addPass(ContinuationsLintPass()); +#endif + // The FixIrreducible pass does not cope with switch instructions, so lower // them before. MPM.addPass(createModuleToFunctionPassAdaptor(LowerSwitchPass())); @@ -571,8 +538,7 @@ void ContHelper::addContinuationPasses(ModulePassManager &MPM) { MPM.addPass(createModuleToFunctionPassAdaptor(FixIrreduciblePass())); } -void ContHelper::addDxilContinuationPasses(ModulePassManager &MPM, - Module *GpurtLibrary) { +void ContHelper::addDxilContinuationPasses(ModulePassManager &MPM, Module *GpurtLibrary) { if (GpurtLibrary) { // Set up GpurtContext so that later passes can access the library via it. 
auto &GpurtContext = lgc::GpurtContext::get(GpurtLibrary->getContext()); @@ -613,37 +579,18 @@ void ContHelper::addDxilGpurtLibraryPasses(ModulePassManager &MPM) { AnalysisKey DialectContextAnalysis::Key; -DialectContextAnalysis::DialectContextAnalysis(bool NeedDialectContext) - : NeedDialectContext(NeedDialectContext) {} +DialectContextAnalysis::DialectContextAnalysis(bool NeedDialectContext) : NeedDialectContext(NeedDialectContext) { +} -DialectContextAnalysis::Result -DialectContextAnalysis::run(llvm::Module &M, - llvm::ModuleAnalysisManager &AnalysisManager) { +DialectContextAnalysis::Result DialectContextAnalysis::run(llvm::Module &M, + llvm::ModuleAnalysisManager &AnalysisManager) { if (NeedDialectContext) { - Context = llvm_dialects::DialectContext::make( - M.getContext()); + Context = llvm_dialects::DialectContext::make(M.getContext()); } return DialectContextAnalysis::Result(); } -Function *llvm::cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, - ArrayRef ArgAttrs) { - FunctionType *FuncTy = NewType.asFunctionType(F.getContext()); - Function *NewFunc = CompilerUtils::cloneFunctionHeader(F, FuncTy, ArgAttrs); - NewType.writeMetadata(NewFunc); - return NewFunc; -} - -Function *llvm::cloneFunctionHeaderWithTypes(Function &F, ContFuncTy &NewType, - AttributeList FnAttr) { - FunctionType *FuncTy = NewType.asFunctionType(F.getContext()); - Function *NewFunc = CompilerUtils::cloneFunctionHeader(F, FuncTy, FnAttr); - NewType.writeMetadata(NewFunc); - return NewFunc; -} - static bool stripMDCasts(MDTuple *MDTup) { bool Changed = false; for (unsigned I = 0; I < MDTup->getNumOperands(); I++) { @@ -660,8 +607,7 @@ static bool stripMDCasts(MDTuple *MDTup) { if (Const != Val->getValue()) { auto *NewMD = ConstantAsMetadata::get(Const); - LLVM_DEBUG(dbgs() << "Replace " << *Val->getValue() - << " in metadata with " << *NewMD << "\n"); + LLVM_DEBUG(dbgs() << "Replace " << *Val->getValue() << " in metadata with " << *NewMD << "\n"); 
MDTup->replaceOperandWith(I, NewMD); Changed = true; } @@ -699,8 +645,7 @@ bool llvm::fixupDxilMetadata(Module &M) { } Function *llvm::getContinuationStackGlobalMemBase(Module &M) { - auto *F = - M.getFunction(ContDriverFunc::GetContinuationStackGlobalMemBaseName); + auto *F = M.getFunction(ContDriverFunc::GetContinuationStackGlobalMemBaseName); assert(F && "Could not find GetContinuationStackGlobalMemBase function"); assert(F->arg_size() == 0 && F->getReturnType()->isIntegerTy(64)); return F; @@ -708,8 +653,7 @@ Function *llvm::getContinuationStackGlobalMemBase(Module &M) { bool llvm::isCastGlobal(GlobalValue *Global, Value *V) { while (auto *Expr = dyn_cast_or_null(V)) { - if (Expr->getOpcode() == Instruction::BitCast || - Expr->getOpcode() == Instruction::AddrSpaceCast) { + if (Expr->getOpcode() == Instruction::BitCast || Expr->getOpcode() == Instruction::AddrSpaceCast) { V = Expr->getOperand(0); } else { break; @@ -720,63 +664,15 @@ bool llvm::isCastGlobal(GlobalValue *Global, Value *V) { uint64_t llvm::getInlineHitAttrsBytes(Module &M) { const DataLayout &DL = M.getDataLayout(); - auto *GetTriangleHitAttributes = - M.getFunction(ContDriverFunc::GetTriangleHitAttributesName); - assert(GetTriangleHitAttributes && - "Could not find GetTriangleHitAttributes function"); + auto *GetTriangleHitAttributes = M.getFunction(ContDriverFunc::GetTriangleHitAttributesName); + assert(GetTriangleHitAttributes && "Could not find GetTriangleHitAttributes function"); auto *InlineHitAttrsTy = GetTriangleHitAttributes->getReturnType(); - uint64_t InlineHitAttrsBytes = - DL.getTypeStoreSize(InlineHitAttrsTy).getFixedValue(); - assert( - (InlineHitAttrsBytes % RegisterBytes) == 0 && - "Size of inline hit attributes must be a multiple of the register size"); + uint64_t InlineHitAttrsBytes = DL.getTypeStoreSize(InlineHitAttrsTy).getFixedValue(); + assert((InlineHitAttrsBytes % RegisterBytes) == 0 && + "Size of inline hit attributes must be a multiple of the register size"); return 
InlineHitAttrsBytes; } -Function *llvm::getRegisterBufferSetPointerBarrier(Module &M) { - const char *Name = "registerbuffer.setpointerbarrier"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - auto *Void = Type::getVoidTy(C); - auto *FuncTy = FunctionType::get(Void, {}, true); - AttributeList AL = AttributeList::get( - C, AttributeList::FunctionIndex, - {Attribute::NoFree, Attribute::NoRecurse, Attribute::NoSync, - Attribute::NoUnwind, Attribute::WillReturn}); - auto *Func = - cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); - Func->setOnlyAccessesArgMemory(); - Func->setOnlyWritesMemory(); - return Func; -} - -MDTuple *llvm::createRegisterBufferMetadata(LLVMContext &Context, - const RegisterBufferMD &MD) { - // Metadata format: {i32 registersize, i32 addrspace} - auto *I32 = Type::getInt32Ty(Context); - return MDTuple::get( - Context, - {ConstantAsMetadata::get(ConstantInt::get(I32, MD.RegisterCount)), - ConstantAsMetadata::get(ConstantInt::get(I32, MD.Addrspace))}); -} - -RegisterBufferMD llvm::getRegisterBufferMetadata(const MDNode *MD) { - const auto *TMD = dyn_cast(MD); - assert(TMD && TMD->getNumOperands() == 2 && - "registerbuffer metadata must be of format { i32, i32 }"); - const auto *IMD = mdconst::dyn_extract(TMD->getOperand(0)); - assert(IMD && IMD->getBitWidth() == 32 && - "first registerbuffer metadata must be an i32"); - RegisterBufferMD Data; - Data.RegisterCount = IMD->getZExtValue(); - IMD = mdconst::dyn_extract(TMD->getOperand(1)); - assert(IMD && IMD->getBitWidth() == 32 && - "second registerbuffer metadata must be an i32"); - Data.Addrspace = IMD->getZExtValue(); - return Data; -} - Function *llvm::getAccelStructAddr(Module &M, Type *HandleTy) { auto *Name = "amd.dx.getAccelStructAddr"; if (auto *F = M.getFunction(Name)) @@ -784,30 +680,15 @@ Function *llvm::getAccelStructAddr(Module &M, Type *HandleTy) { auto &C = M.getContext(); auto *I64 = Type::getInt64Ty(C); auto *FuncTy = FunctionType::get(I64, 
{HandleTy}, false); - AttributeList AL = AttributeList::get( - C, AttributeList::FunctionIndex, - {Attribute::NoFree, Attribute::NoRecurse, Attribute::NoSync, - Attribute::NoUnwind, Attribute::Speculatable, Attribute::WillReturn}); - auto *Func = - cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); + AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, + {Attribute::NoFree, Attribute::NoRecurse, Attribute::NoSync, + Attribute::NoUnwind, Attribute::Speculatable, Attribute::WillReturn}); + auto *Func = cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); Func->setOnlyAccessesArgMemory(); Func->setOnlyReadsMemory(); return Func; } -Function *llvm::getContinuationContinue(Module &M) { - auto *Name = "continuation.continue"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - auto *Void = Type::getVoidTy(C); - auto *I64 = Type::getInt64Ty(C); - auto *FuncTy = FunctionType::get(Void, {I64}, true); - AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, - {Attribute::NoReturn}); - return cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); -} - Function *llvm::extractFunctionOrNull(Metadata *N) { auto *C = mdconst::extract_or_null(N); // Strip bitcasts @@ -820,13 +701,20 @@ Function *llvm::extractFunctionOrNull(Metadata *N) { return dyn_cast_or_null(C); } +bool llvm::isStartFunc(Function *Func) { + if (auto *MD = dyn_cast_or_null(Func->getMetadata(ContHelper::MDContinuationName))) { + auto *EntryF = extractFunctionOrNull(MD->getOperand(0)); + return Func == EntryF; + } + return false; +} + /// Recurse into the first member of the given SystemData to find an object of /// the wanted type. 
-Value *llvm::getDXILSystemData(IRBuilder<> &B, Value *SystemData, - Type *SystemDataTy, Type *Ty) { +Value *llvm::getDXILSystemData(IRBuilder<> &B, Value *SystemData, Type *SystemDataTy, Type *Ty) { assert(Ty->isStructTy() && "Expected a struct type for system data"); - LLVM_DEBUG(dbgs() << "Searching for system data type " << *Ty << " in " - << *SystemData << " (" << *SystemDataTy << ")\n"); + LLVM_DEBUG(dbgs() << "Searching for system data type " << *Ty << " in " << *SystemData << " (" << *SystemDataTy + << ")\n"); Type *OrigSystemDataTy = SystemDataTy; SmallVector Indices; // Dereference pointer @@ -837,8 +725,7 @@ Value *llvm::getDXILSystemData(IRBuilder<> &B, Value *SystemData, if (!StructTy) { LLVM_DEBUG(dbgs() << "System data struct: "; SystemDataTy->dump()); LLVM_DEBUG(dbgs() << "Wanted struct type: "; Ty->dump()); - report_fatal_error( - "Invalid system data struct: Did not contain the needed struct type"); + report_fatal_error("Invalid system data struct: Did not contain the needed struct type"); } SystemDataTy = StructTy->getElementType(0); Indices.push_back(B.getInt32(0)); @@ -848,10 +735,9 @@ Value *llvm::getDXILSystemData(IRBuilder<> &B, Value *SystemData, return B.CreateInBoundsGEP(OrigSystemDataTy, SystemData, Indices); } -CallInst *llvm::replaceIntrinsicCall( - IRBuilder<> &B, Type *SystemDataTy, Value *SystemData, - lgc::rt::RayTracingShaderStage Kind, CallInst *Call, Module *GpurtLibrary, - CompilerUtils::CrossModuleInliner &Inliner) { +CallInst *llvm::replaceIntrinsicCall(IRBuilder<> &B, Type *SystemDataTy, Value *SystemData, + lgc::rt::RayTracingShaderStage Kind, CallInst *Call, Module *GpurtLibrary, + CompilerUtils::CrossModuleInliner &Inliner) { B.SetInsertPoint(Call); auto IntrImplEntry = findIntrImplEntryByIntrinsicCall(Call); @@ -861,40 +747,31 @@ CallInst *llvm::replaceIntrinsicCall( std::string Name = ("_cont_" + IntrImplEntry->Name).str(); auto *IntrImpl = GpurtLibrary->getFunction(Name); if (!IntrImpl) - 
report_fatal_error(Twine("Intrinsic implementation '") + Name + - "' not found"); + report_fatal_error(Twine("Intrinsic implementation '") + Name + "' not found"); SmallVector Arguments; // Add the right system data type LLVM_DEBUG(dbgs() << "Getting system data for " << Name << "\n"); - Arguments.push_back(getDXILSystemData(B, SystemData, SystemDataTy, - getFuncArgPtrElementType(IntrImpl, 0))); + Arguments.push_back(getDXILSystemData(B, SystemData, SystemDataTy, getFuncArgPtrElementType(IntrImpl, 0))); // For hit data accessors, get the hit data struct if (IntrImplEntry->AccessesHitData) { Function *GetHitData; - if (Kind == lgc::rt::RayTracingShaderStage::AnyHit || - Kind == lgc::rt::RayTracingShaderStage::Intersection) { - auto *GetCandidateState = - GpurtLibrary->getFunction(ContDriverFunc::GetCandidateStateName); + if (Kind == lgc::rt::RayTracingShaderStage::AnyHit || Kind == lgc::rt::RayTracingShaderStage::Intersection) { + auto *GetCandidateState = GpurtLibrary->getFunction(ContDriverFunc::GetCandidateStateName); assert(GetCandidateState && "Could not find GetCandidateState function"); - assert( - GetCandidateState->getReturnType()->isStructTy() && - GetCandidateState->arg_size() == 1 - // Traversal data - && - GetCandidateState->getFunctionType()->getParamType(0)->isPointerTy()); + assert(GetCandidateState->getReturnType()->isStructTy() && + GetCandidateState->arg_size() == 1 + // Traversal data + && GetCandidateState->getFunctionType()->getParamType(0)->isPointerTy()); GetHitData = GetCandidateState; } else { - auto *GetCommittedState = - GpurtLibrary->getFunction(ContDriverFunc::GetCommittedStateName); + auto *GetCommittedState = GpurtLibrary->getFunction(ContDriverFunc::GetCommittedStateName); assert(GetCommittedState && "Could not find GetCommittedState function"); - assert( - GetCommittedState->getReturnType()->isStructTy() && - GetCommittedState->arg_size() == 1 - // Traversal data - && - 
GetCommittedState->getFunctionType()->getParamType(0)->isPointerTy()); + assert(GetCommittedState->getReturnType()->isStructTy() && + GetCommittedState->arg_size() == 1 + // Traversal data + && GetCommittedState->getFunctionType()->getParamType(0)->isPointerTy()); GetHitData = GetCommittedState; } // The intrinsic expects a pointer, so create an alloca @@ -904,10 +781,8 @@ CallInst *llvm::replaceIntrinsicCall( B.restoreIP(IP); auto *HitData = Inliner - .inlineCall( - B, GetHitData, - {getDXILSystemData(B, SystemData, SystemDataTy, - getFuncArgPtrElementType(GetHitData, 0))}) + .inlineCall(B, GetHitData, + {getDXILSystemData(B, SystemData, SystemDataTy, getFuncArgPtrElementType(GetHitData, 0))}) .returnValue; B.CreateStore(HitData, HitDataAlloca); Arguments.push_back(HitDataAlloca); @@ -916,8 +791,7 @@ CallInst *llvm::replaceIntrinsicCall( // Skip the intrinsic id argument, the system data argument and the hit data // argument auto *IntrType = IntrImpl->getFunctionType(); - for (unsigned CallI = 0, ImplI = IntrImplEntry->AccessesHitData ? 2 : 1, - ImplE = IntrType->getNumParams(); + for (unsigned CallI = 0, ImplI = IntrImplEntry->AccessesHitData ? 2 : 1, ImplE = IntrType->getNumParams(); ImplI < ImplE; CallI++, ImplI++) { Value *Arg = Call->getArgOperand(CallI); Type *ArgType = Arg->getType(); @@ -934,8 +808,7 @@ CallInst *llvm::replaceIntrinsicCall( raw_string_ostream ToStream(To); ArgType->print(FromStream, true); NewType->print(ToStream, true); - report_fatal_error(Twine("Can't convert ") + From + " to " + To + - " for intrinsic '" + IntrImplEntry->Name + "'"); + report_fatal_error(Twine("Can't convert ") + From + " to " + To + " for intrinsic '" + IntrImplEntry->Name + "'"); } } @@ -947,8 +820,7 @@ CallInst *llvm::replaceIntrinsicCall( // requires [3 x <3 x float>]. 
Replacement = PoisonValue::get(Call->getType()); for (unsigned i = 0; i < 3; i++) { - Replacement = - B.CreateInsertValue(Replacement, B.CreateExtractValue(NewCall, i), i); + Replacement = B.CreateInsertValue(Replacement, B.CreateExtractValue(NewCall, i), i); } } @@ -962,37 +834,75 @@ CallInst *llvm::replaceIntrinsicCall( } /// Transform enqueue intrinsics to continuation intrinsics -static void replaceEnqueueIntrinsic(Function &F, Function *NewFunc) { - for (auto &Use : make_early_inc_range(F.uses())) { - if (auto *CInst = dyn_cast(Use.getUser())) { - if (CInst->isCallee(&Use)) { - llvm_dialects::Builder B(CInst); - SmallVector Args(CInst->args()); - bool IsEnqueue = F.getName().contains("Enqueue"); - // Add the current function as return address to the call. - // Used when Traversal calls AnyHit or Intersection. - if (IsEnqueue && F.getName().contains("EnqueueCall")) { - bool HasWaitMask = F.getName().contains("WaitEnqueue"); - auto *RetAddr = B.create( - B.getInt64Ty(), CInst->getFunction()); - Args.insert(Args.begin() + (HasWaitMask ? 
3 : 2), RetAddr); - } +static bool replaceEnqueueIntrinsic(Function &F) { + bool Changed = false; + StringRef FuncName = F.getName(); + bool IsEnqueueCall = FuncName.contains("EnqueueCall"); + bool IsWaitEnqueue = FuncName.contains("WaitEnqueue"); + llvm_dialects::Builder B{F.getContext()}; + + auto CreateContinue = [&B](const CallInst &CInst, SmallVectorImpl &TailArgs, + std::optional ReturnAddr) -> CallInst * { + Value *ShaderAddr = CInst.getArgOperand(0); + TailArgs.append(CInst.arg_begin() + 2, CInst.arg_end()); + return B.create(ShaderAddr, PoisonValue::get(B.getInt32Ty()), + ReturnAddr.value_or(CInst.getArgOperand(1)), TailArgs); + }; + + auto CreateWaitContinue = [&B](const CallInst &CInst, SmallVectorImpl &TailArgs, + std::optional ReturnAddr) -> CallInst * { + Value *ShaderAddr = CInst.getArgOperand(0); + TailArgs.append(CInst.arg_begin() + 3, CInst.arg_end()); + Value *WaitMask = CInst.getArgOperand(1); + return B.create(ShaderAddr, WaitMask, PoisonValue::get(B.getInt32Ty()), + ReturnAddr.value_or(CInst.getArgOperand(2)), TailArgs); + }; - B.CreateCall(NewFunc, Args); - CompilerUtils::createUnreachable(B); + llvm::forEachCall(F, [&](CallInst &CInst) { + B.SetInsertPoint(&CInst); + SmallVector TailArgs; + CallInst *NewCall = nullptr; + if (IsEnqueueCall) { + // Add the current function as return address to the call. + // Used when Traversal calls AnyHit or Intersection. + auto *RetAddr = B.create(B.getInt64Ty(), CInst.getFunction()); + if (IsWaitEnqueue) { + // Handle WaitEnqueueCall. + NewCall = CreateWaitContinue(CInst, TailArgs, RetAddr); + } else { + // Handle EnqueueCall. + NewCall = CreateContinue(CInst, TailArgs, RetAddr); } + + } else if (IsWaitEnqueue) { + // Handle WaitEnqueue. + NewCall = CreateWaitContinue(CInst, TailArgs, std::nullopt); + } else { + // Handle Enqueue. 
+ NewCall = CreateContinue(CInst, TailArgs, std::nullopt); } - } + + // NOTE: Inlining ExitRayGen in LowerRaytracingPipeline can cause continue + // ops whose name is suffixed .cloned.*, which don't get picked up by the + // direct name comparison we use when checking for existence of payload + // metadata in DXILContPostProcess. With the new dialect ops, these get + // picked up, so they need to have outgoing register count. + if (NewCall->getFunction()->getName() == ContDriverFunc::ExitRayGenName) + ContHelper::OutgoingRegisterCount::setValue(NewCall, 0); + + CompilerUtils::createUnreachable(B); + Changed = true; + }); + + return Changed; } -static void handleContinuationStackIsGlobal(Function &Func, - ContStackAddrspace StackAddrspace) { +static void handleContinuationStackIsGlobal(Function &Func, ContStackAddrspace StackAddrspace) { assert(Func.arg_empty() // bool && Func.getFunctionType()->getReturnType()->isIntegerTy(1)); - auto *IsGlobal = ConstantInt::getBool( - Func.getContext(), StackAddrspace == ContStackAddrspace::Global); + auto *IsGlobal = ConstantInt::getBool(Func.getContext(), StackAddrspace == ContStackAddrspace::Global); llvm::replaceCallsToFunction(Func, *IsGlobal); } @@ -1002,8 +912,7 @@ static void handleContinuationsGetFlags(Function &Func, uint32_t Flags) { // i32 && Func.getFunctionType()->getReturnType()->isIntegerTy(32)); - auto *FlagsConst = - ConstantInt::get(IntegerType::get(Func.getContext(), 32), Flags); + auto *FlagsConst = ConstantInt::get(IntegerType::get(Func.getContext(), 32), Flags); llvm::replaceCallsToFunction(Func, *FlagsConst); } @@ -1013,8 +922,7 @@ static void handleGetRtip(Function &Func, uint32_t RtipLevel) { // i32 && Func.getFunctionType()->getReturnType()->isIntegerTy(32)); - auto *RtipConst = - ConstantInt::get(IntegerType::get(Func.getContext(), 32), RtipLevel); + auto *RtipConst = ConstantInt::get(IntegerType::get(Func.getContext(), 32), RtipLevel); for (auto &Use : make_early_inc_range(Func.uses())) { if (auto 
*CInst = dyn_cast(Use.getUser())) { if (CInst->isCallee(&Use)) { @@ -1042,8 +950,8 @@ static void handleGetUninitialized(Function &Func) { void ContHelper::handleGetSetting(Function &F, ArrayRef Settings) { auto *Ty = dyn_cast(F.getReturnType()); if (!Ty) - report_fatal_error(Twine("Only integer settings are supported but '") + - F.getName() + "' does not return an integer"); + report_fatal_error(Twine("Only integer settings are supported but '") + F.getName() + + "' does not return an integer"); auto Name = F.getName(); bool Consumed = Name.consume_front("_AmdGetSetting_"); if (!Consumed) @@ -1054,8 +962,7 @@ void ContHelper::handleGetSetting(Function &F, ArrayRef Settings) { uint64_t NameVal; bool Failed = Name.getAsInteger(10, NameVal); if (Failed) { - report_fatal_error( - Twine("Failed to parse _AmdGetSetting_ suffix as int: ") + Name); + report_fatal_error(Twine("Failed to parse _AmdGetSetting_ suffix as int: ") + Name); } uint64_t Value = 0; @@ -1069,8 +976,7 @@ void ContHelper::handleGetSetting(Function &F, ArrayRef Settings) { } if (!Found) { #ifndef NDEBUG - errs() << Twine("Warning: Setting '") + Name + - "' is not defined, setting to 0\n"; + errs() << Twine("Warning: Setting '") + Name + "' is not defined, setting to 0\n"; #endif } @@ -1079,14 +985,88 @@ void ContHelper::handleGetSetting(Function &F, ArrayRef Settings) { replaceCallsToFunction(F, *Val); } +void ContHelper::handleGetFuncAddr(Function &F, llvm_dialects::Builder &Builder) { + assert(F.arg_empty() + // returns i64 or i32 + && (F.getFunctionType()->getReturnType()->isIntegerTy(64) || + F.getFunctionType()->getReturnType()->isIntegerTy(32))); + + auto Name = F.getName(); + [[maybe_unused]] bool Consumed = Name.consume_front("_AmdGetFuncAddr"); + assert(Consumed); + + Function *Impl = F.getParent()->getFunction(Name); + if (!Impl) + report_fatal_error(Twine("Did not find function '") + Name + "' requested by _AmdGetFuncAddr"); + + llvm::forEachCall(F, [&](llvm::CallInst &CInst) { + auto *RetTy 
= F.getReturnType(); + Builder.SetInsertPoint(&CInst); + Value *AsContRef = Builder.create(RetTy, Impl); + CInst.replaceAllUsesWith(AsContRef); + CInst.eraseFromParent(); + }); +} + +void ContHelper::handleValueI32Count(Function &F, IRBuilder<> &Builder) { + assert(F.arg_size() == 1 + // i32 count + && F.getFunctionType()->getReturnType()->isIntegerTy(32) + // Pointer to a struct + && F.getFunctionType()->getParamType(0)->isPointerTy()); + + auto *Ty = getFuncArgPtrElementType(&F, 0); + auto *Size = Builder.getInt32(divideCeil(F.getParent()->getDataLayout().getTypeStoreSize(Ty).getFixedValue(), 4)); + llvm::replaceCallsToFunction(F, *Size); +} + +void ContHelper::handleValueGetI32(Function &F, IRBuilder<> &Builder) { + assert(F.arg_size() == 2 + // value + && F.getFunctionType()->getReturnType()->isIntegerTy(32) + // Pointer to a struct + && F.getFunctionType()->getParamType(0)->isPointerTy() + // index + && F.getFunctionType()->getParamType(1)->isIntegerTy(32)); + + auto *I32 = Builder.getInt32Ty(); + + llvm::forEachCall(F, [&](CallInst &CInst) { + Builder.SetInsertPoint(&CInst); + Value *Addr = CInst.getArgOperand(0); + Addr = Builder.CreateGEP(I32, Addr, CInst.getArgOperand(1)); + auto *Load = Builder.CreateLoad(I32, Addr); + CInst.replaceAllUsesWith(Load); + CInst.eraseFromParent(); + }); +} + +void ContHelper::handleValueSetI32(Function &F, IRBuilder<> &Builder) { + assert(F.arg_size() == 3 && + F.getFunctionType()->getReturnType()->isVoidTy() + // Pointer to a struct + && F.getFunctionType()->getParamType(0)->isPointerTy() + // index + && F.getFunctionType()->getParamType(1)->isIntegerTy(32) + // value + && F.getFunctionType()->getParamType(2)->isIntegerTy(32)); + + auto *I32 = Builder.getInt32Ty(); + llvm::forEachCall(F, [&](CallInst &CInst) { + Builder.SetInsertPoint(&CInst); + Value *Addr = CInst.getArgOperand(0); + Addr = Builder.CreateGEP(I32, CInst.getArgOperand(0), CInst.getArgOperand(1)); + Builder.CreateStore(CInst.getArgOperand(2), Addr); + 
CInst.eraseFromParent(); + }); +} + void llvm::terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall) { Builder.SetInsertPoint(CompleteCall); - [[maybe_unused]] Instruction *OldTerminator = - CompleteCall->getParent()->getTerminator(); + [[maybe_unused]] Instruction *OldTerminator = CompleteCall->getParent()->getTerminator(); Type *FuncRetTy = CompleteCall->getFunction()->getReturnType(); - // During the driver transform, this will see a _cont_SetupRayGen which - // returns _AmdDispatchSystemData. Thus, we return a poison. Resume functions + // For functions returning a value, return a poison. Resume functions // and other shaders will simply return a void value when this helper is being // called from LegacyCleanupContinuations. These will be treated as // continuation.complete by the translator. @@ -1096,8 +1076,7 @@ void llvm::terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall) { else Ret = Builder.CreateRet(PoisonValue::get(FuncRetTy)); - assert(OldTerminator != CompleteCall && - "terminateShader: Invalid terminator instruction provided!"); + assert(OldTerminator != CompleteCall && "terminateShader: Invalid terminator instruction provided!"); // If there is some code after the call to _AmdComplete or the intended // lgc.ilcps.return that aborts the shader, do the following: @@ -1116,28 +1095,21 @@ void llvm::terminateShader(IRBuilder<> &Builder, CallInst *CompleteCall) { bool llvm::earlyDriverTransform(Module &M) { // Import StackAddrspace from metadata if set, otherwise from default auto StackAddrspaceMD = ContHelper::tryGetStackAddrspace(M); - auto StackAddrspace = - StackAddrspaceMD.value_or(ContHelper::DefaultStackAddrspace); + auto StackAddrspace = StackAddrspaceMD.value_or(ContHelper::DefaultStackAddrspace); // Import from metadata if set - auto RtipLevel = ContHelper::tryGetRtip(M); - auto Flags = ContHelper::tryGetFlags(M); + auto RtipLevel = ContHelper::Rtip::tryGetValue(&M); + auto Flags = ContHelper::Flags::tryGetValue(&M); 
SmallVector GpurtSettings; ContHelper::getGpurtSettings(M, GpurtSettings); bool Changed = false; // Replace Enqueue and Complete intrinsics for (auto &F : M) { - Function *Replacement = nullptr; auto Name = F.getName(); - if (Name.contains("WaitEnqueue")) - Replacement = getContinuationWaitContinue(M); - else if (Name.contains("Enqueue")) - Replacement = getContinuationContinue(M); - if (Replacement) { - Changed = true; - replaceEnqueueIntrinsic(F, Replacement); + if (Name.contains("Enqueue")) { + Changed = replaceEnqueueIntrinsic(F); } if (Name.starts_with("_AmdContinuationStackIsGlobal")) { @@ -1152,8 +1124,7 @@ bool llvm::earlyDriverTransform(Module &M) { } else if (Name.starts_with("_AmdGetRtip")) { Changed = true; if (!RtipLevel) - report_fatal_error( - "Tried to get rtip level but it is not available on the module"); + report_fatal_error("Tried to get rtip level but it is not available on the module"); handleGetRtip(F, *RtipLevel); } else if (Name.starts_with("_AmdGetUninitialized")) { Changed = true; @@ -1167,9 +1138,7 @@ bool llvm::earlyDriverTransform(Module &M) { return Changed; } -uint64_t -llvm::computeNeededStackSizeForRegisterBuffer(uint64_t NumI32s, - uint64_t NumReservedRegisters) { +uint64_t llvm::computePayloadSpillSize(uint64_t NumI32s, uint64_t NumReservedRegisters) { if (NumI32s <= NumReservedRegisters) return 0; @@ -1177,18 +1146,6 @@ llvm::computeNeededStackSizeForRegisterBuffer(uint64_t NumI32s, return NumStackI32s * RegisterBytes; } -Type *llvm::getFuncArgPtrElementType(const Argument *Arg) { - auto *ArgTy = Arg->getType(); - if (!ArgTy->isPointerTy()) - return nullptr; - - return ContArgTy::get(Arg->getParent(), Arg).getPointerElementType(); -} - -Type *llvm::getFuncArgPtrElementType(const Function *F, int ArgNo) { - return getFuncArgPtrElementType(F->getArg(ArgNo)); -} - namespace llvm { namespace coro { bool defaultMaterializable(Instruction &V); @@ -1200,13 +1157,11 @@ bool llvm::commonMaterializable(Instruction &Inst) { return true; // 
Insert into constant. - if (isa(Inst) && - isa(Inst.getOperand(0))) { + if (isa(Inst) && isa(Inst.getOperand(0))) { return true; } - if (auto *Shuffle = dyn_cast(&Inst); - Shuffle && Shuffle->isSingleSource()) + if (auto *Shuffle = dyn_cast(&Inst); Shuffle && Shuffle->isSingleSource()) return true; return false; @@ -1263,10 +1218,8 @@ bool llvm::LgcMaterializable(Instruction &OrigI) { auto CalledName = CalledFunc->getName(); // FIXME: switch to dialectOp check. - if (CalledName.starts_with("lgc.user.data") || - CalledName.starts_with("lgc.shader.input") || - CalledName.starts_with("lgc.create.get.desc.ptr") || - CalledName.starts_with("lgc.load.buffer.desc") || + if (CalledName.starts_with("lgc.user.data") || CalledName.starts_with("lgc.shader.input") || + CalledName.starts_with("lgc.create.get.desc.ptr") || CalledName.starts_with("lgc.load.buffer.desc") || CalledName.starts_with("lgc.load.user.data")) return true; } @@ -1275,8 +1228,7 @@ bool llvm::LgcMaterializable(Instruction &OrigI) { return false; } -std::optional -llvm::findDominatedContinueCall(CallInst *GetResPointAddr) { +std::optional llvm::findDominatedContinueCall(CallInst *GetResPointAddr) { SmallDenseSet Visited; SmallDenseSet UnknownPreds; SmallVector WorkList; @@ -1323,8 +1275,7 @@ llvm::findDominatedContinueCall(CallInst *GetResPointAddr) { } if (Candidate == nullptr) { - LLVM_DEBUG( - dbgs() << "Did not find a continue call after a GetResumePointAddr\n"); + LLVM_DEBUG(dbgs() << "Did not find a continue call after a GetResumePointAddr\n"); return {}; } @@ -1339,194 +1290,6 @@ llvm::findDominatedContinueCall(CallInst *GetResPointAddr) { return Candidate; } -/// Copy the function body from the old function. -static Function *cloneFunctionWithTypes(Function *Fn, ContFuncTy NewFnTy, - AttributeList FnAttrs) { - // Erase outdated types metadata to avoid being propagated to the new - // function. 
- Fn->eraseMetadata(Fn->getContext().getMDKindID(ContHelper::MDTypesName)); - Function *NewFn = cloneFunctionHeaderWithTypes(*Fn, NewFnTy, FnAttrs); - NewFn->splice(NewFn->begin(), Fn); - NewFn->takeName(Fn); - Fn->replaceAllUsesWith(ConstantExpr::getBitCast(NewFn, Fn->getType())); - return NewFn; -} - -/// Promote pointer argument type to its value type if the corresponding bit in -/// `PromotionMask` is being set. -Function *llvm::promotePointerArguments(Function *Fn, - const SmallBitVector &PromotionMask) { - SmallVector ArgTys; - SmallVector ParamAttrs; - - // Do nothing if the promotion mask is zero. - if (PromotionMask.none()) - return Fn; - - auto FnAttrs = Fn->getAttributes(); - // The function might not have types metadata like _cont_SetupRayGen, in which - // case nothing needs to be done. - if (!Fn->getMetadata(ContHelper::MDTypesName)) - return Fn; - - for (const auto &[ArgNo, Arg] : llvm::enumerate(Fn->args())) { - ContArgTy ArgTy = ContArgTy::get(Fn, &Arg); - - // Promote the pointer type to its value type if the bit in `PromotionMask` - // is set. - if (PromotionMask[ArgNo]) { - assert(ArgTy.isPointerTy()); - ArgTys.push_back(ArgTy.getPointerElementType()); - ParamAttrs.push_back({}); - continue; - } - ArgTys.push_back(ArgTy); - ParamAttrs.push_back(FnAttrs.getParamAttrs(ArgNo)); - } - - ContFuncTy NewFuncTy(ContFuncTy::get(Fn).ReturnTy, ArgTys); - auto NewFnAttr = AttributeList::get(Fn->getContext(), FnAttrs.getFnAttrs(), - FnAttrs.getRetAttrs(), ParamAttrs); - auto *NewFn = cloneFunctionWithTypes(Fn, NewFuncTy, NewFnAttr); - - IRBuilder<> B(Fn->getContext()); - // Change argument types at call sites. 
- llvm::forEachCall(*NewFn, [&](CallInst &Call) { - B.SetInsertPoint(&Call); - for (const auto &[ArgNo, ArgPair] : - llvm::enumerate(llvm::zip(Call.args(), NewFn->args()))) { - auto &CallArg = std::get<0>(ArgPair); - auto &NewArg = std::get<1>(ArgPair); - if (CallArg->getType() != NewArg.getType()) { - auto *NewOp = B.CreateLoad(NewArg.getType(), CallArg); - Call.setArgOperand(ArgNo, NewOp); - } - } - // Update Callee function type. - Call.setCalledFunction(NewFn); - }); - - // Replace argument uses. - for (const auto &[OldArg, NewArg] : llvm::zip(Fn->args(), NewFn->args())) { - Value *NewValue = &NewArg; - NewArg.setName(OldArg.getName()); - if (!NewFn->isDeclaration()) { - if (NewArg.getType() != OldArg.getType()) { - B.SetInsertPointPastAllocas(NewFn); - auto *ArgAlloca = B.CreateAlloca(NewArg.getType()); - B.CreateStore(&NewArg, ArgAlloca); - NewValue = ArgAlloca; - } - OldArg.replaceAllUsesWith(NewValue); - } - } - Fn->eraseFromParent(); - return NewFn; -} - -/// Unpack the return (struct) type of the input function, which means change -/// the return type to its first element type. This may generate invalid IR in -/// general, call this with extra caution. -Function *llvm::unpackStructReturnType(Function *Fn) { - auto *RetTy = Fn->getReturnType(); - assert(RetTy->isStructTy()); - auto *NewRetTy = RetTy->getStructElementType(0); - - ContFuncTy NewFnTy(NewRetTy, ContFuncTy::get(Fn).ArgTys); - auto *NewFn = cloneFunctionWithTypes(Fn, NewFnTy, Fn->getAttributes()); - llvm::forEachCall(*NewFn, [&](CallInst &Call) { - // Update callee function type. - Call.setCalledFunction(NewFn); - }); - - // Copy argument names and replace argument uses. 
- for (const auto &[OldArg, NewArg] : llvm::zip(Fn->args(), NewFn->args())) { - NewArg.setName(OldArg.getName()); - if (!NewFn->isDeclaration()) - OldArg.replaceAllUsesWith(&NewArg); - } - IRBuilder<> B(Fn->getContext()); - llvm::forEachTerminator( - NewFn, {Instruction::Ret}, [&](Instruction &Terminator) { - B.SetInsertPoint(&Terminator); - Value *RetExtractVal = - B.CreateExtractValue(Terminator.getOperand(0), {0}); - B.CreateRet(RetExtractVal); - Terminator.eraseFromParent(); - }); - Fn->eraseFromParent(); - return NewFn; -} - -// Turn `StructRet` argument into more canonical return statement. -Function *llvm::lowerStructRetArgument(Function *Fn) { - assert(Fn->getReturnType()->isVoidTy()); - auto *RetArg = Fn->getArg(0); - if (!RetArg->hasStructRetAttr()) - RetArg = Fn->getArg(1); - assert(RetArg->hasStructRetAttr()); - unsigned RetArgIdx = RetArg->getArgNo(); - Type *RetTy = RetArg->getParamStructRetType(); - - AttributeList FnAttrs = Fn->getAttributes(); - SmallVector ArgAttrs; - SmallVector NewArgTys; - const SmallVector &OldArgTys = ContFuncTy::get(Fn).ArgTys; - for (unsigned Idx = 0; Idx < Fn->arg_size(); Idx++) { - if (Idx != RetArgIdx) { - ArgAttrs.push_back(FnAttrs.getParamAttrs(Idx)); - NewArgTys.push_back(OldArgTys[Idx]); - } - } - - ContFuncTy NewFnTy(RetTy, NewArgTys); - auto NewFnAttr = AttributeList::get(Fn->getContext(), FnAttrs.getFnAttrs(), - FnAttrs.getRetAttrs(), ArgAttrs); - Function *NewFn = cloneFunctionWithTypes(Fn, NewFnTy, NewFnAttr); - - IRBuilder<> B(Fn->getContext()); - llvm::forEachCall(*NewFn, [&](CallInst &Call) { - B.SetInsertPoint(&Call); - Value *StructRetArg = nullptr; - SmallVector Args; - for (const auto &[Idx, Arg] : llvm::enumerate(Call.args())) { - if (Idx == RetArgIdx) { - StructRetArg = Arg; - continue; - } - Args.push_back(Arg); - } - auto *NewRet = B.CreateCall(NewFn, Args); - B.CreateStore(NewRet, StructRetArg); - Call.eraseFromParent(); - }); - - // Copy argument names and replace argument uses. 
- for (const auto &[ArgNo, NewArg] : llvm::enumerate(NewFn->args())) { - auto *OldArg = Fn->getArg(ArgNo >= RetArgIdx ? ArgNo + 1 : ArgNo); - NewArg.setName(OldArg->getName()); - if (!NewFn->isDeclaration()) - OldArg->replaceAllUsesWith(&NewArg); - } - - if (!NewFn->isDeclaration()) { - B.SetInsertPointPastAllocas(NewFn); - auto *RetAlloca = B.CreateAlloca(RetTy); - RetArg->replaceAllUsesWith(RetAlloca); - - // Replace returns with return value - llvm::forEachTerminator(NewFn, {Instruction::Ret}, - [&](Instruction &Terminator) { - B.SetInsertPoint(&Terminator); - Value *RetLoad = B.CreateLoad(RetTy, RetAlloca); - B.CreateRet(RetLoad); - Terminator.eraseFromParent(); - }); - } - Fn->eraseFromParent(); - return NewFn; -} - namespace llvm { void addLgcContinuationTransform(ModulePassManager &MPM) { MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/false)); @@ -1541,5 +1304,12 @@ void addLgcContinuationTransform(ModulePassManager &MPM) { MPM.addPass(CoroCleanupPass()); MPM.addPass(CleanupContinuationsPass()); + +#ifndef NDEBUG + MPM.addPass(ContinuationsLintPass()); +#endif + + MPM.addPass(createModuleToFunctionPassAdaptor(LowerSwitchPass())); + MPM.addPass(createModuleToFunctionPassAdaptor(FixIrreduciblePass())); } } // End namespace llvm diff --git a/llvmraytracing/lib/ContinuationsLint.cpp b/llvmraytracing/lib/ContinuationsLint.cpp new file mode 100644 index 0000000000..4902ff7d91 --- /dev/null +++ b/llvmraytracing/lib/ContinuationsLint.cpp @@ -0,0 +1,164 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +//===- ContinuationsLint.cpp - Continuations linter pass ------------------------===// +// +// This file implements a pass that runs some common integrity checks on a continuations module. +// This also runs the default LLVM linter on the whole module. +//===----------------------------------------------------------------------===// + +#include "llvmraytracing/Continuations.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcIlCpsDialect.h" +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/PassManager.h" + +using namespace llvm; + +#define DEBUG_TYPE "continuations-lint" + +static const char ContLintAbortOnErrorArgName[] = "cont-lint-abort-on-error"; +// Defaults to true. 
If the continuations module is broken at some point, then we cannot ignore that. +static cl::opt ContLintAbortOnError(ContLintAbortOnErrorArgName, cl::init(true), + cl::desc("In the Continuations lint pass, abort on errors.")); + +#define Check(C, ...) \ + do { \ + if (!(C)) { \ + checkFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) + +namespace { +class ContinuationsLintPassImpl final { +public: + ContinuationsLintPassImpl(Module &M); + void run(); + +private: + Module &Mod; + + using JumpVecTy = SmallVector; + JumpVecTy AllJumps; + void collectJumps(); + void checkJumpTargets(); + void checkSetLocalRootIndex(); + + // Printing and check logic borrowed from llvm's @Lint pass. + std::string Messages; + raw_string_ostream MessagesStr; + /// A check failed, so print out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void checkFailed(const Twine &Message) { MessagesStr << Message << '\n'; } + + void writeValues(ArrayRef Vs) { + for (const Value *V : Vs) { + if (!V) + continue; + if (isa(V)) { + MessagesStr << *V << '\n'; + } else { + V->printAsOperand(MessagesStr, true, &Mod); + MessagesStr << '\n'; + } + } + } + + template void checkFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { + checkFailed(Message); + writeValues({V1, Vs...}); + } +}; +} // anonymous namespace + +ContinuationsLintPassImpl::ContinuationsLintPassImpl(Module &M) : Mod{M}, MessagesStr(Messages) { +} + +void ContinuationsLintPassImpl::run() { + LLVM_DEBUG(dbgs() << "Run the pass continuations-lint\n"); + collectJumps(); + + checkJumpTargets(); + checkSetLocalRootIndex(); + + dbgs() << MessagesStr.str(); + if (ContLintAbortOnError && !MessagesStr.str().empty()) + report_fatal_error(Twine("Continuations linter found errors, aborting.
(enabled by --") + + ContLintAbortOnErrorArgName + ")", + false); +} + +void ContinuationsLintPassImpl::collectJumps() { + static const auto Visitor = llvm_dialects::VisitorBuilder() + .addSet( + [](JumpVecTy &Jumps, Instruction &Op) { Jumps.push_back(cast(&Op)); }) + .build(); + + Visitor.visit(AllJumps, Mod); +} + +// Check that every possible jump candidate has a valid jump target +void ContinuationsLintPassImpl::checkJumpTargets() { + for (auto *JumpCandidate : AllJumps) { + Value *JumpTarget = nullptr; + if (auto *Continue = dyn_cast(JumpCandidate)) + JumpTarget = Continue->getShaderAddr(); + else if (auto *WaitContinue = dyn_cast(JumpCandidate)) + JumpTarget = WaitContinue->getShaderAddr(); + else if (auto *Jump = dyn_cast(JumpCandidate)) + JumpTarget = Jump->getTarget(); + + assert(JumpTarget); + + Check(!isa(JumpTarget), "Jump has undefined jump target", JumpCandidate); + } +} + +// Check that every function has at most one setLocalRootIndex call. +void ContinuationsLintPassImpl::checkSetLocalRootIndex() { + if (auto *SetF = Mod.getFunction("amd.dx.setLocalRootIndex")) { + SmallDenseSet HasSetF; + + llvm::forEachCall(*SetF, [&](CallInst &CInst) { + // Returns true if it is a new value + Function *Func = CInst.getFunction(); + auto Inserted = HasSetF.insert(Func); + Check(Inserted.second, "Found a function with more than one call to setLocalRootIndex", Func); + }); + } +} + +PreservedAnalyses ContinuationsLintPass::run(Module &Mod, ModuleAnalysisManager &AnalysisManager) { + ContinuationsLintPassImpl Impl{Mod}; + Impl.run(); + + return PreservedAnalyses::all(); +} + +#undef Check diff --git a/llvmraytracing/lib/ContinuationsStatsReport.cpp b/llvmraytracing/lib/ContinuationsStatsReport.cpp new file mode 100644 index 0000000000..1751a7259d --- /dev/null +++ b/llvmraytracing/lib/ContinuationsStatsReport.cpp @@ -0,0 +1,221 @@ +/* + *********************************************************************************************************************** + * + * 
Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +//===- ContinuationsStatsReport.cpp - Continuations statistics reporting ------------------===// +// +// A pass that gets the following statistics from a continuations module: +// * Report payload sizes +// * Report system data sizes +// * Report continuation state sizes +// +// This pass is designed to be run after the cleanup passes, since this is +// where all required information for analysis is available. +// The metadata can be safely omitted after running this pass.
+//===----------------------------------------------------------------------===// + +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcIlCpsDialect.h" +#include "lgc/LgcRtDialect.h" +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/PassManager.h" +#include + +using namespace llvm; +using namespace lgc::rt; + +#define DEBUG_TYPE "continuations-stats-report" + +static cl::opt ReportContStateSizes("report-cont-state-sizes", + cl::desc("Report continuation state sizes for entry functions."), + cl::init(false)); + +static cl::opt ReportPayloadRegisterSizes("report-payload-register-sizes", + cl::desc("Report payload VGPR sizes for functions."), cl::init(false)); + +static cl::opt ReportSystemDataSizes("report-system-data-sizes", + cl::desc("Report incoming system data sizes for functions."), + cl::init(false)); + +static cl::opt ReportAllSizes("report-all-continuation-sizes", + cl::desc("Report continuation state, payload and system data sizes."), + cl::init(false)); + +namespace { +class ContinuationsStatsReportPassImpl final { +public: + ContinuationsStatsReportPassImpl(Module &M); + void run(); + +private: + void collectProcessableFunctions(); + void reportContStateSizes(); + void reportPayloadRegisterSizes(); + void reportSystemDataSizes(); + + struct FunctionData { + std::optional Stage = std::nullopt; + Type *SystemDataTy = nullptr; + }; + + Module &Mod; + MapVector ToProcess; +}; +} // namespace + +ContinuationsStatsReportPassImpl::ContinuationsStatsReportPassImpl(Module &Mod) : Mod{Mod} { +} + +void ContinuationsStatsReportPassImpl::run() { + if (!ReportPayloadRegisterSizes && !ReportSystemDataSizes && !ReportContStateSizes && !ReportAllSizes) + return; + + collectProcessableFunctions(); + + if (ReportAllSizes || ReportPayloadRegisterSizes) + reportPayloadRegisterSizes(); + + if (ReportAllSizes || 
ReportSystemDataSizes) + reportSystemDataSizes(); + + if (ReportAllSizes || ReportContStateSizes) + reportContStateSizes(); +} + +void ContinuationsStatsReportPassImpl::collectProcessableFunctions() { + for (Function &F : Mod) { + if (F.isDeclaration()) + continue; + + auto Stage = getLgcRtShaderStage(&F); + if (!Stage || Stage == RayTracingShaderStage::KernelEntry) + continue; + + if (!llvm::isStartFunc(&F)) { + FunctionData Data; + Data.Stage = Stage; + + // Extract the actual system data type from the { systemData, padding, + // payload } struct returned by await. + Data.SystemDataTy = F.getArg(F.arg_size() - 1)->getType()->getStructElementType(0); + + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; + assert(DidInsert); + + continue; + } + + const uint32_t SystemDataArgumentIndex = lgc::cps::isCpsFunction(F) ? CpsArgIdxSystemData : 1; + switch (Stage.value()) { + case RayTracingShaderStage::RayGeneration: + case RayTracingShaderStage::Intersection: + case RayTracingShaderStage::AnyHit: + case RayTracingShaderStage::ClosestHit: + case RayTracingShaderStage::Miss: + case RayTracingShaderStage::Callable: { + FunctionData Data; + Data.Stage = Stage; + Data.SystemDataTy = F.getFunctionType()->getParamType(SystemDataArgumentIndex); + assert(Data.SystemDataTy->isStructTy() && "SystemData should be of struct type!"); + + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; + assert(DidInsert); + break; + } + default: + break; + } + } +} + +void ContinuationsStatsReportPassImpl::reportContStateSizes() { + for (auto &[Func, FuncData] : ToProcess) { + auto OptStateSize = ContHelper::ContinuationStateByteCount::tryGetValue(Func); + if (!OptStateSize.has_value()) + continue; + + dbgs() << "Continuation state size of \"" << Func->getName() << "\" (" << FuncData.Stage + << "): " << OptStateSize.value() << " bytes\n"; + } +} + +void ContinuationsStatsReportPassImpl::reportPayloadRegisterSizes() { + static const auto 
Visitor = llvm_dialects::VisitorBuilder>() + .addSet( + [](auto &FuncOutgoingRegCountMap, auto &CInst) { + auto RegCount = ContHelper::OutgoingRegisterCount::tryGetValue(&CInst).value(); + FuncOutgoingRegCountMap[CInst.getFunction()] = + std::max(FuncOutgoingRegCountMap[CInst.getFunction()], RegCount); + }) + .build(); + + DenseMap MaxOutgoingRegisterCounts; + Visitor.visit(MaxOutgoingRegisterCounts, Mod); + + for (auto &[Func, FuncData] : ToProcess) { + DXILShaderKind ShaderKind = ShaderStageHelper::rtShaderStageToDxilShaderKind(FuncData.Stage.value()); + auto OptIncomingPayloadRegisterCount = ContHelper::IncomingRegisterCount::tryGetValue(Func); + bool HasIncomingPayload = OptIncomingPayloadRegisterCount.has_value(); + auto It = MaxOutgoingRegisterCounts.find(Func); + bool HasOutgoingPayload = (It != MaxOutgoingRegisterCounts.end()); + + if (!HasIncomingPayload && !HasOutgoingPayload) + continue; + + dbgs() << "Incoming and max outgoing payload VGPR size of \"" << Func->getName() << "\" (" << ShaderKind << "): "; + if (HasIncomingPayload) { + dbgs() << OptIncomingPayloadRegisterCount.value() * RegisterBytes; + } else { + dbgs() << "(no incoming payload)"; + } + dbgs() << " and "; + if (HasOutgoingPayload) { + dbgs() << It->second * RegisterBytes; + } else { + dbgs() << "(no outgoing payload)"; + } + dbgs() << " bytes\n"; + } +} + +void ContinuationsStatsReportPassImpl::reportSystemDataSizes() { + for (const auto &[F, FuncData] : ToProcess) { + if (FuncData.SystemDataTy == nullptr) + continue; + auto SystemDataBytes = Mod.getDataLayout().getTypeStoreSize(FuncData.SystemDataTy); + + dbgs() << "Incoming system data of \"" << F->getName() << "\" (" << FuncData.Stage << ") is \"" + << FuncData.SystemDataTy->getStructName() << "\", size: " << SystemDataBytes << " bytes\n"; + } +} + +PreservedAnalyses ContinuationsStatsReportPass::run(Module &Mod, ModuleAnalysisManager &AnalysisManager) { + ContinuationsStatsReportPassImpl Impl{Mod}; + Impl.run(); + return 
PreservedAnalyses::all(); +} diff --git a/llvmraytracing/lib/CpsStackLowering.cpp b/llvmraytracing/lib/CpsStackLowering.cpp index 398e14abf4..1ea139b1b3 100644 --- a/llvmraytracing/lib/CpsStackLowering.cpp +++ b/llvmraytracing/lib/CpsStackLowering.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -25,23 +25,23 @@ #include "llvmraytracing/CpsStackLowering.h" #include "compilerutils/CompilerUtils.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" #include "lgc/LgcCpsDialect.h" +#include "lgc/LgcIlCpsDialect.h" #include "lgc/LgcRtDialect.h" -#include "llvm-dialects/Dialect/Builder.h" #include "llvm-dialects/Dialect/Visitor.h" -#include "llvmraytracing/ContinuationsUtil.h" -#include "llvmraytracing/GpurtContext.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +using namespace CompilerUtils; using namespace llvm; using namespace lgc::cps; LLVM_DIALECTS_VISITOR_PAYLOAD_PROJECT_FIELD(CpsStackLowering, TypeLower) -SmallVector -CpsStackLowering::convertStackPtrToI32(TypeLowering &TypeLower, Type *Ty) { +SmallVector CpsStackLowering::convertStackPtrToI32(TypeLowering &TypeLower, Type *Ty) { SmallVector Types; if (auto *PtrTy = dyn_cast(Ty)) { @@ -63,9 +63,7 @@ CpsStackLowering::convertStackPtrToI32(TypeLowering &TypeLower, Type *Ty) { // stack pointer. TODO Remove this argument. This function // should be responsible for adding the alloca. 
// @return: The new function, if Function was mutated, or the Function argument. -Function *CpsStackLowering::lowerCpsStackOps(Function *Func, - Function *GetGlobalMemBase, - bool RequiresIncomingCsp, +Function *CpsStackLowering::lowerCpsStackOps(Function *Func, Function *GetGlobalMemBase, bool RequiresIncomingCsp, llvm::Value *CspStorage) { Mod = Func->getParent(); StackSizeInBytes = 0; @@ -75,8 +73,8 @@ Function *CpsStackLowering::lowerCpsStackOps(Function *Func, else Func = addOrInitCsp(Func, GetGlobalMemBase, RequiresIncomingCsp); - TypeLower.addRule(std::bind(&CpsStackLowering::convertStackPtrToI32, this, - std::placeholders::_1, std::placeholders::_2)); + TypeLower.addRule( + std::bind(&CpsStackLowering::convertStackPtrToI32, this, std::placeholders::_1, std::placeholders::_2)); if (lgc::cps::isCpsFunction(*Func)) Func = TypeLower.lowerFunctionArguments(*Func); @@ -93,13 +91,13 @@ Function *CpsStackLowering::lowerCpsStackOps(Function *Func, .add(&CpsStackLowering::visitBitCastInst) .add(&CpsStackLowering::visitLoad) .add(&CpsStackLowering::visitStore) + .add(&CpsStackLowering::visitContinue) + .add(&CpsStackLowering::visitWaitContinue) .build(); Visitor.visit(*this, *Func); TypeLower.finishPhis(); TypeLower.finishCleanup(); - visitContinueCalls(Func); - CpsStackAlloca = nullptr; return Func; @@ -124,21 +122,18 @@ void CpsStackLowering::visitGetElementPtr(GetElementPtrInst &GEP) { APInt ConstantOffset{BitWidth, 0}; MapVector VariableOffsets; - [[maybe_unused]] bool Success = - GEP.collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset); + [[maybe_unused]] bool Success = GEP.collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset); assert(Success && "CpsStackLowering::visitGetElementPtr: GEP.collectOffset " "did not succeed!"); if (ConstantOffset.getSExtValue() != 0) - AddChain = Builder.CreateAdd( - AddChain, Builder.getInt32(ConstantOffset.getSExtValue())); + AddChain = Builder.CreateAdd(AddChain, Builder.getInt32(ConstantOffset.getSExtValue())); for 
(const auto &[Index, Scaling] : VariableOffsets) { Value *ScaledVal = Index; if (Scaling.getSExtValue() != 1) - ScaledVal = Builder.CreateMul(ScaledVal, - Builder.getInt32(Scaling.getSExtValue())); + ScaledVal = Builder.CreateMul(ScaledVal, Builder.getInt32(Scaling.getSExtValue())); AddChain = Builder.CreateAdd(AddChain, ScaledVal); } @@ -159,8 +154,7 @@ void CpsStackLowering::visitLoad(LoadInst &Load) { IRBuilder<> Builder(&Load); Values[0] = getRealMemoryAddress(Builder, Values[0]); - Values[0] = Builder.CreateBitCast( - Values[0], Load.getType()->getPointerTo(getLoweredCpsStackAddrSpace())); + Values[0] = Builder.CreateBitCast(Values[0], Load.getType()->getPointerTo(getLoweredCpsStackAddrSpace())); Load.replaceUsesOfWith(Load.getPointerOperand(), Values[0]); } @@ -178,58 +172,28 @@ void CpsStackLowering::visitStore(llvm::StoreInst &Store) { IRBuilder<> Builder(&Store); Values[0] = getRealMemoryAddress(Builder, Values[0]); - Values[0] = Builder.CreateBitCast( - Values[0], Store.getValueOperand()->getType()->getPointerTo( - getLoweredCpsStackAddrSpace())); + Values[0] = + Builder.CreateBitCast(Values[0], Store.getValueOperand()->getType()->getPointerTo(getLoweredCpsStackAddrSpace())); Store.replaceUsesOfWith(Store.getPointerOperand(), Values[0]); } // ===================================================================================================================== -// Add stack pointer to continue calls +// Add stack pointer to a lgc.ilcps.continue call // -// @param Func: the function where stack pointers should be added to continue -// calls -void CpsStackLowering::visitContinueCalls(llvm::Function *Func) { - llvm::forEachTerminator( - Func, {Instruction::Unreachable, Instruction::Ret}, - [&](Instruction &Terminator) { - auto *BB = Terminator.getParent(); - if (&Terminator != &*BB->begin()) { - auto Before = --Terminator.getIterator(); - if (auto *CInst = dyn_cast(Before)) { - if (auto *Func = CInst->getCalledFunction()) { - auto Name = Func->getName(); - if 
(Name.starts_with("continuation.continue") || - Name.starts_with("continuation.waitContinue")) - visitContinueCall(*CInst); - } - } - } - }); +// @param Continue: the instruction +void CpsStackLowering::visitContinue(lgc::ilcps::ContinueOp &Continue) { + IRBuilder<> Builder(&Continue); + Continue.setCsp(loadCsp(Builder)); } // ===================================================================================================================== -// Add stack pointer to continue call +// Add stack pointer to a lgc.ilcps.waitContinue call // -// @param CInst: the continue call -void CpsStackLowering::visitContinueCall(llvm::CallInst &CInst) { - auto *Func = CInst.getCalledFunction(); - auto Name = Func->getName(); - SmallVector NewCallArgs{CInst.args()}; - IRBuilder<> Builder(&CInst); - - // If the function does not use the stack, pass-through the CSP argument. - Value *Csp = Builder.CreateLoad(Builder.getInt32Ty(), CpsStackAlloca); - - bool IsWaitContinue = Name.contains("waitContinue"); - const size_t CspInsertIndex = IsWaitContinue ? 
2 : 1; - NewCallArgs.insert(NewCallArgs.begin() + CspInsertIndex, Csp); - - auto *NewCall = Builder.CreateCall(Func, NewCallArgs); - CInst.replaceAllUsesWith(NewCall); - NewCall->copyMetadata(CInst); - CInst.eraseFromParent(); +// @param WaitContinue: the instruction +void CpsStackLowering::visitWaitContinue(lgc::ilcps::WaitContinueOp &WaitContinue) { + IRBuilder<> Builder(&WaitContinue); + WaitContinue.setCsp(loadCsp(Builder)); } // ===================================================================================================================== @@ -262,13 +226,11 @@ void CpsStackLowering::visitIntToPtrInst(llvm::IntToPtrInst &Int2Ptr) { // @param BC: the instruction void CpsStackLowering::visitBitCastInst(llvm::BitCastInst &BC) { Type *SrcTy = BC.getOperand(0)->getType(); - if (!SrcTy->isPointerTy() || - cast(SrcTy)->getAddressSpace() != lgc::cps::stackAddrSpace) + if (!SrcTy->isPointerTy() || cast(SrcTy)->getAddressSpace() != lgc::cps::stackAddrSpace) return; Type *DstTy = BC.getType(); - if (!DstTy->isPointerTy() || - cast(DstTy)->getAddressSpace() != lgc::cps::stackAddrSpace) + if (!DstTy->isPointerTy() || cast(DstTy)->getAddressSpace() != lgc::cps::stackAddrSpace) return; auto Values = TypeLower.getValue(BC.getOperand(0)); @@ -282,8 +244,7 @@ void CpsStackLowering::visitBitCastInst(llvm::BitCastInst &BC) { void CpsStackLowering::visitCpsAlloc(lgc::cps::AllocOp &AllocOp) { IRBuilder<> Builder(&AllocOp); - Value *VSP = - Builder.CreateLoad(CpsStackAlloca->getAllocatedType(), CpsStackAlloca); + Value *VSP = loadCsp(Builder); Value *Size = AllocOp.getSize(); int AlignedSize = cast(Size)->getSExtValue(); @@ -305,8 +266,7 @@ void CpsStackLowering::visitCpsAlloc(lgc::cps::AllocOp &AllocOp) { void CpsStackLowering::visitCpsFree(lgc::cps::FreeOp &FreeOp) { IRBuilder<> Builder(&FreeOp); - Value *VSP = - Builder.CreateLoad(CpsStackAlloca->getAllocatedType(), CpsStackAlloca); + Value *VSP = loadCsp(Builder); Value *Size = FreeOp.getSize(); int AlignedSize = 
cast(Size)->getSExtValue(); @@ -326,8 +286,7 @@ void CpsStackLowering::visitCpsFree(lgc::cps::FreeOp &FreeOp) { void CpsStackLowering::visitCpsPeek(lgc::cps::PeekOp &PeekOp) { IRBuilder<> Builder(&PeekOp); - auto *Ptr = - Builder.CreateLoad(CpsStackAlloca->getAllocatedType(), CpsStackAlloca); + auto *Ptr = loadCsp(Builder); auto *Size = PeekOp.getSize(); int ImmSize = cast(Size)->getSExtValue(); @@ -360,9 +319,7 @@ void CpsStackLowering::visitSetVsp(lgc::cps::SetVspOp &SetVsp) { // @param GetVsp: the instruction void CpsStackLowering::visitGetVsp(lgc::cps::GetVspOp &GetVsp) { IRBuilder<> B(&GetVsp); - - auto *Ptr = B.CreateLoad(CpsStackAlloca->getAllocatedType(), CpsStackAlloca); - TypeLower.replaceInstruction(&GetVsp, {Ptr}); + TypeLower.replaceInstruction(&GetVsp, {loadCsp(B)}); } // ===================================================================================================================== @@ -374,12 +331,10 @@ void CpsStackLowering::visitGetVsp(lgc::cps::GetVspOp &GetVsp) { // @param Offset: The offset to the base address, given as integer with bitwidth // <= 32. // -Value *CpsStackLowering::getRealMemoryAddress(IRBuilder<> &Builder, - Value *Offset) { +Value *CpsStackLowering::getRealMemoryAddress(IRBuilder<> &Builder, Value *Offset) { // Since we are using at most 32-bit offsets, assert that we don't put in any // offset larger 32 bit. - assert(Offset->getType()->isIntegerTy() && - Offset->getType()->getIntegerBitWidth() <= 32); + assert(Offset->getType()->isIntegerTy() && Offset->getType()->getIntegerBitWidth() <= 32); // Create a byte-addressed GEP the global memory address + offset or just the // offset. 
Note: Don't currently return a inttoptr because the translator @@ -389,8 +344,7 @@ Value *CpsStackLowering::getRealMemoryAddress(IRBuilder<> &Builder, Type *I8 = Builder.getInt8Ty(); if (isa(BasePointer)) { - GepBase = Builder.CreateIntToPtr( - Offset, I8->getPointerTo(getLoweredCpsStackAddrSpace())); + GepBase = Builder.CreateIntToPtr(Offset, I8->getPointerTo(getLoweredCpsStackAddrSpace())); GepIndex = Builder.getInt32(0); } @@ -404,9 +358,7 @@ Value *CpsStackLowering::getRealMemoryAddress(IRBuilder<> &Builder, // @param GetGlobalMemBase: Get the base address for the stack. // `nullptr` if there is no base address and the csp // can be converted with ptrtoint. -Function *CpsStackLowering::addOrInitCsp(Function *F, - Function *GetGlobalMemBase, - bool RequiresIncomingCsp) { +Function *CpsStackLowering::addOrInitCsp(Function *F, Function *GetGlobalMemBase, bool RequiresIncomingCsp) { CompilerUtils::CrossModuleInliner CrossInliner; auto &GpurtContext = lgc::GpurtContext::get(Mod->getContext()); auto &GpurtLibrary = GpurtContext.theModule ? *GpurtContext.theModule : *Mod; @@ -424,8 +376,7 @@ Function *CpsStackLowering::addOrInitCsp(Function *F, const size_t CspArgIndex = lgc::cps::isCpsFunction(*F) ? 
1 : 0; NewArgTys.insert(NewArgTys.begin() + CspArgIndex, Builder.getInt32Ty()); - Function *NewFunc = CompilerUtils::mutateFunctionArguments( - *F, F->getReturnType(), NewArgTys, F->getAttributes()); + Function *NewFunc = CompilerUtils::mutateFunctionArguments(*F, F->getReturnType(), NewArgTys, F->getAttributes()); Argument *CspArg = NewFunc->getArg(CspArgIndex); CspArg->setName("cspInit"); @@ -443,14 +394,11 @@ Function *CpsStackLowering::addOrInitCsp(Function *F, F->eraseFromParent(); F = NewFunc; - } else if (lgc::rt::getLgcRtShaderStage(F) != - lgc::rt::RayTracingShaderStage::KernelEntry) { + } else if (lgc::rt::getLgcRtShaderStage(F) != lgc::rt::RayTracingShaderStage::KernelEntry) { // Init csp through intrinsic - auto *InitFun = - GpurtLibrary.getFunction(ContDriverFunc::GetContinuationStackAddrName); + auto *InitFun = GpurtLibrary.getFunction(ContDriverFunc::GetContinuationStackAddrName); assert(InitFun && "_cont_GetContinuationStackAddr not found."); - assert(InitFun->arg_size() == 0 && - InitFun->getReturnType()->isIntegerTy(32)); + assert(InitFun->arg_size() == 0 && InitFun->getReturnType()->isIntegerTy(32)); Initializer = CrossInliner.inlineCall(Builder, InitFun).returnValue; } @@ -461,10 +409,13 @@ Function *CpsStackLowering::addOrInitCsp(Function *F, // Get the global memory base address. 
if (GetGlobalMemBase) { auto *Base = CrossInliner.inlineCall(Builder, GetGlobalMemBase).returnValue; - auto *CspTy = - Builder.getInt8Ty()->getPointerTo(getLoweredCpsStackAddrSpace()); + auto *CspTy = Builder.getInt8Ty()->getPointerTo(getLoweredCpsStackAddrSpace()); setRealBasePointer(Builder.CreateIntToPtr(Base, CspTy)); } return F; } + +Value *CpsStackLowering::loadCsp(IRBuilder<> &Builder) { + return Builder.CreateLoad(CpsStackAlloca->getAllocatedType(), CpsStackAlloca); +} diff --git a/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp b/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp index b2af28f724..810cfe4197 100644 --- a/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp +++ b/llvmraytracing/lib/DXILContIntrinsicPrepare.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -32,9 +32,10 @@ // //===----------------------------------------------------------------------===// -#include "lgc/LgcRtDialect.h" +#include "compilerutils/ArgPromotion.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsUtil.h" +#include "lgc/LgcRtDialect.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -47,7 +48,8 @@ using namespace llvm; #define DEBUG_TYPE "dxil-cont-intrinsic-prepare" -DXILContIntrinsicPreparePass::DXILContIntrinsicPreparePass() {} +DXILContIntrinsicPreparePass::DXILContIntrinsicPreparePass() { +} /// - Unmangle the function names to be more readable and to prevent confusion /// with app defined functions later. @@ -65,12 +67,9 @@ static Function *transformFunction(Function &F) { // Extract unmangled name auto Start = Name.find('?') + 1; auto End = Name.find('@', Start); - if (Start == 0 || End == StringRef::npos || Start > Name.size() || - End > Name.size()) { - report_fatal_error( - Twine( - "Failed to unmangle function name: Failed to extract from '") + - Name + "' (start: " + Twine(Start) + ", end: " + Twine(End) + ")"); + if (Start == 0 || End == StringRef::npos || Start > Name.size() || End > Name.size()) { + report_fatal_error(Twine("Failed to unmangle function name: Failed to extract from '") + Name + + "' (start: " + Twine(Start) + ", end: " + Twine(End) + ")"); } // Copy name, otherwise it will be deleted before it's set @@ -80,11 +79,9 @@ static Function *transformFunction(Function &F) { LLVM_DEBUG(dbgs() << " Set new name " << NewName << "\n"); if (NewName == ContDriverFunc::TraversalName) - lgc::rt::setLgcRtShaderStage(&F, - lgc::rt::RayTracingShaderStage::Traversal); + lgc::rt::setLgcRtShaderStage(&F, lgc::rt::RayTracingShaderStage::Traversal); else if (NewName == 
ContDriverFunc::KernelEntryName) - lgc::rt::setLgcRtShaderStage(&F, - lgc::rt::RayTracingShaderStage::KernelEntry); + lgc::rt::setLgcRtShaderStage(&F, lgc::rt::RayTracingShaderStage::KernelEntry); F.setName(NewName); } @@ -92,33 +89,30 @@ static Function *transformFunction(Function &F) { Type *NewRetTy = F.getReturnType(); Function *NewFn = &F; if (NewRetTy->isStructTy() && NewRetTy->getStructNumElements() == 1) { - if (F.getName().contains("ObjectToWorld4x3") || - F.getName().contains("WorldToObject4x3")) { - NewFn = unpackStructReturnType(NewFn); + if (F.getName().contains("ObjectToWorld4x3") || F.getName().contains("WorldToObject4x3")) { + NewFn = CompilerUtils::unpackStructReturnType(NewFn); } } // Lower `StructRet` argument. if (NewFn->hasStructRetAttr()) - NewFn = lowerStructRetArgument(NewFn); + NewFn = CompilerUtils::lowerStructRetArgument(NewFn); SmallBitVector PromotionMask(NewFn->arg_size()); StringRef NameStr = NewFn->getName(); for (unsigned ArgNo = 0; ArgNo < NewFn->arg_size(); ArgNo++) { auto *Arg = NewFn->getArg(ArgNo); - ContArgTy ArgTy = ContArgTy::get(NewFn, Arg); + TypedArgTy ArgTy = TypedArgTy::get(Arg); if (!ArgTy.isPointerTy()) continue; - if ((NameStr.contains("Await") || NameStr.contains("Enqueue") || - NameStr.contains("Traversal") || - (NameStr == ContDriverFunc::SetTriangleHitAttributesName && - ArgNo != 0))) + if ((NameStr.contains("Await") || NameStr.contains("Enqueue") || NameStr.contains("Traversal") || + (NameStr == ContDriverFunc::SetTriangleHitAttributesName && ArgNo != 0))) PromotionMask.set(ArgNo); } // Promote pointer arguments to their pointee value types. 
- NewFn = promotePointerArguments(NewFn, PromotionMask); + NewFn = CompilerUtils::promotePointerArguments(NewFn, PromotionMask); NewFn->addFnAttr(Attribute::AlwaysInline); // Set external linkage, so the functions don't get removed, even if they are @@ -167,7 +161,6 @@ static bool isUtilFunction(StringRef Name) { "RestoreSystemData", "SetI32", "SetTriangleHitAttributes", - "SetupRayGen", "TraceRay", "Traversal", "ShaderStart", @@ -181,8 +174,8 @@ static bool isUtilFunction(StringRef Name) { return false; } -llvm::PreservedAnalyses DXILContIntrinsicPreparePass::run( - llvm::Module &M, llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses DXILContIntrinsicPreparePass::run(llvm::Module &M, + llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the dxil-cont-intrinsic-prepare pass\n"); AnalysisManager.getResult(M); diff --git a/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp index 654738ffcc..71bcb22e9b 100644 --- a/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp +++ b/llvmraytracing/lib/DXILContLgcRtOpConverter.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -29,9 +29,9 @@ // //===----------------------------------------------------------------------===// -#include "lgc/LgcRtDialect.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsUtil.h" +#include "lgc/LgcRtDialect.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -51,19 +51,10 @@ namespace { using namespace llvm; /// An enum to simplify fetching the attributes from reportHit operations. -enum class ReportHitAttributeIndex { - THit = 1, - HitKind, - Attributes, - Count = Attributes -}; +enum class ReportHitAttributeIndex { THit = 1, HitKind, Attributes, Count = Attributes }; /// An enum to simplify fetching the attributes from callShader operations. -enum class CallShaderAttributeIndex { - ShaderIndex = 1, - Param = 2, - Count = Param -}; +enum class CallShaderAttributeIndex { ShaderIndex = 1, Param = 2, Count = Param }; /// An enum to simplify fetching the attributes from traceRay operations. 
enum class TraceRayAttributeIndex { @@ -85,8 +76,7 @@ enum class TraceRayAttributeIndex { Count = Payload }; -template -llvm::Value *getEnumArgOperand(llvm::CallInst &CI, T Index) { +template llvm::Value *getEnumArgOperand(llvm::CallInst &CI, T Index) { static_assert(std::is_enum() && "T must be an enum!"); llvm::Value *Arg = CI.getArgOperand(static_cast(Index)); @@ -94,9 +84,7 @@ llvm::Value *getEnumArgOperand(llvm::CallInst &CI, T Index) { return Arg; } -static void -analyzeShaderKinds(Module &M, - MapVector &ShaderKinds) { +static void analyzeShaderKinds(Module &M, MapVector &ShaderKinds) { auto *EntryPoints = M.getNamedMetadata("dx.entryPoints"); if (!EntryPoints) return; @@ -118,12 +106,10 @@ analyzeShaderKinds(Module &M, // Iterate through tag-value pairs for (size_t I = 0; I < Props->getNumOperands(); I += 2) { - auto Tag = - mdconst::extract(Props->getOperand(I))->getZExtValue(); + auto Tag = mdconst::extract(Props->getOperand(I))->getZExtValue(); if (Tag != 8) // kDxilShaderKindTag continue; - auto KindI = mdconst::extract(Props->getOperand(I + 1)) - ->getZExtValue(); + auto KindI = mdconst::extract(Props->getOperand(I + 1))->getZExtValue(); auto Kind = static_cast(KindI); ShaderKinds[F] = Kind; } @@ -138,45 +124,28 @@ namespace llvm { std::optional DXILContLgcRtOpConverterPass::getCallbackByOpName(StringRef OpName) { using namespace lgc::rt; -#define LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK(Op, Callback) \ - if (OpName.starts_with(Op)) \ - return std::bind(&DXILContLgcRtOpConverterPass::Callback, this, \ - std::placeholders::_1); - - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK( - "acceptHitAndEndSearch", handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("ignoreHit", - handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("instanceID", - handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("instanceIndex", - handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("geometryIndex", - handleSimpleCall) +#define 
LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK(Op, Callback) \ + if (OpName.starts_with(Op)) \ + return std::bind(&DXILContLgcRtOpConverterPass::Callback, this, std::placeholders::_1); + + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("acceptHitAndEndSearch", handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("ignoreHit", handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("instanceID", handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("instanceIndex", handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("geometryIndex", handleSimpleCall) LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("hitKind", handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("primitiveIndex", - handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("rayFlags", - handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("primitiveIndex", handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("rayFlags", handleSimpleCall) LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("rayTMin", handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("rayTCurrent", - handleSimpleCall) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("objectRayDirection", - handleVecResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("objectRayOrigin", - handleVecResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK( - "dispatchRaysDimensions", handleVecResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("dispatchRaysIndex", - handleVecResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("worldRayDirection", - handleVecResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("worldRayOrigin", - handleVecResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("objectToWorld", - handleMatrixResult) - LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("worldToObject", - handleMatrixResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("rayTCurrent", handleSimpleCall) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("objectRayDirection", handleVecResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("objectRayOrigin", handleVecResult) + 
LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("dispatchRaysDimensions", handleVecResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("dispatchRaysIndex", handleVecResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("worldRayDirection", handleVecResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("worldRayOrigin", handleVecResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("objectToWorld", handleMatrixResult) + LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("worldToObject", handleMatrixResult) LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("traceRay", handleTraceRayOp) LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("reportHit", handleReportHitOp) LGC_RT_CALLBACK_TABLE_TRY_GET_CALLBACK("callShader", handleCallShaderOp) @@ -188,8 +157,7 @@ DXILContLgcRtOpConverterPass::getCallbackByOpName(StringRef OpName) { /// Handle a simple call without any arguments, replace the uses with the new /// op. -template -Value *DXILContLgcRtOpConverterPass::handleSimpleCall(CallInst &CI) { +template Value *DXILContLgcRtOpConverterPass::handleSimpleCall(CallInst &CI) { static_assert(std::is_base_of()); Builder->SetInsertPoint(&CI); @@ -198,27 +166,20 @@ Value *DXILContLgcRtOpConverterPass::handleSimpleCall(CallInst &CI) { /// Create a lgc.rt.trace.ray op from a dx.op.traceRay call. 
Value *DXILContLgcRtOpConverterPass::handleTraceRayOp(CallInst &CI) { - assert(CI.arg_size() >= - static_cast(TraceRayAttributeIndex::Count) && - "Invalid argument size!"); + assert(CI.arg_size() >= static_cast(TraceRayAttributeIndex::Count) && "Invalid argument size!"); Builder->SetInsertPoint(&CI); - Value *AccelStructHandle = - getEnumArgOperand(CI, TraceRayAttributeIndex::AccelStruct); + Value *AccelStructHandle = getEnumArgOperand(CI, TraceRayAttributeIndex::AccelStruct); Value *RayFlags = getEnumArgOperand(CI, TraceRayAttributeIndex::RayFlags); - Value *InstanceInclusionMask = - getEnumArgOperand(CI, TraceRayAttributeIndex::InstanceInclusionMask); - Value *RayContributionToHitGroupIndex = getEnumArgOperand( - CI, TraceRayAttributeIndex::RayContributionToHitGroupIndex); - Value *MultiplierForGeometryContribution = getEnumArgOperand( - CI, TraceRayAttributeIndex::MultiplierForGeometryContribution); - Value *MissShaderIndex = - getEnumArgOperand(CI, TraceRayAttributeIndex::MissShaderIndex); - Value *Origin = - createVec3(getEnumArgOperand(CI, TraceRayAttributeIndex::OriginX), - getEnumArgOperand(CI, TraceRayAttributeIndex::OriginY), - getEnumArgOperand(CI, TraceRayAttributeIndex::OriginZ)); + Value *InstanceInclusionMask = getEnumArgOperand(CI, TraceRayAttributeIndex::InstanceInclusionMask); + Value *RayContributionToHitGroupIndex = getEnumArgOperand(CI, TraceRayAttributeIndex::RayContributionToHitGroupIndex); + Value *MultiplierForGeometryContribution = + getEnumArgOperand(CI, TraceRayAttributeIndex::MultiplierForGeometryContribution); + Value *MissShaderIndex = getEnumArgOperand(CI, TraceRayAttributeIndex::MissShaderIndex); + Value *Origin = createVec3(getEnumArgOperand(CI, TraceRayAttributeIndex::OriginX), + getEnumArgOperand(CI, TraceRayAttributeIndex::OriginY), + getEnumArgOperand(CI, TraceRayAttributeIndex::OriginZ)); Value *TMin = getEnumArgOperand(CI, TraceRayAttributeIndex::TMin); Value *Dir = createVec3(getEnumArgOperand(CI, 
TraceRayAttributeIndex::DirX), getEnumArgOperand(CI, TraceRayAttributeIndex::DirY), @@ -226,28 +187,21 @@ Value *DXILContLgcRtOpConverterPass::handleTraceRayOp(CallInst &CI) { Value *TMax = getEnumArgOperand(CI, TraceRayAttributeIndex::TMax); Value *Payload = getEnumArgOperand(CI, TraceRayAttributeIndex::Payload); - Function *AccelStructGetter = - getAccelStructAddr(*CI.getModule(), AccelStructHandle->getType()); - Value *AccelStructAddr = - Builder->CreateCall(AccelStructGetter, AccelStructHandle); + Function *AccelStructGetter = getAccelStructAddr(*CI.getModule(), AccelStructHandle->getType()); + Value *AccelStructAddr = Builder->CreateCall(AccelStructGetter, AccelStructHandle); // TODO: This only creates a Paq array with the size of the payload data for // now. - Type *PaqTy = getFuncArgPtrElementType( - CI.getCalledFunction(), - static_cast(TraceRayAttributeIndex::Payload)); + Type *PaqTy = getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(TraceRayAttributeIndex::Payload)); SmallVector PaqArgs; if (PaqTy) - PaqArgs.push_back(ConstantInt::get( - Builder->getInt32Ty(), DL->getTypeAllocSize(PaqTy).getKnownMinValue())); + PaqArgs.push_back(ConstantInt::get(Builder->getInt32Ty(), DL->getTypeAllocSize(PaqTy).getKnownMinValue())); - Constant *PaqArr = - ConstantArray::get(ArrayType::get(Builder->getInt32Ty(), 1), PaqArgs); + Constant *PaqArr = ConstantArray::get(ArrayType::get(Builder->getInt32Ty(), 1), PaqArgs); - auto *Op = Builder->create( - AccelStructAddr, RayFlags, InstanceInclusionMask, - RayContributionToHitGroupIndex, MultiplierForGeometryContribution, - MissShaderIndex, Origin, TMin, Dir, TMax, Payload, PaqArr); + auto *Op = Builder->create(AccelStructAddr, RayFlags, InstanceInclusionMask, + RayContributionToHitGroupIndex, MultiplierForGeometryContribution, + MissShaderIndex, Origin, TMin, Dir, TMax, Payload, PaqArr); addDXILPayloadTypeToCall(*CI.getCalledFunction(), *Op); @@ -256,21 +210,16 @@ Value 
*DXILContLgcRtOpConverterPass::handleTraceRayOp(CallInst &CI) { /// Create a lgc.rt.report.hit op from a dx.op.reportHit call. Value *DXILContLgcRtOpConverterPass::handleReportHitOp(CallInst &CI) { - assert(CI.arg_size() >= - static_cast(ReportHitAttributeIndex::Count) && - "Invalid argument size!"); + assert(CI.arg_size() >= static_cast(ReportHitAttributeIndex::Count) && "Invalid argument size!"); Builder->SetInsertPoint(&CI); Value *THit = getEnumArgOperand(CI, ReportHitAttributeIndex::THit); Value *HitKind = getEnumArgOperand(CI, ReportHitAttributeIndex::HitKind); - Value *Attributes = - getEnumArgOperand(CI, ReportHitAttributeIndex::Attributes); - auto AttributeSizeBytes = DL->getTypeAllocSize(getFuncArgPtrElementType( - CI.getCalledFunction(), - static_cast(ReportHitAttributeIndex::Attributes))); + Value *Attributes = getEnumArgOperand(CI, ReportHitAttributeIndex::Attributes); + auto AttributeSizeBytes = DL->getTypeAllocSize( + getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(ReportHitAttributeIndex::Attributes))); - auto *Op = Builder->create(THit, HitKind, Attributes, - AttributeSizeBytes); + auto *Op = Builder->create(THit, HitKind, Attributes, AttributeSizeBytes); addDXILPayloadTypeToCall(*CI.getCalledFunction(), *Op); @@ -279,21 +228,16 @@ Value *DXILContLgcRtOpConverterPass::handleReportHitOp(CallInst &CI) { /// Create a lgc.rt.call.callable.shader op from a dx.op.callShader call. 
Value *DXILContLgcRtOpConverterPass::handleCallShaderOp(CallInst &CI) { - assert(CI.arg_size() >= - static_cast(CallShaderAttributeIndex::Count) && - "Invalid argument size!"); + assert(CI.arg_size() >= static_cast(CallShaderAttributeIndex::Count) && "Invalid argument size!"); Builder->SetInsertPoint(&CI); - Value *ShaderIndex = - getEnumArgOperand(CI, CallShaderAttributeIndex::ShaderIndex); + Value *ShaderIndex = getEnumArgOperand(CI, CallShaderAttributeIndex::ShaderIndex); Value *Param = getEnumArgOperand(CI, CallShaderAttributeIndex::Param); - auto ParamSizeBytes = DL->getTypeAllocSize(getFuncArgPtrElementType( - CI.getCalledFunction(), - static_cast(CallShaderAttributeIndex::Param))); + auto ParamSizeBytes = DL->getTypeAllocSize( + getFuncArgPtrElementType(CI.getCalledFunction(), static_cast(CallShaderAttributeIndex::Param))); - auto *Op = Builder->create( - ShaderIndex, Param, ParamSizeBytes.getKnownMinValue()); + auto *Op = Builder->create(ShaderIndex, Param, ParamSizeBytes.getKnownMinValue()); addDXILPayloadTypeToCall(*CI.getCalledFunction(), *Op); @@ -307,20 +251,17 @@ Value *DXILContLgcRtOpConverterPass::handleCallShaderOp(CallInst &CI) { /// sequence: /// %val = call lgc.rt.op(...) 
/// %extract.index = extractelement %val, arrayIndex -template -Value *DXILContLgcRtOpConverterPass::handleVecResult(CallInst &CI) { +template Value *DXILContLgcRtOpConverterPass::handleVecResult(CallInst &CI) { static_assert(std::is_base_of()); constexpr int ArrayIndexArgPosition = 1; - assert(CI.getNumOperands() > ArrayIndexArgPosition && - "Invalid number of operands!"); + assert(CI.getNumOperands() > ArrayIndexArgPosition && "Invalid number of operands!"); Value *Index = CI.getOperand(ArrayIndexArgPosition); if (!Index) { - report_fatal_error( - "DXILContLgcRtOpConverterPass::handleVecResult: Invalid operand index " - "at position " + - Twine(ArrayIndexArgPosition)); + report_fatal_error("DXILContLgcRtOpConverterPass::handleVecResult: Invalid operand index " + "at position " + + Twine(ArrayIndexArgPosition)); } if (auto *Constant = dyn_cast(Index)) { @@ -328,16 +269,13 @@ Value *DXILContLgcRtOpConverterPass::handleVecResult(CallInst &CI) { if (ElementIndex >= MaxElements) { report_fatal_error("DXILContLgcRtOpConverterPass::handleVecResult: " "Operand at position " + - Twine(ArrayIndexArgPosition) + - " is out of bounds (max: " + Twine(MaxElements) + - ")!"); + Twine(ArrayIndexArgPosition) + " is out of bounds (max: " + Twine(MaxElements) + ")!"); } } Builder->SetInsertPoint(&CI); Value *DialectOp = Builder->create(); - return Builder->CreateExtractElement(DialectOp, Index, - DialectOp->getName() + "extract"); + return Builder->CreateExtractElement(DialectOp, Index, DialectOp->getName() + "extract"); } /// Helper to convert single-value matrix operations from DXIL to matrix return @@ -359,12 +297,9 @@ Value *DXILContLgcRtOpConverterPass::handleMatrixResult(CallInst &CI) { constexpr unsigned RowArgumentIndex = 1; constexpr unsigned ColumnArgumentIndex = 2; - assert(CI.getNumOperands() > - std::max(ColumnArgumentIndex, RowArgumentIndex) && - "Invalid number of operands!"); + assert(CI.getNumOperands() > std::max(ColumnArgumentIndex, RowArgumentIndex) && "Invalid 
number of operands!"); - auto TryExtractIndexOperand = [&](unsigned ArgumentIndex, - unsigned UpperBound) -> Value * { + auto TryExtractIndexOperand = [&](unsigned ArgumentIndex, unsigned UpperBound) -> Value * { Value *Index = CI.getOperand(ArgumentIndex); if (!Index) { report_fatal_error("DXILContLgcRtOpConverterPass::handleMatrixResult: " @@ -378,10 +313,8 @@ Value *DXILContLgcRtOpConverterPass::handleMatrixResult(CallInst &CI) { if (ConstantIndex >= UpperBound) { report_fatal_error("DXILContLgcRtOpConverterPass::handleMatrixResult: " "Operand with value " + - Twine(ConstantIndex) + - " is out of bounds (upper bound: " + - Twine(UpperBound) + ", xMax, yMax = (" + - Twine(MaxColumns) + ", " + Twine(MaxRows) + "))!"); + Twine(ConstantIndex) + " is out of bounds (upper bound: " + Twine(UpperBound) + + ", xMax, yMax = (" + Twine(MaxColumns) + ", " + Twine(MaxRows) + "))!"); } } @@ -397,51 +330,39 @@ Value *DXILContLgcRtOpConverterPass::handleMatrixResult(CallInst &CI) { { IRBuilder<>::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint( - &*CI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); + Builder->SetInsertPoint(&*CI.getFunction()->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); Alloca = Builder->CreateAlloca(DialectOp->getType()); } Builder->CreateStore(DialectOp, Alloca); - Value *InnerVecGEP = Builder->CreateGEP( - DialectOp->getType(), Alloca, {Builder->getInt32(0), Column}, "col.gep"); - Value *InnerVecLoad = Builder->CreateLoad( - DialectOp->getType()->getArrayElementType(), InnerVecGEP, "col.gep.load"); - return Builder->CreateExtractElement(InnerVecLoad, Row, - InnerVecLoad->getName() + ".row"); + Value *InnerVecGEP = Builder->CreateGEP(DialectOp->getType(), Alloca, {Builder->getInt32(0), Column}, "col.gep"); + Value *InnerVecLoad = Builder->CreateLoad(DialectOp->getType()->getArrayElementType(), InnerVecGEP, "col.gep.load"); + return Builder->CreateExtractElement(InnerVecLoad, Row, InnerVecLoad->getName() + ".row"); } /// Helper 
to create a vec3 from three elements. Value *DXILContLgcRtOpConverterPass::createVec3(Value *X, Value *Y, Value *Z) { - assert( - X->getType() == Y->getType() && - "DXILContLgcRtOpConverterPass::createVec3: Invalid types for X and Y!"); - assert( - X->getType() == Z->getType() && - "DXILContLgcRtOpConverterPass::createVec3: Invalid types for X and Z!"); - - auto *Vec = Builder->CreateInsertElement( - FixedVectorType::get(X->getType(), 3), X, static_cast(0)); + assert(X->getType() == Y->getType() && "DXILContLgcRtOpConverterPass::createVec3: Invalid types for X and Y!"); + assert(X->getType() == Z->getType() && "DXILContLgcRtOpConverterPass::createVec3: Invalid types for X and Z!"); + + auto *Vec = Builder->CreateInsertElement(FixedVectorType::get(X->getType(), 3), X, static_cast(0)); Vec = Builder->CreateInsertElement(Vec, Y, 1); return Builder->CreateInsertElement(Vec, Z, 2); } /// Helper to add the type of the DXIL payload to the lgc.rt callsite if it does /// not exist. -void DXILContLgcRtOpConverterPass::addDXILPayloadTypeToCall(Function &DXILFunc, - CallInst &CI) { +void DXILContLgcRtOpConverterPass::addDXILPayloadTypeToCall(Function &DXILFunc, CallInst &CI) { // This should not happen theoretically. if (DXILFunc.arg_empty()) { - report_fatal_error( - "DXILContLgcRtOpConverter::addDXILPayloadTypeToCall: DXIL " - "function " + - DXILFunc.getName() + " has no arguments.\n"); + report_fatal_error("DXILContLgcRtOpConverter::addDXILPayloadTypeToCall: DXIL " + "function " + + DXILFunc.getName() + " has no arguments.\n"); } auto *PayloadPtr = DXILFunc.getArg(DXILFunc.arg_size() - 1); - auto *PayloadPtrTy = - ContArgTy::get(&DXILFunc, PayloadPtr).getPointerElementType(); + auto *PayloadPtrTy = TypedArgTy::get(PayloadPtr).getPointerElementType(); // Store a poison value as metadata with the given type. 
ContHelper::setPayloadTypeMetadata(&CI, PayloadPtrTy); @@ -456,8 +377,7 @@ bool DXILContLgcRtOpConverterPass::convertDxOp(Function &Func) { StringRef OpName = FuncName.substr(std::strlen(CalleePrefix)); assert(!OpName.empty() && "Invalid op name"); - LLVM_DEBUG(dbgs() << "DXILContLgcRtOpConverter: Handling operation dx.op." - << OpName << '\n'); + LLVM_DEBUG(dbgs() << "DXILContLgcRtOpConverter: Handling operation dx.op." << OpName << '\n'); // Try to find the corresponding callback by the OpName. auto Callback = getCallbackByOpName(OpName); @@ -472,9 +392,8 @@ bool DXILContLgcRtOpConverterPass::convertDxOp(Function &Func) { Value *NewOp = (*Callback)(*CI, this); if (!NewOp) - report_fatal_error( - "DXILContLgcRtOpConverterPass::visitFunction: unexpected " - "nullptr when trying to replace instruction!"); + report_fatal_error("DXILContLgcRtOpConverterPass::visitFunction: unexpected " + "nullptr when trying to replace instruction!"); if (CI->hasName()) NewOp->takeName(CI); @@ -523,10 +442,8 @@ bool DXILContLgcRtOpConverterPass::prepareEntryPointShaders() { case DXILShaderKind::Callable: { Type *PayloadTy = getFuncArgPtrElementType(Func, 0); assert(PayloadTy && "Shader must have a payload argument"); - Func->setMetadata( - ContHelper::MDContPayloadTyName, - MDNode::get(Func->getContext(), - {ConstantAsMetadata::get(PoisonValue::get(PayloadTy))})); + Func->setMetadata(ContHelper::MDContPayloadTyName, + MDNode::get(Func->getContext(), {ConstantAsMetadata::get(PoisonValue::get(PayloadTy))})); break; } default: @@ -536,9 +453,7 @@ bool DXILContLgcRtOpConverterPass::prepareEntryPointShaders() { return Changed; } -PreservedAnalyses -DXILContLgcRtOpConverterPass::run(Module &Module, - ModuleAnalysisManager &AnalysisManager) { +PreservedAnalyses DXILContLgcRtOpConverterPass::run(Module &Module, ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the pass dxil-cont-lgc-rt-op-converter\n"); AnalysisManager.getResult(Module); diff --git 
a/llvmraytracing/lib/DXILContPostProcess.cpp b/llvmraytracing/lib/DXILContPostProcess.cpp index ae434696fc..279dcda750 100644 --- a/llvmraytracing/lib/DXILContPostProcess.cpp +++ b/llvmraytracing/lib/DXILContPostProcess.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -31,21 +31,18 @@ // * Wraps all uses of function pointers into an intrinsic that adds // metadata (e.g. VGPR counts) to the function pointer. // -// The addrspace(20) globals that represent registers are sorted by this pass -// and replaced with indices into a single @REGISTERS global. 
-// //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" -#include "lgc/LgcCpsDialect.h" -#include "lgc/LgcIlCpsDialect.h" -#include "lgc/LgcRtDialect.h" #include "llpc/GpurtEnums.h" -#include "llvm-dialects/Dialect/Builder.h" #include "llvmraytracing/Continuations.h" #include "llvmraytracing/ContinuationsUtil.h" #include "llvmraytracing/CpsStackLowering.h" #include "llvmraytracing/GpurtContext.h" +#include "lgc/LgcCpsDialect.h" +#include "lgc/LgcIlCpsDialect.h" +#include "lgc/LgcRtDialect.h" +#include "llvm-dialects/Dialect/Builder.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -67,25 +64,6 @@ using namespace llvm; #define DEBUG_TYPE "dxil-cont-post-process" -static cl::opt ReportContStateSizes( - "report-cont-state-sizes", - cl::desc("Report continuation state sizes for entry functions."), - cl::init(false)); - -static cl::opt ReportPayloadRegisterSizes( - "report-payload-register-sizes", - cl::desc("Report payload VGPR sizes for functions."), cl::init(false)); - -static cl::opt ReportSystemDataSizes( - "report-system-data-sizes", - cl::desc("Report incoming system data sizes for functions."), - cl::init(false)); - -static cl::opt ReportAllSizes( - "report-all-continuation-sizes", - cl::desc("Report continuation state, payload and system data sizes."), - cl::init(false)); - namespace { class DXILContPostProcessPassImpl final { public: @@ -106,144 +84,48 @@ class DXILContPostProcessPassImpl final { private: void lowerGetResumePointAddr(Function &F); - void handleRegisterBufferGetPointer(Function &F, GlobalVariable *Payload); - void handleValueI32Count(Function &F); - void handleValueGetI32(Function &F); - void handleValueSetI32(Function &F); void handleContStackIntrinsic(FunctionAnalysisManager &FAM, Function &F); void initializeProcessableFunctionData(); - bool handleRegisterBufferCalls(); bool replaceIntrinsicCalls(Function &F, const 
FunctionData &Data); bool handleIntrinsicCalls(llvm::ModuleAnalysisManager &AnalysisManager); bool lowerCpsOps(); void lowerJumpOp(lgc::cps::JumpOp &JumpOp); - bool unfoldGlobals(); bool handleAmdInternals(); + bool cleanupIncomingPayloadMetadata(Function &F); + bool cleanupOutgoingPayloadMetadata(); Module *Mod; Module *GpurtLibrary; - GlobalVariable *Registers; MapVector ToProcess; llvm_dialects::Builder Builder; std::optional StackAddrspace; std::optional StackLowering; CompilerUtils::CrossModuleInliner CrossInliner; - // For performance reasons, we keep this list of continuation.{wait}Continue - // calls here and update it when required. - SmallVector ContinueCalls; Function *GetGlobalMemBase = nullptr; }; -// Collects all calls to continuation.[wait]continue -static void collectContinueCalls(const Module &Mod, - SmallVectorImpl &ContinueCalls) { - for (const auto &Name : - {"continuation.continue", "continuation.waitContinue"}) { - auto *Func = Mod.getFunction(Name); - if (!Func) - continue; - - llvm::forEachCall( - *Func, [&](CallInst &CInst) { ContinueCalls.push_back(&CInst); }); - } -} - -static void reportContStateSizes(Module &M) { - // Determine the set of entry functions which have a continuation function - // We cannot rely on the state size for this, because functions without a - // continuation (e.g. a non-recursive CHS) have a state size of 0 in metadata. 
- SetVector EntriesWithContinuationFunctions; - for (auto &F : M.functions()) { - if (F.isDeclaration()) - continue; - if (auto *MD = dyn_cast_or_null( - F.getMetadata(ContHelper::MDContinuationName))) { - auto *EntryF = extractFunctionOrNull(MD->getOperand(0)); - if (EntryF != &F) - EntriesWithContinuationFunctions.insert(EntryF); - } - } - - for (auto *F : EntriesWithContinuationFunctions) { - auto Stage = lgc::rt::getLgcRtShaderStage(F); - if (!Stage || F->isDeclaration()) - continue; - - auto OptStateSize = ContHelper::tryGetContinuationStateByteCount(*F); - if (!OptStateSize.has_value()) - continue; - - DXILShaderKind ShaderKind = - ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); - dbgs() << "Continuation state size of \"" << F->getName() << "\" (" - << ShaderKind << "): " << OptStateSize.value() << " bytes\n"; - } -} - -// For every function with incoming or outgoing (or both) payload registers, -// report the incoming size and the max outgoing size in bytes. -static void reportPayloadSizes(Module &M, ArrayRef ContinueCalls) { - // For every function with continue calls, determine the max number of - // outgoing registers - DenseMap MaxOutgoingRegisterCounts; - - for (auto *CallInst : ContinueCalls) { - auto RegCount = ContHelper::tryGetOutgoingRegisterCount(CallInst).value(); - MaxOutgoingRegisterCounts[CallInst->getFunction()] = - std::max(MaxOutgoingRegisterCounts[CallInst->getFunction()], RegCount); - } - - for (auto &F : M) { - auto Stage = lgc::rt::getLgcRtShaderStage(&F); - if (!Stage || F.isDeclaration()) - continue; - - DXILShaderKind ShaderKind = - ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); - auto OptIncomingPayloadRegisterCount = - ContHelper::tryGetIncomingRegisterCount(&F); - bool HasIncomingPayload = OptIncomingPayloadRegisterCount.has_value(); - auto It = MaxOutgoingRegisterCounts.find(&F); - bool HasOutgoingPayload = (It != MaxOutgoingRegisterCounts.end()); - - if (!HasIncomingPayload && !HasOutgoingPayload) - 
continue; +// Removes outgoing payload metadata +bool DXILContPostProcessPassImpl::cleanupOutgoingPayloadMetadata() { + struct State { + bool Changed = false; + }; - dbgs() << "Incoming and max outgoing payload VGPR size of \"" << F.getName() - << "\" (" << ShaderKind << "): "; - if (HasIncomingPayload) { - dbgs() << OptIncomingPayloadRegisterCount.value() * RegisterBytes; - } else { - dbgs() << "(no incoming payload)"; - } - dbgs() << " and "; - if (HasOutgoingPayload) { - dbgs() << It->second * RegisterBytes; - } else { - dbgs() << "(no outgoing payload)"; - } - dbgs() << " bytes\n"; - } -} + static const auto Visitor = + llvm_dialects::VisitorBuilder() + .addSet([](State &State, Instruction &Op) { + ContHelper::OutgoingRegisterCount::reset(&Op); + ContHelper::ReturnedRegisterCount::reset(&Op); + State.Changed = true; + }) + .build(); -static void reportSystemDataSizes( - Module &M, - const MapVector - &FunctionData) { - for (const auto &[F, FuncData] : FunctionData) { - if (FuncData.SystemDataTy == nullptr) - continue; - auto SystemDataBytes = - M.getDataLayout().getTypeStoreSize(FuncData.SystemDataTy); + State S; + Visitor.visit(S, *Mod); - dbgs() << "Incoming system data of \"" << F->getName() << "\" (" - << FuncData.Kind << ") is \"" - << FuncData.SystemDataTy->getStructName() - << "\", size: " << SystemDataBytes << " bytes\n"; - } + return S.Changed; } static Function *getContinuationGetAddrAndMD(Module &M) { @@ -259,74 +141,29 @@ static Function *getContinuationGetAddrAndMD(Module &M) { /// Checks some properties guaranteed for a module containing continuations /// as expected by the backend. -[[maybe_unused]] static void -checkContinuationsModule(const Module &M, - const SmallVectorImpl &ContinueCalls) { - // Check that all continuation.continue calls have registercount metadata. 
- for (auto *CallInst : ContinueCalls) { - if (!ContHelper::tryGetOutgoingRegisterCount(CallInst)) - report_fatal_error("Missing registercount metadata on continue call!"); - } - - // Check that every function has at most one setLocalRootIndex call. - if (auto *SetF = M.getFunction("amd.dx.setLocalRootIndex")) { - SmallDenseSet HasSetF; - - llvm::forEachCall(*SetF, [&](CallInst &CInst) { - // Returns true if it is a new value - auto Inserted = HasSetF.insert(CInst.getFunction()); - if (!Inserted.second) - report_fatal_error( - "Found a function with more than one setLocalRootIndex"); - }); - } - +[[maybe_unused]] static void checkContinuationsModule(const Module &M) { // Check that resume functions do not have a stack size set. for (auto &Func : M) { - if (auto *MD = dyn_cast_or_null( - Func.getMetadata(ContHelper::MDContinuationName))) { + if (auto *MD = dyn_cast_or_null(Func.getMetadata(ContHelper::MDContinuationName))) { auto *StartFunc = extractFunctionOrNull(MD->getOperand(0)); bool IsStart = (&Func == StartFunc); - bool HasStackSizeMetadata = - ContHelper::tryGetStackSize(&Func).has_value(); + bool HasStackSizeMetadata = ContHelper::StackSize::tryGetValue(&Func).has_value(); if (!IsStart && HasStackSizeMetadata) report_fatal_error("Found resume function with stack size metadata!"); } } } -/// Replace a global with a part of another global. -/// Helper method for merging multiple globals into one. -static void replaceGlobal(const DataLayout &DL, GlobalVariable *Registers, - GlobalVariable *G, uint64_t Offset) { - LLVM_DEBUG(dbgs() << "Offset for global " << G->getName() - << " in @REGISTERS: " << (Offset / RegisterBytes) << "\n"); - - auto *I64 = Type::getInt64Ty(G->getContext()); - SmallVector Indices = { - ConstantInt::get(I64, 0), ConstantInt::get(I64, Offset / RegisterBytes)}; - Constant *Gep = Offset == 0 - ? 
Registers - : ConstantExpr::getInBoundsGetElementPtr( - Registers->getValueType(), Registers, Indices); - auto *Repl = ConstantExpr::getBitCast(Gep, G->getType()); - - G->replaceAllUsesWith(Repl); - G->eraseFromParent(); -} - void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { auto *GetResumePointAddr = &F; - assert(GetResumePointAddr->getReturnType()->isIntegerTy(64) && - GetResumePointAddr->arg_size() == 0); + assert(GetResumePointAddr->getReturnType()->isIntegerTy(64) && GetResumePointAddr->arg_size() == 0); // Search calls to GetResumePointAddr, and lower it to the argument of the // next continue call. Then remove it from that continue call. for (auto &Use : make_early_inc_range(GetResumePointAddr->uses())) { auto *CInst = dyn_cast(Use.getUser()); - if (!CInst || !CInst->isCallee(&Use) || - ToProcess.count(CInst->getFunction()) == 0) { + if (!CInst || !CInst->isCallee(&Use) || ToProcess.count(CInst->getFunction()) == 0) { // Non-call use, or call in unknown function. This will likely result in a // remaining non-lowered call reported as error at the end of this // function. @@ -344,27 +181,27 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { } auto *ContinueCall = *FoundContinueCall; - // Only used for non-cps functions. unsigned ReturnAddrArgNum = 1; Value *ReturnAddr = nullptr; - if (auto *Jump = dyn_cast(ContinueCall); Jump) { - ReturnAddr = Jump->getTarget(); + if (auto *Jump = dyn_cast(ContinueCall)) { + ReturnAddrArgNum = 3; + ReturnAddr = *Jump->getTail().begin(); } else { - auto Name = ContinueCall->getCalledFunction()->getName(); - - if (Name != "continuation.continue" && - Name != "continuation.waitContinue") + if (!isa(ContinueCall)) report_fatal_error("The BB must end in a continue call after a " "GetResumePointAddr"); - bool HasWaitMask = Name == "continuation.waitContinue"; - ReturnAddrArgNum = HasWaitMask ? 
2 : 1; + if (auto *WaitContinue = dyn_cast(ContinueCall)) { + ReturnAddr = WaitContinue->getReturnAddr(); + ReturnAddrArgNum = 2; + } else { + ReturnAddr = cast(ContinueCall)->getReturnAddr(); + } + // Move up computation of the resume address - ReturnAddr = ContinueCall->getArgOperand(ReturnAddrArgNum); - assert((ReturnAddr->getType() == Builder.getInt64Ty()) && - "Unexpected return addr type!"); + assert((ReturnAddr->getType() == Builder.getInt64Ty()) && "Unexpected return addr type!"); } SmallVector MoveInstrs; @@ -388,7 +225,7 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { CInst->replaceAllUsesWith(ReturnAddr); - // Re-create the continuation.continue call without the return address + // Re-create the lgc.ilcps.continue / lgc.cps.jump call without the return address // argument. SmallVector Args; for (unsigned I = 0; I < ContinueCall->arg_size(); I++) { @@ -405,89 +242,9 @@ void DXILContPostProcessPassImpl::lowerGetResumePointAddr(Function &F) { } } -void DXILContPostProcessPassImpl::handleRegisterBufferGetPointer( - Function &F, GlobalVariable *Payload) { - // Check calls that take the payload as argument - llvm::forEachCall(F, [&](CallInst &CInst) { - if (isCastGlobal(Payload, CInst.getOperand(0))) { - // Replace call with first part of payload - static_assert(FirstPayloadMemoryPointerRegister == 0, - "Need to adjust offset here"); - Builder.SetInsertPoint(&CInst); - - Type *StackOffsetTy = CInst.getType(); - - // Load an addrspace(32) pointer from the payload global and let stack - // handling do the conversion into adds/muls and GEPs. 
- auto *CastPayload = Builder.CreateBitOrPointerCast( - Payload, StackOffsetTy->getPointerTo(Payload->getAddressSpace())); - Value *Ptr = Builder.CreateLoad(StackOffsetTy, CastPayload); - CInst.replaceAllUsesWith(Ptr); - CInst.eraseFromParent(); - } - }); -} - -void DXILContPostProcessPassImpl::handleValueI32Count(Function &F) { - assert(F.arg_size() == 1 - // i32 count - && F.getFunctionType()->getReturnType()->isIntegerTy(32) - // Pointer to a struct - && F.getFunctionType()->getParamType(0)->isPointerTy()); - - auto *Ty = getFuncArgPtrElementType(&F, 0); - auto *Size = Builder.getInt32( - Mod->getDataLayout().getTypeStoreSize(Ty).getFixedValue() / 4); - llvm::replaceCallsToFunction(F, *Size); -} - -void DXILContPostProcessPassImpl::handleValueGetI32(Function &F) { - assert(F.arg_size() == 2 - // value - && F.getFunctionType()->getReturnType()->isIntegerTy(32) - // Pointer to a struct - && F.getFunctionType()->getParamType(0)->isPointerTy() - // index - && F.getFunctionType()->getParamType(1)->isIntegerTy(32)); - - auto *I32 = Builder.getInt32Ty(); - - llvm::forEachCall(F, [&](CallInst &CInst) { - Builder.SetInsertPoint(&CInst); - Value *Addr = - Builder.CreateBitCast(CInst.getArgOperand(0), I32->getPointerTo()); - Addr = Builder.CreateGEP(I32, Addr, CInst.getArgOperand(1)); - auto *Load = Builder.CreateLoad(I32, Addr); - CInst.replaceAllUsesWith(Load); - CInst.eraseFromParent(); - }); -} - -void DXILContPostProcessPassImpl::handleValueSetI32(Function &F) { - assert(F.arg_size() == 3 && - F.getFunctionType()->getReturnType()->isVoidTy() - // Pointer to a struct - && F.getFunctionType()->getParamType(0)->isPointerTy() - // index - && F.getFunctionType()->getParamType(1)->isIntegerTy(32) - // value - && F.getFunctionType()->getParamType(2)->isIntegerTy(32)); - - auto *I32 = Builder.getInt32Ty(); - llvm::forEachCall(F, [&](CallInst &CInst) { - Builder.SetInsertPoint(&CInst); - Value *Addr = - Builder.CreateBitCast(CInst.getArgOperand(0), I32->getPointerTo()); - Addr 
= Builder.CreateGEP(I32, Addr, CInst.getArgOperand(1)); - Builder.CreateStore(CInst.getArgOperand(2), Addr); - CInst.eraseFromParent(); - }); -} - // Replace calls to _AmdContStack* with calls to lgc.cps dialect ops. // Do some simple constant propagation on the fly. -void DXILContPostProcessPassImpl::handleContStackIntrinsic( - FunctionAnalysisManager &FAM, Function &F) { +void DXILContPostProcessPassImpl::handleContStackIntrinsic(FunctionAnalysisManager &FAM, Function &F) { // Check if the function is either of void return type or i32 return type and // has no arguments or a single integer argument dividable by 32 (to allow @@ -495,24 +252,20 @@ void DXILContPostProcessPassImpl::handleContStackIntrinsic( // AmdContStackStore). Type *ReturnTy = F.getReturnType(); (void)ReturnTy; - assert( - (ReturnTy->isVoidTy() || (ReturnTy->isIntegerTy() && - (ReturnTy->getIntegerBitWidth() % 32 == 0))) && - "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " - "return type!"); + assert((ReturnTy->isVoidTy() || (ReturnTy->isIntegerTy() && (ReturnTy->getIntegerBitWidth() % 32 == 0))) && + "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " + "return type!"); Type *FuncTy = F.getFunctionType(); (void)(FuncTy); - assert((FuncTy->getFunctionNumParams() == 0 || - FuncTy->getFunctionParamType(0)->isIntegerTy()) && + assert((FuncTy->getFunctionNumParams() == 0 || FuncTy->getFunctionParamType(0)->isIntegerTy()) && "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " "argument signature!"); StringRef FuncName = F.getName(); FuncName.consume_front("_AmdContStack"); - auto ConstantFoldInstruction = [&](Function *Parent, - Value *SizeArg) -> Value * { + auto ConstantFoldInstruction = [&](Function *Parent, Value *SizeArg) -> Value * { if (!isa(SizeArg)) return SizeArg; @@ -524,8 +277,7 @@ void DXILContPostProcessPassImpl::handleContStackIntrinsic( auto &DT = FAM.getResult(*Parent); auto &TLI = FAM.getResult(*Parent); auto &AC = FAM.getResult(*Parent); 
- const SimplifyQuery SQ(Parent->getParent()->getDataLayout(), &TLI, &DT, - &AC); + const SimplifyQuery SQ(Parent->getParent()->getDataLayout(), &TLI, &DT, &AC); if (auto *NewSize = simplifyInstruction(I, SQ)) return NewSize; @@ -542,48 +294,37 @@ void DXILContPostProcessPassImpl::handleContStackIntrinsic( bool IsMemoryAccess = false; if (FuncName.starts_with("Alloc")) { - Value *SizeArg = - ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Value *SizeArg = ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); Replacement = Builder.create(SizeArg); if (auto *Size = dyn_cast(SizeArg)) - ContHelper::addStackSize(CInst.getFunction(), Size->getSExtValue()); + ContHelper::StackSize::inc(CInst.getFunction(), Size->getSExtValue()); } else if (FuncName.starts_with("Free")) { - Value *SizeArg = - ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Value *SizeArg = ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); Replacement = Builder.create(SizeArg); } else if (FuncName.starts_with("SetPtr")) { Value *Vsp = CInst.getArgOperand(0); - Replacement = Builder.create(Builder.CreateIntToPtr( - Vsp, - PointerType::get(Builder.getInt8Ty(), lgc::cps::stackAddrSpace))); + Replacement = Builder.create( + Builder.CreateIntToPtr(Vsp, PointerType::get(Builder.getInt8Ty(), lgc::cps::stackAddrSpace))); } else if (FuncName.starts_with("GetPtr")) { Replacement = Builder.create(); } else if (FuncName.starts_with("Load")) { - Value *Addr = - ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); - Value *Ptr = Builder.CreateIntToPtr( - Addr, CInst.getType()->getPointerTo(lgc::cps::stackAddrSpace)); - Replacement = Builder.CreateAlignedLoad( - DestTy, Ptr, - Align(CpsStackLowering::getContinuationStackAlignment())); + Value *Addr = ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Value *Ptr = Builder.CreateIntToPtr(Addr, CInst.getType()->getPointerTo(lgc::cps::stackAddrSpace)); + 
Replacement = Builder.CreateAlignedLoad(DestTy, Ptr, Align(CpsStackLowering::getContinuationStackAlignment())); if (FuncName.starts_with("LoadLastUse")) CompilerUtils::setIsLastUseLoad(*cast(Replacement)); IsMemoryAccess = true; } else if (FuncName.starts_with("Store")) { - assert(FuncTy->getFunctionNumParams() == 2 && - "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " - "argument signature for AmdContStackStore!"); + assert(FuncTy->getFunctionNumParams() == 2 && "DXILContPostProcessPassImpl::handleContStackIntrinsic: Invalid " + "argument signature for AmdContStackStore!"); - Value *Addr = - ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); + Value *Addr = ConstantFoldInstruction(CInst.getFunction(), CInst.getArgOperand(0)); Value *Val = CInst.getArgOperand(1); - Value *Ptr = Builder.CreateIntToPtr( - Addr, Val->getType()->getPointerTo(lgc::cps::stackAddrSpace)); - Builder.CreateAlignedStore( - Val, Ptr, Align(CpsStackLowering::getContinuationStackAlignment())); + Value *Ptr = Builder.CreateIntToPtr(Addr, Val->getType()->getPointerTo(lgc::cps::stackAddrSpace)); + Builder.CreateAlignedStore(Val, Ptr, Align(CpsStackLowering::getContinuationStackAlignment())); IsMemoryAccess = true; } else { @@ -614,28 +355,19 @@ void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { // For the kernel entry function in GPURT, we only care about its existence // in @ToProcess, since we only want to create an alloca for the // continuation stack pointer later (and do the lgc.cps lowering). 
- if (lgc::rt::getLgcRtShaderStage(&F) == - lgc::rt::RayTracingShaderStage::KernelEntry) { + if (Stage == lgc::rt::RayTracingShaderStage::KernelEntry) { FunctionData Data; Data.Kind = DXILShaderKind::Compute; - [[maybe_unused]] bool DidInsert = - ToProcess.insert({&F, std::move(Data)}).second; + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); continue; } - // Handle entry functions first - if (auto *MD = dyn_cast_or_null( - F.getMetadata(ContHelper::MDContinuationName))) { - auto *EntryF = extractFunctionOrNull(MD->getOperand(0)); - if (&F != EntryF) - continue; - } else { + // Handle start functions first + if (!llvm::isStartFunc(&F)) continue; - } - DXILShaderKind Kind = - ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); + DXILShaderKind Kind = ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); const bool IsCpsFunction = lgc::cps::isCpsFunction(F); switch (Kind) { @@ -643,14 +375,11 @@ void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { FunctionData Data; Data.Kind = Kind; - Data.SystemDataArgumentIndex = - !IsCpsFunction ? SystemDataArgumentIndex : CpsArgIdxSystemData; + Data.SystemDataArgumentIndex = !IsCpsFunction ? SystemDataArgumentIndex : CpsArgIdxSystemData; - Data.SystemDataTy = - F.getFunctionType()->getParamType(Data.SystemDataArgumentIndex); + Data.SystemDataTy = F.getFunctionType()->getParamType(Data.SystemDataArgumentIndex); - [[maybe_unused]] bool DidInsert = - ToProcess.insert({&F, std::move(Data)}).second; + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); break; } @@ -662,12 +391,9 @@ void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { FunctionData Data; Data.Kind = Kind; - Data.SystemDataArgumentIndex = - !IsCpsFunction ? 
SystemDataArgumentIndex : CpsArgIdxSystemData; - Data.SystemDataTy = - F.getFunctionType()->getParamType(Data.SystemDataArgumentIndex); - [[maybe_unused]] bool DidInsert = - ToProcess.insert({&F, std::move(Data)}).second; + Data.SystemDataArgumentIndex = !IsCpsFunction ? SystemDataArgumentIndex : CpsArgIdxSystemData; + Data.SystemDataTy = F.getFunctionType()->getParamType(Data.SystemDataArgumentIndex); + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); break; } @@ -680,52 +406,26 @@ void DXILContPostProcessPassImpl::initializeProcessableFunctionData() { for (auto &F : *Mod) { if (F.isDeclaration()) continue; - if (auto *MD = dyn_cast_or_null( - F.getMetadata(ContHelper::MDContinuationName))) { + if (auto *MD = dyn_cast_or_null(F.getMetadata(ContHelper::MDContinuationName))) { auto *EntryF = extractFunctionOrNull(MD->getOperand(0)); auto Stage = lgc::rt::getLgcRtShaderStage(EntryF); if (Stage && &F != EntryF) { FunctionData Data = ToProcess[EntryF]; Data.IsStart = false; - Data.SystemDataArgumentIndex = !lgc::cps::isCpsFunction(F) - ? SystemDataArgumentIndex - : CpsArgIdxSystemData; + Data.SystemDataArgumentIndex = !lgc::cps::isCpsFunction(F) ? SystemDataArgumentIndex : CpsArgIdxSystemData; - Data.SystemDataTy = F.getArg(Data.SystemDataArgumentIndex)->getType(); - [[maybe_unused]] bool DidInsert = - ToProcess.insert({&F, std::move(Data)}).second; + // Extract the actual system data type from the { systemData, padding, + // payload } struct returned by await. 
+ Data.SystemDataTy = F.getArg(Data.SystemDataArgumentIndex)->getType()->getStructElementType(0); + [[maybe_unused]] bool DidInsert = ToProcess.insert({&F, std::move(Data)}).second; assert(DidInsert); } } } } -bool DXILContPostProcessPassImpl::handleRegisterBufferCalls() { - bool Changed = false; - auto *Payload = Mod->getGlobalVariable(ContHelper::GlobalPayloadName); - - for (auto &F : Mod->functions()) { - auto Name = F.getName(); - if (Name.starts_with("registerbuffer.setpointerbarrier")) { - // Remove setpointerbarrier instructions related to payload - llvm::forEachCall(F, [&](CallInst &CInst) { - if (isCastGlobal(Payload, CInst.getOperand(0))) { - CInst.eraseFromParent(); - Changed = true; - } - }); - } else if (Name.starts_with("registerbuffer.getpointer")) { - Changed = true; - handleRegisterBufferGetPointer(F, Payload); - } - } - - return Changed; -} - -bool DXILContPostProcessPassImpl::handleIntrinsicCalls( - llvm::ModuleAnalysisManager &AnalysisManager) { +bool DXILContPostProcessPassImpl::handleIntrinsicCalls(llvm::ModuleAnalysisManager &AnalysisManager) { bool Changed = false; for (auto &F : Mod->functions()) { @@ -746,9 +446,7 @@ bool DXILContPostProcessPassImpl::handleIntrinsicCalls( } else if (Name.contains("ContStack")) { Changed = true; - auto &FAM = - AnalysisManager.getResult(*Mod) - .getManager(); + auto &FAM = AnalysisManager.getResult(*Mod).getManager(); handleContStackIntrinsic(FAM, F); } @@ -757,28 +455,29 @@ bool DXILContPostProcessPassImpl::handleIntrinsicCalls( return Changed; } -bool DXILContPostProcessPassImpl::replaceIntrinsicCalls( - Function &F, const FunctionData &Data) { +bool DXILContPostProcessPassImpl::replaceIntrinsicCalls(Function &F, const FunctionData &Data) { if (Data.IntrinsicCalls.empty()) return false; [[maybe_unused]] auto *FuncTy = F.getFunctionType(); - assert(FuncTy->getNumParams() > Data.SystemDataArgumentIndex && - "Missing system data argument"); + assert(FuncTy->getNumParams() > Data.SystemDataArgumentIndex && 
"Missing system data argument"); Builder.SetInsertPointPastAllocas(&F); // Intrinsics need a pointer, so allocate and store the system data argument - Argument *SystemDataArgument = F.getArg(Data.SystemDataArgumentIndex); + Value *SystemDataArgument = F.getArg(Data.SystemDataArgumentIndex); Value *SystemDataPtr = Builder.CreateAlloca(Data.SystemDataTy); SystemDataPtr->setName("system.data.alloca"); + // Extract the original system data from the { systemData, padding, payload } + // struct returned by await. + if (!Data.IsStart) + SystemDataArgument = Builder.CreateExtractValue(SystemDataArgument, 0); Builder.CreateStore(SystemDataArgument, SystemDataPtr); for (auto *Call : Data.IntrinsicCalls) - replaceIntrinsicCall( - Builder, Data.SystemDataTy, SystemDataPtr, - ShaderStageHelper::dxilShaderKindToRtShaderStage(Data.Kind).value(), - Call, GpurtLibrary, CrossInliner); + replaceIntrinsicCall(Builder, Data.SystemDataTy, SystemDataPtr, + ShaderStageHelper::dxilShaderKindToRtShaderStage(Data.Kind).value(), Call, GpurtLibrary, + CrossInliner); return true; } @@ -802,27 +501,23 @@ bool DXILContPostProcessPassImpl::lowerCpsOps() { // but currently, there seems to be no support in dialects for marrying both // approaches: we would need a visitor that supports visiting function // definitions as well. 
- static const auto CpsVisitor = - llvm_dialects::VisitorBuilder() - .add( - [](CpsVisitorState &State, - lgc::cps::AsContinuationReferenceOp &AsCrOp) { - State.Builder.SetInsertPoint(&AsCrOp); - auto *AddrWithMD = State.Builder.CreateCall(State.GetAddrAndMD, - {AsCrOp.getFn()}); - AsCrOp.replaceAllUsesWith(AddrWithMD); - AsCrOp.eraseFromParent(); - State.Changed = true; - }) - .add( - [](CpsVisitorState &State, lgc::cps::JumpOp &JumpOp) { - State.Self.lowerJumpOp(JumpOp); - State.Changed = true; - }) - .build(); - - CpsVisitorState State{*this, Changed, Builder, - getContinuationGetAddrAndMD(*Mod)}; + static const auto CpsVisitor = llvm_dialects::VisitorBuilder() + .add( + [](CpsVisitorState &State, lgc::cps::AsContinuationReferenceOp &AsCrOp) { + State.Builder.SetInsertPoint(&AsCrOp); + auto *AddrWithMD = + State.Builder.CreateCall(State.GetAddrAndMD, {AsCrOp.getFn()}); + AsCrOp.replaceAllUsesWith(AddrWithMD); + AsCrOp.eraseFromParent(); + State.Changed = true; + }) + .add([](CpsVisitorState &State, lgc::cps::JumpOp &JumpOp) { + State.Self.lowerJumpOp(JumpOp); + State.Changed = true; + }) + .build(); + + CpsVisitorState State{*this, Changed, Builder, getContinuationGetAddrAndMD(*Mod)}; struct CspCandidateInfo { bool RequiresCspArgument = false; @@ -835,8 +530,7 @@ bool DXILContPostProcessPassImpl::lowerCpsOps() { if (Func.isDeclaration()) continue; - if (lgc::rt::getLgcRtShaderStage(&Func) == - lgc::rt::RayTracingShaderStage::KernelEntry) { + if (lgc::rt::getLgcRtShaderStage(&Func) == lgc::rt::RayTracingShaderStage::KernelEntry) { CandidateInfo.push_back({false, &Func}); continue; } @@ -859,70 +553,35 @@ bool DXILContPostProcessPassImpl::lowerCpsOps() { auto Data = std::move(ToProcess[F]); ToProcess.erase(F); - auto *NewFunc = StackLowering->lowerCpsStackOps(F, GetGlobalMemBase, - RequiresCspArgument); + auto *NewFunc = StackLowering->lowerCpsStackOps(F, GetGlobalMemBase, RequiresCspArgument); ToProcess.insert({NewFunc, Data}); } - collectContinueCalls(*Mod, 
ContinueCalls); - return Changed; } void DXILContPostProcessPassImpl::lowerJumpOp(lgc::cps::JumpOp &JumpOp) { Builder.SetInsertPoint(&JumpOp); - Value *RCR = JumpOp.getTarget(); - - Function *Continue = ContHelper::isWaitAwaitCall(JumpOp) - ? llvm::getContinuationWaitContinue(*Mod) - : llvm::getContinuationContinue(*Mod); - - SmallVector Args; - Args.push_back(Builder.CreateZExt(RCR, Builder.getInt64Ty())); - - // If this is a wait call, then the wait mask is at the start of the tail - // argument list. - Args.append(JumpOp.getTail().begin(), JumpOp.getTail().end()); - - CallInst *ContinueCall = Builder.CreateCall(Continue, Args); - ContinueCall->copyMetadata(JumpOp); - ContHelper::removeIsWaitAwaitMetadata(*ContinueCall); - JumpOp.eraseFromParent(); -} - -bool DXILContPostProcessPassImpl::unfoldGlobals() { - // Replace register globals with indices into a bigger global - const auto &DL = Mod->getDataLayout(); - GlobalVariable *PayloadGlobal = - Mod->getGlobalVariable(ContHelper::GlobalPayloadName); - - if (PayloadGlobal) { - // We use the maximum size for the continuation state and the actual size - // for the payload, so that the offset of the payload stays the same, but - // the global is only as big as necessary. - uint32_t RequiredSize = - PayloadGlobal->getValueType()->getArrayNumElements() * RegisterBytes; - - // Put continuation state first, it's callee save so we need to have it - // full in all cases. Payload can be truncated, so the backend is free to - // use registers that are unused in a function. 
- auto *I32 = Type::getInt32Ty(Mod->getContext()); - auto *RegistersTy = ArrayType::get(I32, RequiredSize / RegisterBytes); - Registers = cast(Mod->getOrInsertGlobal( - ContHelper::GlobalRegistersName, RegistersTy, [&] { - return new GlobalVariable( - *Mod, RegistersTy, false, GlobalVariable::ExternalLinkage, - nullptr, ContHelper::GlobalRegistersName, nullptr, - GlobalVariable::NotThreadLocal, GlobalRegisterAddrspace); - })); - - replaceGlobal(DL, Registers, PayloadGlobal, 0); - - return true; + Value *RCR = Builder.CreateZExt(JumpOp.getTarget(), Builder.getInt64Ty()); + + CallInst *ContinueOp = nullptr; + Value *ReturnAddr = *JumpOp.getTail().begin(); + SmallVector TailArgs{JumpOp.getTail().begin() + 1, JumpOp.getTail().end()}; + + if (auto WaitMask = ContHelper::tryGetWaitMask(JumpOp)) { + ContinueOp = Builder.create( + RCR, Builder.getInt64(WaitMask.value()), PoisonValue::get(Builder.getInt32Ty()), + Builder.CreateZExt(ReturnAddr, Builder.getInt64Ty()), TailArgs); + ContHelper::removeWaitMask(JumpOp); + } else { + ContinueOp = Builder.create(RCR, PoisonValue::get(Builder.getInt32Ty()), + Builder.CreateZExt(ReturnAddr, Builder.getInt64Ty()), TailArgs); } - return false; + ContinueOp->copyMetadata(JumpOp); + ContHelper::removeIsWaitAwaitMetadata(*ContinueOp); + JumpOp.eraseFromParent(); } bool DXILContPostProcessPassImpl::handleAmdInternals() { @@ -932,43 +591,42 @@ bool DXILContPostProcessPassImpl::handleAmdInternals() { auto Name = F.getName(); if (Name.starts_with("_AmdValueI32Count")) { Changed = true; - handleValueI32Count(F); + ContHelper::handleValueI32Count(F, Builder); } else if (Name.starts_with("_AmdValueGetI32")) { Changed = true; - handleValueGetI32(F); + ContHelper::handleValueGetI32(F, Builder); } else if (Name.starts_with("_AmdValueSetI32")) { Changed = true; - handleValueSetI32(F); + ContHelper::handleValueSetI32(F, Builder); } } return Changed; } -DXILContPostProcessPassImpl::DXILContPostProcessPassImpl(Module &M, - Module &GpurtLibrary) - : 
Mod{&M}, GpurtLibrary{&GpurtLibrary}, Builder{Mod->getContext()}, - StackAddrspace{ContHelper::tryGetStackAddrspace(*Mod)} {} +DXILContPostProcessPassImpl::DXILContPostProcessPassImpl(Module &M, Module &GpurtLibrary) + : Mod{&M}, GpurtLibrary{&GpurtLibrary}, Builder{Mod->getContext()}, StackAddrspace{ + ContHelper::tryGetStackAddrspace(*Mod)} { +} -PreservedAnalyses -DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { +PreservedAnalyses DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { bool Changed = false; - StackLowering.emplace(Mod->getContext(), - static_cast(StackAddrspace.value())); + StackLowering.emplace(Mod->getContext(), static_cast(StackAddrspace.value())); if (*StackAddrspace == ContStackAddrspace::Global) GetGlobalMemBase = getContinuationStackGlobalMemBase(*GpurtLibrary); initializeProcessableFunctionData(); - Changed |= handleRegisterBufferCalls(); - Changed |= unfoldGlobals(); Changed |= handleAmdInternals(); Changed |= handleIntrinsicCalls(AnalysisManager); - for (auto &[Func, Data] : ToProcess) + for (auto &[Func, Data] : ToProcess) { + ContHelper::IncomingRegisterCount::reset(Func); + ContHelper::ContinuationStateByteCount::reset(Func); Changed |= replaceIntrinsicCalls(*Func, Data); + } for (auto &F : make_early_inc_range(*Mod)) { auto FuncName = F.getName(); @@ -977,43 +635,31 @@ DXILContPostProcessPassImpl::run(ModuleAnalysisManager &AnalysisManager) { lowerGetResumePointAddr(F); } else if (FuncName.starts_with("_AmdComplete")) { Changed = true; - llvm::forEachCall(F, [&](llvm::CallInst &CInst) { - llvm::terminateShader(Builder, &CInst); - }); + llvm::forEachCall(F, [&](llvm::CallInst &CInst) { llvm::terminateShader(Builder, &CInst); }); } } Changed |= lowerCpsOps(); Changed |= fixupDxilMetadata(*Mod); + Changed |= cleanupOutgoingPayloadMetadata(); #ifndef NDEBUG - checkContinuationsModule(*Mod, ContinueCalls); + checkContinuationsModule(*Mod); #endif - if (ReportContStateSizes || ReportAllSizes) 
- reportContStateSizes(*Mod); - - if (ReportPayloadRegisterSizes || ReportAllSizes) - reportPayloadSizes(*Mod, ContinueCalls); - - if (ReportSystemDataSizes || ReportAllSizes) - reportSystemDataSizes(*Mod, ToProcess); - Changed |= llvm::removeUnusedFunctionDecls(Mod, false); return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } } // anonymous namespace -llvm::PreservedAnalyses -DXILContPostProcessPass::run(llvm::Module &Module, - llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses DXILContPostProcessPass::run(llvm::Module &Module, + llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the pass dxil-cont-post-process\n"); AnalysisManager.getResult(Module); auto &GpurtContext = lgc::GpurtContext::get(Module.getContext()); - DXILContPostProcessPassImpl Impl{ - Module, GpurtContext.theModule ? *GpurtContext.theModule : Module}; + DXILContPostProcessPassImpl Impl{Module, GpurtContext.theModule ? *GpurtContext.theModule : Module}; return Impl.run(AnalysisManager); } diff --git a/llvmraytracing/lib/DXILSupport.cpp b/llvmraytracing/lib/DXILSupport.cpp index efc5a37c14..67e8354036 100644 --- a/llvmraytracing/lib/DXILSupport.cpp +++ b/llvmraytracing/lib/DXILSupport.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -52,8 +52,7 @@ static bool isInResources(Value *Handle, Metadata *MD) { return false; auto *ResourceMDs = cast(MD); for (auto &Res : ResourceMDs->operands()) { - auto *Val = - mdconst::extract(cast(Res.get())->getOperand(1)); + auto *Val = mdconst::extract(cast(Res.get())->getOperand(1)); // Strip casts while (auto *Cast = dyn_cast(Val)) { assert(Cast->getOpcode() == Instruction::BitCast); @@ -77,9 +76,8 @@ static bool isInResources(Value *Handle, Metadata *MD) { /// check that, so we rematerialize all constant loads. static bool isRematerializableDxilLoad(CallInst *CInst, StringRef CalledName) { // First, check if this is a dxil load - static const char *const LoadFunctions[] = { - "dx.op.bufferLoad", "dx.op.rawBufferLoad", "dx.op.sample", - "dx.op.textureLoad"}; + static const char *const LoadFunctions[] = {"dx.op.bufferLoad", "dx.op.rawBufferLoad", "dx.op.sample", + "dx.op.textureLoad"}; bool IsLoad = false; for (const auto *LoadFunc : LoadFunctions) { @@ -95,10 +93,8 @@ static bool isRematerializableDxilLoad(CallInst *CInst, StringRef CalledName) { auto *Handle = CInst->getArgOperand(1); // Unwrap dx.op.annotateHandle and dx.op.createHandleForLib calls. 
while (auto *Call = dyn_cast(Handle)) { - assert( - Call->getCalledFunction()->getName().starts_with( - "dx.op.annotateHandle") || - Call->getCalledFunction()->getName().starts_with("dx.op.createHandle")); + assert(Call->getCalledFunction()->getName().starts_with("dx.op.annotateHandle") || + Call->getCalledFunction()->getName().starts_with("dx.op.createHandle")); Handle = Call->getArgOperand(1); } @@ -113,11 +109,9 @@ static bool isRematerializableDxilLoad(CallInst *CInst, StringRef CalledName) { assert(isa(Handle) && "A resource should be a global value"); // Search variable in SRV list - auto *MD = - Load->getModule()->getNamedMetadata("dx.resources")->getOperand(0); + auto *MD = Load->getModule()->getNamedMetadata("dx.resources")->getOperand(0); // in SRVs or CBVs - if (isInResources(Handle, MD->getOperand(0).get()) || - isInResources(Handle, MD->getOperand(2).get())) + if (isInResources(Handle, MD->getOperand(0).get()) || isInResources(Handle, MD->getOperand(2).get())) return true; } else { // Failing the check in release mode is fine, but we still want to know @@ -181,8 +175,7 @@ bool llvm::DXILMaterializable(Instruction &OrigI) { return true; // Match by id - unsigned int IntrId = - cast(CInst->getArgOperand(0))->getZExtValue(); + unsigned int IntrId = cast(CInst->getArgOperand(0))->getZExtValue(); if ((IntrId >= 6 && // FAbs - Dot4 IntrId <= 56) || IntrId == 58 || // CBufferLoad diff --git a/llvmraytracing/lib/GpurtContext.cpp b/llvmraytracing/lib/GpurtContext.cpp index fbd3d6a46f..e8fc38215e 100644 --- a/llvmraytracing/lib/GpurtContext.cpp +++ b/llvmraytracing/lib/GpurtContext.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. 
+ * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/llvmraytracing/lib/GpurtDialect.cpp b/llvmraytracing/lib/GpurtDialect.cpp index 6db1bde69f..073c532255 100644 --- a/llvmraytracing/lib/GpurtDialect.cpp +++ b/llvmraytracing/lib/GpurtDialect.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -34,3 +34,41 @@ #define GET_INCLUDES #define GET_DIALECT_DEFS #include "GpurtDialect.cpp.inc" + +using namespace llvm; + +namespace lgc::gpurt { +constexpr const char KnownSetRayFlagsMetadata[] = "lgc.gpurt.knownSetRayFlags"; +constexpr const char KnownUnsetRayFlagsMetadata[] = "lgc.gpurt.knownUnsetRayFlags"; + +void setKnownSetRayFlags(Module &module, unsigned flags) { + auto *md = module.getOrInsertNamedMetadata(KnownSetRayFlagsMetadata); + assert(md && "Failed to create metadata node!"); + md->clearOperands(); + md->addOperand(MDNode::get( + module.getContext(), {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(module.getContext()), flags))})); +} + +void setKnownUnsetRayFlags(Module &module, unsigned flags) { + auto *md = module.getOrInsertNamedMetadata(KnownUnsetRayFlagsMetadata); + assert(md && "Failed to create metadata node!"); + md->clearOperands(); + md->addOperand(MDNode::get( + 
module.getContext(), {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(module.getContext()), flags))})); +} + +unsigned getKnownSetRayFlags(const Module &module) { + auto *md = module.getNamedMetadata(KnownSetRayFlagsMetadata); + if (!md) + return 0; + return mdconst::extract(md->getOperand(0)->getOperand(0))->getZExtValue(); +} + +unsigned getKnownUnsetRayFlags(const Module &module) { + auto *md = module.getNamedMetadata(KnownUnsetRayFlagsMetadata); + if (!md) + return 0; + return mdconst::extract(md->getOperand(0)->getOperand(0))->getZExtValue(); +} + +} // namespace lgc::gpurt diff --git a/llvmraytracing/lib/LegacyCleanupContinuations.cpp b/llvmraytracing/lib/LegacyCleanupContinuations.cpp index 18fed80ce7..a7ffb57cc7 100644 --- a/llvmraytracing/lib/LegacyCleanupContinuations.cpp +++ b/llvmraytracing/lib/LegacyCleanupContinuations.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -32,17 +32,15 @@ // Add arguments to resume functions, which are the return values of the called // continuation. // -// Add a global register buffer to store the continuation state. 
-// //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcIlCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/Builder.h" -#include "llvmraytracing/Continuations.h" -#include "llvmraytracing/ContinuationsUtil.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Module.h" @@ -58,8 +56,7 @@ namespace { class LegacyCleanupContinuationsPassImpl { public: - LegacyCleanupContinuationsPassImpl( - llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager); + LegacyCleanupContinuationsPassImpl(llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager); PreservedAnalyses run(); @@ -80,9 +77,7 @@ class LegacyCleanupContinuationsPassImpl { // Returns the number of bytes used on the CPS stack for the continuation // state. - uint32_t getContStateStackBytes() const { - return alignTo(ContStateBytes, RegisterBytes); - } + uint32_t getContStateStackBytes() const { return alignTo(ContStateBytes, RegisterBytes); } }; void analyzeContinuation(Function &F, MDNode *MD); @@ -92,8 +87,7 @@ class LegacyCleanupContinuationsPassImpl { void processContinuation(Function *StartFunc, ContinuationData &FuncData); void handleFunctionEntry(ContinuationData &Data, Function *F, bool IsEntry); void handleContinue(ContinuationData &Data, Instruction *Ret); - void handleSingleContinue(ContinuationData &Data, CallInst *Call, - Value *ResumeFun); + void handleSingleContinue(ContinuationData &Data, CallInst *Call, Value *ResumeFun); void handleReturn(ContinuationData &Data, lgc::ilcps::ReturnOp &ContRet); Module &M; @@ -104,8 +98,6 @@ class LegacyCleanupContinuationsPassImpl { Type *I64 = nullptr; Function *ContMalloc = nullptr; Function *ContFree = nullptr; - Function *Continue = nullptr; - Function *WaitContinue = nullptr; 
MapVector ToProcess; CompilerUtils::CrossModuleInliner CrossInliner; }; @@ -115,9 +107,8 @@ class LegacyCleanupContinuationsPassImpl { /// /// Returns a map (origin BB, (call that created the continuation token, resume /// function)). -DenseMap> -findTokenOrigin(BasicBlock *BB, Value *V, - SmallVectorImpl &ToRemove) { +DenseMap> findTokenOrigin(BasicBlock *BB, Value *V, + SmallVectorImpl &ToRemove) { DenseMap> Result; Value *Call = nullptr; Value *ResumeFun = nullptr; @@ -150,10 +141,8 @@ findTokenOrigin(BasicBlock *BB, Value *V, ResumeFun = Const->getOperand(0); } - auto RegisterTokenOrigin = [&Result](BasicBlock *TheBB, Value *Token, - Value *TheResumeFun) { - assert(isa(TheResumeFun) && - "Resume function should be a constant function"); + auto RegisterTokenOrigin = [&Result](BasicBlock *TheBB, Value *Token, Value *TheResumeFun) { + assert(isa(TheResumeFun) && "Resume function should be a constant function"); // Strip away bitcasts -- this can happen with multiple token types if (auto *TokenBitcast = dyn_cast(Token)) Token = TokenBitcast->getOperand(0); @@ -169,8 +158,7 @@ findTokenOrigin(BasicBlock *BB, Value *V, ToRemove.push_back(CallPhi); ToRemove.push_back(ResumeFunPhi); - for (auto CallEntry : - llvm::zip(CallPhi->blocks(), CallPhi->incoming_values())) { + for (auto CallEntry : llvm::zip(CallPhi->blocks(), CallPhi->incoming_values())) { auto *PhiBB = std::get<0>(CallEntry); auto *ResumeFunEntry = ResumeFunPhi->getIncomingValueForBlock(PhiBB); assert(ResumeFunEntry && "Need a resume fun for each call"); @@ -182,8 +170,7 @@ findTokenOrigin(BasicBlock *BB, Value *V, return Result; } -void LegacyCleanupContinuationsPassImpl::analyzeContinuation(Function &F, - MDNode *MD) { +void LegacyCleanupContinuationsPassImpl::analyzeContinuation(Function &F, MDNode *MD) { // Only analyze main continuation auto *MDTup = cast(MD); auto *EntryF = mdconst::extract(MDTup->getOperand(0)); @@ -208,13 +195,11 @@ void 
LegacyCleanupContinuationsPassImpl::analyzeContinuation(Function &F, // Without malloc call, we check later if the continuation state is used if (Data.MallocCall) { - Data.ContStateBytes = - cast(Data.MallocCall->getArgOperand(0))->getSExtValue(); + Data.ContStateBytes = cast(Data.MallocCall->getArgOperand(0))->getSExtValue(); } } -void LegacyCleanupContinuationsPassImpl::finalizeContinuationData( - Function &StartFunc, ContinuationData &FuncData) { +void LegacyCleanupContinuationsPassImpl::finalizeContinuationData(Function &StartFunc, ContinuationData &FuncData) { if (FuncData.MallocCall) return; @@ -256,13 +241,11 @@ uint32_t getIncomingRegisterCount(Function *ResumeFunc) { Worklist.append(U->user_begin(), U->user_end()); continue; } - assert(isa(U) && - "User of a resume function should be a call to continue"); + assert(isa(U) && "User of a resume function should be a call to continue"); auto *Inst = cast(U); - if (auto Count = ContHelper::tryGetReturnedRegisterCount(Inst)) { - assert((!RegCount || *RegCount == *Count) && - "Got different returned registercounts in continues to " - "the same resume function"); + if (auto Count = ContHelper::ReturnedRegisterCount::tryGetValue(Inst)) { + assert((!RegCount || *RegCount == *Count) && "Got different returned registercounts in continues to " + "the same resume function"); RegCount = *Count; #ifdef NDEBUG break; @@ -276,8 +259,7 @@ uint32_t getIncomingRegisterCount(Function *ResumeFunc) { return RegCount.value(); } -Value *getContFrame(CallInst *MallocCall, Function *F, bool IsStart, - SmallVectorImpl &InstsToRemove) { +Value *getContFrame(CallInst *MallocCall, Function *F, bool IsStart, SmallVectorImpl &InstsToRemove) { Value *ContFrame = nullptr; if (MallocCall) { if (IsStart) { @@ -314,8 +296,7 @@ Value *getContFrame(CallInst *MallocCall, Function *F, bool IsStart, return ContFrame; } -void LegacyCleanupContinuationsPassImpl::processContinuation( - Function *StartFunc, ContinuationData &FuncData) { +void 
LegacyCleanupContinuationsPassImpl::processContinuation(Function *StartFunc, ContinuationData &FuncData) { auto *Void = Type::getVoidTy(Context); LLVM_DEBUG(dbgs() << "Processing function: " << StartFunc->getName() << "\n"); bool IsEntry = StartFunc->hasMetadata(ContHelper::MDEntryName); @@ -359,8 +340,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( if (IsStart) { unsigned ArgNo = 0; assert(F->arg_size() >= 1 && "Entry function has at least one argument"); - for (auto Arg = F->arg_begin(), ArgEnd = F->arg_end() - 1; Arg != ArgEnd; - Arg++) { + for (auto Arg = F->arg_begin(), ArgEnd = F->arg_end() - 1; Arg != ArgEnd; Arg++) { AllArgTypes.push_back(Arg->getType()); AllArgValues.push_back(Arg); ParamAttrs.push_back(FAttrs.getParamAttrs(ArgNo)); @@ -369,8 +349,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( } else { B.SetInsertPoint(&*F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - AllArgTypes.push_back( - B.getInt64Ty()); // Dummy return address for resume functions + AllArgTypes.push_back(B.getInt64Ty()); // Dummy return address for resume functions AllArgValues.push_back(nullptr); // Find arguments from lgc.ilcps.getreturnvalue calls @@ -385,14 +364,12 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( // Find the free call if there is one if (ContFree) { - forEachCall(*ContFree, - [&](CallInst &CI) { InstsToRemove.push_back(&CI); }); + forEachCall(*ContFree, [&](CallInst &CI) { InstsToRemove.push_back(&CI); }); } // Find the continuation state pointer, either returned by the malloc or // given as an argument - Value *ContFrame = - getContFrame(FuncData.MallocCall, F, IsStart, InstsToRemove); + Value *ContFrame = getContFrame(FuncData.MallocCall, F, IsStart, InstsToRemove); // Try to eliminate unnecessary continuation state accesses // of values that are still available as SSA values by a simple @@ -405,8 +382,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( // Create new empty function 
F->eraseMetadata(FuncData.MD->getMetadataID()); auto *NewFuncTy = FunctionType::get(Void, AllArgTypes, false); - Function *NewFunc = - CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ParamAttrs); + Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ParamAttrs); NewFunc->takeName(F); NewFuncs.push_back({NewFunc, IsStart}); @@ -415,8 +391,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( // Set arg names for new function // Skip the dummy return address for non-start functions - for (unsigned Idx = 0; Idx != NewFunc->getFunctionType()->params().size(); - ++Idx) { + for (unsigned Idx = 0; Idx != NewFunc->getFunctionType()->params().size(); ++Idx) { Value *OldVal = AllArgValues[Idx]; // Skip the dummy return address. if (!OldVal) @@ -439,8 +414,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( B.SetInsertPoint(&*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); if (IsStart) { FuncData.NewStart = NewFunc; - ContMDTuple = - MDTuple::get(Context, {ValueAsMetadata::get(FuncData.NewStart)}); + ContMDTuple = MDTuple::get(Context, {ValueAsMetadata::get(FuncData.NewStart)}); } handleFunctionEntry(FuncData, NewFunc, IsEntry); @@ -453,11 +427,8 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( PointerType *UsedContFrameTy = cast(ContFrame->getType()); Value *CastNewContState = B.CreateBitCast( FuncData.NewContState, - getWithSamePointeeType( - UsedContFrameTy, - FuncData.NewContState->getType()->getPointerAddressSpace())); - CompilerUtils::replaceAllPointerUses(&B, ContFrame, CastNewContState, - InstsToRemove); + getWithSamePointeeType(UsedContFrameTy, FuncData.NewContState->getType()->getPointerAddressSpace())); + CompilerUtils::replaceAllPointerUses(&B, ContFrame, CastNewContState, InstsToRemove); } else { // If there is no continuation state, replace it with a poison // value instead of a zero-sized stack allocation. 
@@ -470,12 +441,10 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( auto *I = BB.getTerminator(); if (I->getOpcode() == Instruction::Ret) { handleContinue(FuncData, I); - } else if (I->getOpcode() == Instruction::Unreachable) { + } else if (I->getOpcode() == Instruction::Unreachable && BB.size() > 1) { if (auto *Call = dyn_cast(--I->getIterator())) { - if (auto *Called = Call->getCalledFunction()) { - if (auto *ContRet = dyn_cast(Call)) - handleReturn(FuncData, *ContRet); - } + if (auto *ContRet = dyn_cast(Call)) + handleReturn(FuncData, *ContRet); } } } @@ -497,7 +466,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( for (auto [NewFunc, IsStart] : NewFuncs) { if (!IsStart) { uint32_t IncomingRegisterCount = getIncomingRegisterCount(NewFunc); - ContHelper::setIncomingRegisterCount(NewFunc, IncomingRegisterCount); + ContHelper::IncomingRegisterCount::setValue(NewFunc, IncomingRegisterCount); } } @@ -505,8 +474,7 @@ void LegacyCleanupContinuationsPassImpl::processContinuation( F->eraseFromParent(); } -void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( - ContinuationData &Data, Function *F, bool IsEntry) { +void LegacyCleanupContinuationsPassImpl::handleFunctionEntry(ContinuationData &Data, Function *F, bool IsEntry) { uint64_t NeededStackSize = Data.getContStateStackBytes(); bool IsStart = F == Data.NewStart; @@ -515,22 +483,19 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( // bytes // Technically, continuation state includes the spilled payload here. // However, we want to exclude it here for statistics. 
- uint32_t PayloadSpillSize = ContHelper::tryGetStackSize(F).value_or(0); + uint32_t PayloadSpillSize = ContHelper::StackSize::tryGetValue(F).value_or(0); assert(Data.ContStateBytes >= PayloadSpillSize); - ContHelper::setContinuationStateByteCount(*F, Data.ContStateBytes - - PayloadSpillSize); + ContHelper::ContinuationStateByteCount::setValue(F, Data.ContStateBytes - PayloadSpillSize); } if (NeededStackSize) { Value *ContStateOnStack = nullptr; if (IsStart) { - ContHelper::setStackSize(F, NeededStackSize); + ContHelper::StackSize::setValue(F, NeededStackSize); - ContStateOnStack = - B.create(B.getInt32(NeededStackSize)); + ContStateOnStack = B.create(B.getInt32(NeededStackSize)); } else { - ContStateOnStack = - B.create(B.getInt32(NeededStackSize)); + ContStateOnStack = B.create(B.getInt32(NeededStackSize)); } ContStateOnStack->setName("cont.state.stack.segment"); @@ -541,9 +506,8 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( // Peek into CSP stack to obtain continuation state. // This can be handled in the same way for start and resume functions, // because for start functions we already allocated space above. - Data.NewContState = B.CreateBitCast( - ContStateOnStack, ContStateTy->getPointerTo(lgc::cps::stackAddrSpace), - "cont.state"); + Data.NewContState = + B.CreateBitCast(ContStateOnStack, ContStateTy->getPointerTo(lgc::cps::stackAddrSpace), "cont.state"); } } @@ -556,16 +520,14 @@ void LegacyCleanupContinuationsPassImpl::handleFunctionEntry( /// to /// %resume_addr = ptrtoint i8* ... @fun.resume.0 to i64 /// %foo = ptrtoint %continuation.token* () @foo to i64 -/// call void @continuation.continue(i64 %foo, i64 +/// call void @lgc.ilcps.continue(i64 %foo, i64 /// %resume_addr, ) !continuation.registercount !0 /// unreachable /// /// Also handles cases where the token and resume function are behind a phi. 
-void LegacyCleanupContinuationsPassImpl::handleContinue(ContinuationData &Data, - Instruction *Ret) { +void LegacyCleanupContinuationsPassImpl::handleContinue(ContinuationData &Data, Instruction *Ret) { // Find the function call that generates the token - LLVM_DEBUG(dbgs() << "Converting ret to continue: " << *Ret - << "\nArgument: " << *Ret->getOperand(0) << "\n"); + LLVM_DEBUG(dbgs() << "Converting ret to continue: " << *Ret << "\nArgument: " << *Ret->getOperand(0) << "\n"); auto *BB = Ret->getParent(); SmallVector ToRemove; ToRemove.push_back(Ret); @@ -575,8 +537,7 @@ void LegacyCleanupContinuationsPassImpl::handleContinue(ContinuationData &Data, I->eraseFromParent(); for (auto &Entry : Calls) { - LLVM_DEBUG(dbgs() << "Handling call: " << *Entry.second.first - << " with resume function " << Entry.second.second + LLVM_DEBUG(dbgs() << "Handling call: " << *Entry.second.first << " with resume function " << Entry.second.second << "\n"); auto *Call = Entry.second.first; auto *ResumeFun = Entry.second.second; @@ -584,45 +545,38 @@ void LegacyCleanupContinuationsPassImpl::handleContinue(ContinuationData &Data, } if (BB->empty()) { - assert(BB->hasNPredecessorsOrMore(0) && - "Handled all continues but the block still has predecessors left"); + assert(BB->hasNPredecessorsOrMore(0) && "Handled all continues but the block still has predecessors left"); BB->eraseFromParent(); } } -void LegacyCleanupContinuationsPassImpl::handleSingleContinue( - ContinuationData &Data, CallInst *Call, Value *ResumeFun) { +void LegacyCleanupContinuationsPassImpl::handleSingleContinue(ContinuationData &Data, CallInst *Call, + Value *ResumeFun) { // Pass resume address as argument B.SetInsertPoint(Call); - auto *ContinuationReference = - B.create(I64, ResumeFun); + auto *ContinuationReference = B.create(I64, ResumeFun); bool IsWait = ContHelper::isWaitAwaitCall(*Call); - Function *ContinueFunction = IsWait ? 
WaitContinue : Continue; - // Replace this instruction with a call to continuation.[wait]continue - SmallVector Args; - Args.push_back(B.CreatePointerCast(Call->getCalledOperand(), I64)); - // The wait mask is the first argument after the function pointer - if (IsWait) - Args.push_back(*Call->arg_begin()); - Args.push_back(ContinuationReference); + // The jump call tail argument list needs to start with the return address. + Value *JumpAddr = B.CreatePointerCast(Call->getCalledOperand(), I64); + SmallVector TailArgs{Call->arg_begin() + (IsWait ? 1 : 0), Call->arg_end()}; + TailArgs.insert(TailArgs.begin(), ContinuationReference); + + CallInst *Jump = + B.create(JumpAddr, -1, PoisonValue::get(StructType::get(B.getContext())), TailArgs); - Args.append(Call->arg_begin() + (IsWait ? 1 : 0), Call->arg_end()); - auto *ContinueCall = B.CreateCall(ContinueFunction, Args); + Jump->copyMetadata(*Call); + ContHelper::removeIsWaitAwaitMetadata(*Jump); - // Copy metadata, except for the wait flag, which is no longer needed. 
- ContinueCall->copyMetadata(*Call); if (IsWait) - ContHelper::removeIsWaitAwaitMetadata(*ContinueCall); - assert(ContHelper::tryGetOutgoingRegisterCount(ContinueCall) && - "Missing registercount metadata!"); + ContHelper::setWaitMask(*Jump, cast(Call->getArgOperand(0))->getSExtValue()); + assert(ContHelper::OutgoingRegisterCount::tryGetValue(Jump) && "Missing registercount metadata!"); // Remove instructions at the end of the block - auto *Unreachable = B.CreateUnreachable(); - for (auto &I : make_early_inc_range(reverse(*ContinueCall->getParent()))) { + for (auto &I : make_early_inc_range(reverse(*Jump->getParent()))) { if (&I == Unreachable) break; I.eraseFromParent(); @@ -634,10 +588,9 @@ void LegacyCleanupContinuationsPassImpl::handleSingleContinue( /// value>) unreachable /// to /// -/// call void @continuation.continue(i64 %returnaddr, ) +/// call void @lgc.ilcps.continue(i64 %returnaddr, ) /// unreachable -void LegacyCleanupContinuationsPassImpl::handleReturn( - ContinuationData &Data, lgc::ilcps::ReturnOp &ContRet) { +void LegacyCleanupContinuationsPassImpl::handleReturn(ContinuationData &Data, lgc::ilcps::ReturnOp &ContRet) { LLVM_DEBUG(dbgs() << "Converting return to continue: " << ContRet << "\n"); bool IsEntry = isa(ContRet.getReturnAddr()); B.SetInsertPoint(&ContRet); @@ -647,33 +600,29 @@ void LegacyCleanupContinuationsPassImpl::handleReturn( B.create(B.getInt32(NeededStackSize)); if (IsEntry) { - assert(ContRet.getArgs().empty() && - "Entry functions ignore the return value"); + assert(ContRet.getArgs().empty() && "Entry functions ignore the return value"); llvm::terminateShader(B, &ContRet); } else { - // Create the call to continuation.continue, but with the same argument list - // as for lgc.ilcps.return. The CSP is appended during + // Create the call to lgc.ilcps.continue, but with the same argument list + // as for lgc.ilcps.return. The CSP is being set during // DXILContPostProcess. // Append the dummy return address as well. 
- SmallVector Args(ContRet.args()); - Args.insert(Args.begin() + 1, PoisonValue::get(B.getInt64Ty())); - auto *ContinueCall = B.CreateCall(Continue, Args); - Data.NewReturnContinues.push_back(ContinueCall); - - ContinueCall->copyMetadata(ContRet); - assert(ContHelper::tryGetOutgoingRegisterCount(ContinueCall) && - "Missing registercount metadata!"); + SmallVector RetTail{ContRet.getArgs()}; + auto *ContinueOp = B.create(ContRet.getReturnAddr(), PoisonValue::get(B.getInt32Ty()), + PoisonValue::get(B.getInt64Ty()), RetTail); + Data.NewReturnContinues.push_back(ContinueOp); + + ContinueOp->copyMetadata(ContRet); + assert(ContHelper::OutgoingRegisterCount::tryGetValue(ContinueOp) && "Missing registercount metadata!"); ContRet.eraseFromParent(); } } -LegacyCleanupContinuationsPassImpl::LegacyCleanupContinuationsPassImpl( - llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager) +LegacyCleanupContinuationsPassImpl::LegacyCleanupContinuationsPassImpl(llvm::Module &Mod, + llvm::ModuleAnalysisManager &AnalysisManager) : M{Mod}, Context{M.getContext()}, - FAM{AnalysisManager.getResult(Mod) - .getManager()}, - B{Context} { + FAM{AnalysisManager.getResult(Mod).getManager()}, B{Context} { AnalysisManager.getResult(M); ContMalloc = M.getFunction("continuation.malloc"); ContFree = M.getFunction("continuation.free"); @@ -696,8 +645,7 @@ PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { // Add !continuation metadata to KernelEntry and Traversal after // coroutine passes. The traversal loop is written as like the coroutine // passes were applied manually. 
- MDTuple *ContMDTuple = - MDTuple::get(Context, {ValueAsMetadata::get(&F)}); + MDTuple *ContMDTuple = MDTuple::get(Context, {ValueAsMetadata::get(&F)}); F.setMetadata(ContHelper::MDContinuationName, ContMDTuple); } } @@ -713,8 +661,6 @@ PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { if (!ToProcess.empty()) { I32 = Type::getInt32Ty(Context); I64 = Type::getInt64Ty(Context); - Continue = getContinuationContinue(M); - WaitContinue = getContinuationWaitContinue(M); for (auto &FuncData : ToProcess) { processContinuation(FuncData.first, FuncData.second); @@ -728,8 +674,8 @@ PreservedAnalyses LegacyCleanupContinuationsPassImpl::run() { } // namespace -llvm::PreservedAnalyses LegacyCleanupContinuationsPass::run( - llvm::Module &Mod, llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses LegacyCleanupContinuationsPass::run(llvm::Module &Mod, + llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the cleanup-continuations pass\n"); AnalysisManager.getResult(Mod); LegacyCleanupContinuationsPassImpl Impl(Mod, AnalysisManager); diff --git a/llvmraytracing/lib/LgcCpsDialect.cpp b/llvmraytracing/lib/LgcCpsDialect.cpp index 3112541640..a7dcf202f7 100644 --- a/llvmraytracing/lib/LgcCpsDialect.cpp +++ b/llvmraytracing/lib/LgcCpsDialect.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -33,7 +33,6 @@ #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" - #include #define GET_INCLUDES @@ -50,8 +49,7 @@ constexpr const char CpsMetadata[] = "lgc.cps"; // type. Note that this does not include any padding except for pointers. unsigned lgc::cps::getArgumentDwordCount(const DataLayout &DL, Type *type) { if (type->isSingleValueType()) { - unsigned numComponents = - type->isVectorTy() ? cast(type)->getNumElements() : 1; + unsigned numComponents = type->isVectorTy() ? cast(type)->getNumElements() : 1; // One VGPR lane can store 32 bit, e. g. 1 dword. // Note that this will not take into account that we could possibly store @@ -77,8 +75,7 @@ unsigned lgc::cps::getArgumentDwordCount(const DataLayout &DL, Type *type) { } if (type->isArrayTy()) - return getArgumentDwordCount(DL, type->getArrayElementType()) * - type->getArrayNumElements(); + return getArgumentDwordCount(DL, type->getArrayElementType()) * type->getArrayNumElements(); if (auto *structTy = dyn_cast(type)) { unsigned memberDwordCount = 0; @@ -93,8 +90,7 @@ unsigned lgc::cps::getArgumentDwordCount(const DataLayout &DL, Type *type) { // ===================================================================================================================== // Helper to determine how many dwords are occupied by a given set of types. -unsigned lgc::cps::getArgumentDwordCount(const DataLayout &DL, - ArrayRef types) { +unsigned lgc::cps::getArgumentDwordCount(const DataLayout &DL, ArrayRef types) { unsigned currentDwordUsage = 0; for (Type *type : types) @@ -108,9 +104,7 @@ unsigned lgc::cps::getArgumentDwordCount(const DataLayout &DL, // Returns // 0, if we reached the maximum given by MaxArgumentDwords // std::nullopt, if we exceeded it. 
-std::optional -lgc::cps::getRemainingArgumentDwords(const DataLayout &DL, - ArrayRef arguments) { +std::optional lgc::cps::getRemainingArgumentDwords(const DataLayout &DL, ArrayRef arguments) { const unsigned currentDwordUsage = getArgumentDwordCount(DL, arguments); if (currentDwordUsage > MaxArgumentDwords) @@ -134,8 +128,7 @@ void lgc::cps::setCpsFunctionLevel(Function &fn, CpsLevel level) { LLVMContext &context = fn.getContext(); MDNode *node = MDNode::get( - context, {ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(context), static_cast(level)))}); + context, {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(context), static_cast(level)))}); fn.setMetadata(CpsMetadata, node); } @@ -147,29 +140,25 @@ lgc::cps::CpsLevel lgc::cps::getCpsLevelFromFunction(const Function &fn) { MDNode *node = fn.getMetadata(fn.getContext().getMDKindID(CpsMetadata)); if (!node) { // Expect that we have set the CPS metadata. - llvm::report_fatal_error( - "Cannot call lgc::cps::getCpsLevelFromFunction on non-CPS function!"); + llvm::report_fatal_error("Cannot call lgc::cps::getCpsLevelFromFunction on non-CPS function!"); } const ConstantAsMetadata *c = cast(node->getOperand(0)); unsigned level = cast(c->getValue())->getZExtValue(); - assert(level < static_cast(CpsLevel::Count) && - "Invalid CPS level!"); + assert(level < static_cast(CpsLevel::Count) && "Invalid CPS level!"); return static_cast(level); } // ===================================================================================================================== // Transform a shader type into the corresponding CPS level. 
-lgc::cps::CpsLevel -lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage stage) { +lgc::cps::CpsLevel lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage stage) { if (stage == RayTracingShaderStage::RayGeneration) return CpsLevel::RayGen; if (stage == RayTracingShaderStage::Traversal) return CpsLevel::Traversal; - if (stage == RayTracingShaderStage::ClosestHit || - stage == RayTracingShaderStage::Miss || + if (stage == RayTracingShaderStage::ClosestHit || stage == RayTracingShaderStage::Miss || stage == RayTracingShaderStage::Callable) return CpsLevel::ClosestHit_Miss_Callable; @@ -188,9 +177,7 @@ lgc::cps::getCpsLevelForShaderStage(RayTracingShaderStage stage) { uint8_t lgc::cps::getPotentialCpsReturnLevels(RayTracingShaderStage stage) { std::bitset<8> CpsLevels; - auto SetLevel = [&CpsLevels](CpsLevel Level) -> void { - CpsLevels.set(static_cast(Level)); - }; + auto SetLevel = [&CpsLevels](CpsLevel Level) -> void { CpsLevels.set(static_cast(Level)); }; switch (stage) { case RayTracingShaderStage::RayGeneration: @@ -227,8 +214,7 @@ uint8_t lgc::cps::getPotentialCpsReturnLevels(RayTracingShaderStage stage) { // ===================================================================================================================== // Push the state passed to a lgc::cps::jump op to the stack and return the new // continuation stack pointer. Do nothing if there is no state to push. 
-void lgc::cps::pushStateToCpsStack(llvm_dialects::Builder &builder, - lgc::cps::JumpOp &jumpOp) { +void lgc::cps::pushStateToCpsStack(llvm_dialects::Builder &builder, lgc::cps::JumpOp &jumpOp) { Value *State = jumpOp.getState(); Type *StateType = State->getType(); @@ -238,8 +224,8 @@ void lgc::cps::pushStateToCpsStack(llvm_dialects::Builder &builder, const DataLayout &DL = jumpOp.getModule()->getDataLayout(); builder.SetInsertPoint(&jumpOp); - Value *NewCsp = builder.create( - builder.getInt32(static_cast(DL.getTypeStoreSize(StateType)))); + Value *NewCsp = + builder.create(builder.getInt32(static_cast(DL.getTypeStoreSize(StateType)))); builder.CreateStore(State, NewCsp); } @@ -248,13 +234,11 @@ void lgc::cps::pushStateToCpsStack(llvm_dialects::Builder &builder, // corresponding state size. Returns the popped state if eligible. If nothing // can to be popped, return nullptr. Assume that the builder has its insertion // point set after the CSP initializer. -Value *lgc::cps::popStateFromCpsStack(llvm_dialects::Builder &builder, - const DataLayout &DL, Type *stateType) { +Value *lgc::cps::popStateFromCpsStack(llvm_dialects::Builder &builder, const DataLayout &DL, Type *stateType) { if (stateType->isEmptyTy()) return nullptr; - ConstantInt *StateSize = - builder.getInt32(static_cast(DL.getTypeStoreSize(stateType))); + ConstantInt *StateSize = builder.getInt32(static_cast(DL.getTypeStoreSize(stateType))); Value *StatePtr = builder.create(StateSize); Value *NewState = builder.CreateLoad(stateType, StatePtr); builder.create(StateSize); @@ -266,9 +250,8 @@ Value *lgc::cps::popStateFromCpsStack(llvm_dialects::Builder &builder, // Lower lgc.cps.as.continuation.reference operations into an integer // representation of the pointer or a passed relocation. Return the new // reference. 
-Value *lgc::cps::lowerAsContinuationReference( - IRBuilder<> &Builder, lgc::cps::AsContinuationReferenceOp &AsCrOp, - Value *Relocation) { +Value *lgc::cps::lowerAsContinuationReference(IRBuilder<> &Builder, lgc::cps::AsContinuationReferenceOp &AsCrOp, + Value *Relocation) { Builder.SetInsertPoint(&AsCrOp); Value *Reference = nullptr; diff --git a/llvmraytracing/lib/LgcCpsJumpInliner.cpp b/llvmraytracing/lib/LgcCpsJumpInliner.cpp new file mode 100644 index 0000000000..01dd059f86 --- /dev/null +++ b/llvmraytracing/lib/LgcCpsJumpInliner.cpp @@ -0,0 +1,130 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +//===- LgcCpsJumpInliner.cpp - Inline lgc.cps.jump and continue calls -===// +// +// A pass that inlines lgc.cps.jump calls with constant jump targets which reside in the GpuRt module. +// +//===----------------------------------------------------------------------===// + +#include "compilerutils/CompilerUtils.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" +#include "lgc/LgcCpsDialect.h" +#include "llvm-dialects/Dialect/Visitor.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" + +#define DEBUG_TYPE "lgc-cps-jump-inliner" + +namespace { +using namespace llvm; +using namespace lgc::cps; + +class LgcCpsJumpInlinerPassImpl final { +public: + LgcCpsJumpInlinerPassImpl(Module &M, Module &GpurtLibrary); + PreservedAnalyses run(); + +private: + Module *Mod; + Module *GpurtLibrary; + LLVMContext *Context; + const DataLayout *DL; + llvm_dialects::Builder Builder; + CompilerUtils::CrossModuleInliner CrossInliner; +}; +} // namespace + +LgcCpsJumpInlinerPassImpl::LgcCpsJumpInlinerPassImpl(Module &M, Module &GpurtLibrary) + : Mod{&M}, GpurtLibrary{&GpurtLibrary}, Context{&M.getContext()}, DL{&M.getDataLayout()}, Builder{ + Mod->getContext()} { +} + +PreservedAnalyses LgcCpsJumpInlinerPassImpl::run() { + using JumpVecTy = SmallVector; + static const auto Visitor = + llvm_dialects::VisitorBuilder>() + .add([](SmallVector &AllJumps, JumpOp &Jump) { AllJumps.push_back(&Jump); }) + .build(); + + JumpVecTy AllJumps; + // Collect lgc.cps.jump ops. + Visitor.visit(AllJumps, *Mod); + + bool Changed = false; + // Iterate over all collected jumps and try to inline the jump target. 
+ for (auto *Jump : AllJumps) { + auto *AsCROp = dyn_cast(Jump->getTarget()); + if (!AsCROp) + continue; + + Function *JumpTargetFunc = cast(AsCROp->getFn()); + + JumpTargetFunc = GpurtLibrary->getFunction(JumpTargetFunc->getName()); + + assert(JumpTargetFunc && !JumpTargetFunc->isDeclaration()); + + Builder.SetInsertPoint(Jump); + SmallVector ArgList; + assert(Jump->getState()->getType()->isEmptyTy()); + + if (isCpsFunction(*JumpTargetFunc)) { + // TODO: We need to ensure we properly pass in RCR and shader index. + ArgList.push_back(Jump->getState()); + } + + ArgList.append(Jump->getTail().begin(), Jump->getTail().end()); + + CrossInliner.inlineCall(Builder, JumpTargetFunc, ArgList); + + // Cleanup work. + Jump->eraseFromParent(); + + if (AsCROp->user_empty()) + AsCROp->eraseFromParent(); + + // There might still be other users left, if the function is not referenced as direct jump target. + if (JumpTargetFunc->user_empty() && JumpTargetFunc->getLinkage() == GlobalValue::InternalLinkage) + JumpTargetFunc->eraseFromParent(); + + Changed = true; + } + + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +PreservedAnalyses LgcCpsJumpInlinerPass::run(Module &Module, ModuleAnalysisManager &AnalysisManager) { + LLVM_DEBUG(dbgs() << "Run the pass lgc-cps-jump-inliner\n"); + + auto &GpurtContext = lgc::GpurtContext::get(Module.getContext()); + LgcCpsJumpInlinerPassImpl Impl(Module, GpurtContext.theModule ? 
*GpurtContext.theModule : Module); + + return Impl.run(); +} diff --git a/llvmraytracing/lib/LgcIlCpsDialect.cpp b/llvmraytracing/lib/LgcIlCpsDialect.cpp index facdcfab40..d5d02a1627 100644 --- a/llvmraytracing/lib/LgcIlCpsDialect.cpp +++ b/llvmraytracing/lib/LgcIlCpsDialect.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/llvmraytracing/lib/LgcRtDialect.cpp b/llvmraytracing/lib/LgcRtDialect.cpp index 0f4934a1da..5495355bd9 100644 --- a/llvmraytracing/lib/LgcRtDialect.cpp +++ b/llvmraytracing/lib/LgcRtDialect.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -80,8 +80,7 @@ constexpr const char MaxPayloadSizeMetadata[] = "lgc.rt.max.payload.size"; // ============================================================================================== // Helper to create an MDNode containing a constant. 
MDNode *getMdNodeForNumericConstant(LLVMContext &context, size_t value) { - return MDNode::get(context, {ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(context), value))}); + return MDNode::get(context, {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(context), value))}); } // ============================================================================================== @@ -100,14 +99,12 @@ std::optional extractNumericConstantFromMdNode(MDNode *node) { // Wrapper around setMetadata for unsigned integer cases, global object/function // version. void setMetadataNumericValue(GlobalObject *func, StringRef Kind, size_t size) { - func->setMetadata(Kind, - getMdNodeForNumericConstant(func->getContext(), size)); + func->setMetadata(Kind, getMdNodeForNumericConstant(func->getContext(), size)); } // ============================================================================================== // Helper to obtain a constant from global object/function metadata. -std::optional getMetadataNumericValue(const GlobalObject *obj, - StringRef Kind) { +std::optional getMetadataNumericValue(const GlobalObject *obj, StringRef Kind) { MDNode *node = obj->getMetadata(Kind); return extractNumericConstantFromMdNode(node); } @@ -122,8 +119,7 @@ void setMetadataNumericValue(Module *module, StringRef Kind, size_t size) { // ============================================================================================== // Helper to obtain a constant from a named metadata value. 
-std::optional getMetadataNumericValue(const llvm::Module *module, - StringRef Kind) { +std::optional getMetadataNumericValue(const llvm::Module *module, StringRef Kind) { NamedMDNode *node = module->getNamedMetadata(Kind); if (!node) return std::nullopt; @@ -136,8 +132,7 @@ std::optional getMetadataNumericValue(const llvm::Module *module, // ============================================================================================== // Get the metadata IDs associated with the lgc.rt dialect, so the caller knows // which ones can be removed when the dialect is processed. -void lgc::rt::getLgcRtMetadataIds(LLVMContext &context, - SmallVectorImpl &ids) { +void lgc::rt::getLgcRtMetadataIds(LLVMContext &context, SmallVectorImpl &ids) { ids.push_back(context.getMDKindID(ShaderStageMetadata)); ids.push_back(context.getMDKindID(PaqMetadata)); ids.push_back(context.getMDKindID(ArgSizeMetadata)); @@ -150,11 +145,9 @@ void lgc::rt::getLgcRtMetadataIds(LLVMContext &context, // func can instead be a GlobalVariable, allowing a front-end to use a // GlobalVariable to represent a shader retrieved from the cache, and wants to // mark it with a shader stage. -void lgc::rt::setLgcRtShaderStage(GlobalObject *func, - std::optional stage) { +void lgc::rt::setLgcRtShaderStage(GlobalObject *func, std::optional stage) { if (stage.has_value()) - setMetadataNumericValue(func, ShaderStageMetadata, - static_cast(stage.value())); + setMetadataNumericValue(func, ShaderStageMetadata, static_cast(stage.value())); else func->eraseMetadata(func->getContext().getMDKindID(ShaderStageMetadata)); } @@ -165,16 +158,38 @@ void lgc::rt::setLgcRtShaderStage(GlobalObject *func, // func can instead be a GlobalVariable, allowing a front-end to use a // GlobalVariable to represent a shader retrieved from the cache, and wants to // mark it with a shader stage. 
-std::optional -lgc::rt::getLgcRtShaderStage(const GlobalObject *func) { - std::optional mdValue = - getMetadataNumericValue(func, ShaderStageMetadata); +std::optional lgc::rt::getLgcRtShaderStage(const GlobalObject *func) { + std::optional mdValue = getMetadataNumericValue(func, ShaderStageMetadata); if (mdValue.has_value()) { return RayTracingShaderStage(*mdValue); } return std::nullopt; } +// Get the name string of shader subtype for the specified shader stage. +const char *lgc::rt::getShaderSubtypeForRtShaderStage(lgc::rt::RayTracingShaderStage stage) { + switch (stage) { + case RayTracingShaderStage::RayGeneration: + return "RayGeneration"; + case RayTracingShaderStage::Miss: + return "Miss"; + case RayTracingShaderStage::AnyHit: + return "AnyHit"; + case RayTracingShaderStage::ClosestHit: + return "ClosestHit"; + case RayTracingShaderStage::Intersection: + return "Intersection"; + case RayTracingShaderStage::Callable: + return "Callable"; + case RayTracingShaderStage::Traversal: + return "Traversal"; + case RayTracingShaderStage::KernelEntry: + return "LaunchKernel"; + default: + return "Unknown"; + } +} + // ============================================================================================== // Get PAQ (payload access qualifier) metadata for a ray-tracing shader // function, or nullptr if none. We allow for the PAQ metadata not existing @@ -184,7 +199,7 @@ lgc::rt::getLgcRtShaderStage(const GlobalObject *func) { Constant *lgc::rt::getShaderPaq(Function *func) { MDNode *node = func->getMetadata(PaqMetadata); if (node) - return mdconst::dyn_extract(node->getOperand(0)); + return mdconst::extract(node->getOperand(0)); return nullptr; } @@ -196,8 +211,7 @@ Constant *lgc::rt::getShaderPaq(Function *func) { // TODO: Extend to an array of i32 constants specifying byte offset ranges with // access bits, finishing with the size in bytes. 
void lgc::rt::setShaderPaq(Function *func, Constant *paq) { - func->setMetadata(PaqMetadata, MDNode::get(func->getContext(), - {ConstantAsMetadata::get(paq)})); + func->setMetadata(PaqMetadata, MDNode::get(func->getContext(), {ConstantAsMetadata::get(paq)})); } // ============================================================================================== @@ -205,8 +219,7 @@ void lgc::rt::setShaderPaq(Function *func, Constant *paq) { // that is the only information we have on the payload. Constant *lgc::rt::getPaqFromSize(LLVMContext &context, size_t size) { Type *i32Ty = Type::getInt32Ty(context); - return ConstantArray::get(ArrayType::get(i32Ty, 1), - ConstantInt::get(i32Ty, size)); + return ConstantArray::get(ArrayType::get(i32Ty, 1), ConstantInt::get(i32Ty, size)); } // ============================================================================================== @@ -217,9 +230,8 @@ Constant *lgc::rt::getPaqFromSize(LLVMContext &context, size_t size) { size_t lgc::rt::getShaderArgSize(Function *func) { std::optional result = getMetadataNumericValue(func, ArgSizeMetadata); - assert(result.has_value() && - "lgc::rt::getShaderArgSize: ArgSize metadata missing - forgot " - "to call setShaderArgSize?"); + assert(result.has_value() && "lgc::rt::getShaderArgSize: ArgSize metadata missing - forgot " + "to call setShaderArgSize?"); return result.value(); } @@ -246,8 +258,7 @@ void lgc::rt::setShaderHitAttributeSize(Function *func, size_t size) { // ============================================================================================== // Get max hit attribute size (in bytes) metadata for a ray-tracing module. // This is a pipeline-wide upper bound on the per-function hit attribute sizes. 
-std::optional -lgc::rt::getMaxHitAttributeSize(const llvm::Module *module) { +std::optional lgc::rt::getMaxHitAttributeSize(const llvm::Module *module) { return getMetadataNumericValue(module, MaxAttributeSizeMetadata); } diff --git a/llvmraytracing/lib/LgcRtqDialect.cpp b/llvmraytracing/lib/LgcRtqDialect.cpp index 726fe24d27..c32e2def65 100644 --- a/llvmraytracing/lib/LgcRtqDialect.cpp +++ b/llvmraytracing/lib/LgcRtqDialect.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -38,4 +38,6 @@ Type *lgc::rtq::getRayQueryType(LLVMContext &C) { return IntegerType::get(C, 127); } -bool lgc::rtq::isRayQueryType(Type *Ty) { return Ty->isIntegerTy(127); } +bool lgc::rtq::isRayQueryType(Type *Ty) { + return Ty->isIntegerTy(127); +} diff --git a/llvmraytracing/lib/LowerAwait.cpp b/llvmraytracing/lib/LowerAwait.cpp index 235e513ead..36ca5f3f5a 100644 --- a/llvmraytracing/lib/LowerAwait.cpp +++ b/llvmraytracing/lib/LowerAwait.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -34,11 +34,11 @@ // //===----------------------------------------------------------------------===// +#include "llvmraytracing/Continuations.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcIlCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/Visitor.h" -#include "llvmraytracing/Continuations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Module.h" @@ -63,33 +63,18 @@ class LowerAwaitPassImpl final { }; } // anonymous namespace -Function *llvm::getContinuationWaitContinue(Module &M) { - auto *Name = "continuation.waitContinue"; - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - auto *Void = Type::getVoidTy(C); - auto *I64 = Type::getInt64Ty(C); - auto *FuncTy = FunctionType::get(Void, {I64, I64}, true); - AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, - {Attribute::NoReturn}); - return cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); -} - -Function *llvm::getContinuationAwait(Module &M, Type *TokenTy, - StructType *RetTy) { +Function *llvm::getContinuationAwait(Module &M, Type *TokenTy, StructType *RetTy) { std::string Name = "await"; auto &C = M.getContext(); auto *AwaitTy = FunctionType::get(RetTy, TokenTy, false); - auto *AwaitFun = Function::Create( - AwaitTy, GlobalValue::LinkageTypes::ExternalLinkage, Name, &M); + auto *AwaitFun = Function::Create(AwaitTy, GlobalValue::LinkageTypes::ExternalLinkage, Name, &M); AwaitFun->setAttributes( - AttributeList::get(C, AttributeList::FunctionIndex, - {Attribute::NoUnwind, Attribute::WillReturn})); + AttributeList::get(C, AttributeList::FunctionIndex, {Attribute::NoUnwind, Attribute::WillReturn})); return AwaitFun; } -LowerAwaitPassImpl::LowerAwaitPassImpl(Module &Mod) : Mod{Mod} {} +LowerAwaitPassImpl::LowerAwaitPassImpl(Module &Mod) : 
Mod{Mod} { +} void LowerAwaitPassImpl::collectContinuationFunctions() { for (auto &F : Mod.functions()) { @@ -123,13 +108,11 @@ void LowerAwaitPassImpl::processContinuations(bool IsLgcCpsMode) { auto *I32 = Type::getInt32Ty(Context); auto *I64 = Type::getInt64Ty(Context); - Type *TokenTy = - StructType::create(Context, "continuation.token")->getPointerTo(); + Type *TokenTy = StructType::create(Context, "continuation.token")->getPointerTo(); SmallVector ReturnTypes; - ReturnTypes.push_back(I8Ptr); // Continue function pointer - ReturnTypes.push_back( - TokenTy); // Token to connect the function call with the resume point + ReturnTypes.push_back(I8Ptr); // Continue function pointer + ReturnTypes.push_back(TokenTy); // Token to connect the function call with the resume point StructType *NewRetTy = StructType::get(Context, ReturnTypes); for (auto &FuncData : ToProcess) { @@ -142,10 +125,8 @@ void LowerAwaitPassImpl::processContinuations(bool IsLgcCpsMode) { // Lgc.cps dialect will handle stack pointer and return address in // DXILContPostProcessPass. 
- bool IsTraversal = lgc::rt::getLgcRtShaderStage(F) == - lgc::rt::RayTracingShaderStage::Traversal; - bool IsLegacyNonEntry = - !ContHelper::isLegacyEntryFunction(F) && !IsLgcCpsMode && !IsTraversal; + bool IsTraversal = lgc::rt::getLgcRtShaderStage(F) == lgc::rt::RayTracingShaderStage::Traversal; + bool IsLegacyNonEntry = !ContHelper::isLegacyEntryFunction(F) && !IsLgcCpsMode && !IsTraversal; for (auto const &Arg : F->args()) AllArgTypes.push_back(Arg.getType()); @@ -156,15 +137,13 @@ void LowerAwaitPassImpl::processContinuations(bool IsLgcCpsMode) { // Create new empty function auto *NewFuncTy = FunctionType::get(NewRetTy, AllArgTypes, false); - Function *NewFunc = CompilerUtils::cloneFunctionHeader( - *F, NewFuncTy, ArrayRef{}); + Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ArrayRef{}); NewFunc->takeName(F); // Transfer code from old function to new function llvm::moveFunctionBody(*F, *NewFunc); - for (unsigned Idx = 0; Idx != F->getFunctionType()->params().size(); - ++Idx) { + for (unsigned Idx = 0; Idx != F->getFunctionType()->params().size(); ++Idx) { Argument *Arg = NewFunc->getArg(Idx); Argument *OldArg = F->getArg(Idx); Arg->setName(OldArg->getName()); @@ -185,16 +164,12 @@ void LowerAwaitPassImpl::processContinuations(bool IsLgcCpsMode) { // We need one per continuation because they have different metadata SmallVector StrBuf; auto *ContProtoFunc = cast( - Mod.getOrInsertFunction( - (Twine("continuation.prototype.") + NewFunc->getName()) - .toStringRef(StrBuf), - FunctionType::get(NewRetTy, {I8Ptr, Type::getInt1Ty(Context)}, - false)) + Mod.getOrInsertFunction((Twine("continuation.prototype.") + NewFunc->getName()).toStringRef(StrBuf), + FunctionType::get(NewRetTy, {I8Ptr, Type::getInt1Ty(Context)}, false)) .getCallee()); // Add metadata, marking it as a continuation function - MDTuple *ContMDTuple = - MDTuple::get(Context, {ValueAsMetadata::get(NewFunc)}); + MDTuple *ContMDTuple = MDTuple::get(Context, 
{ValueAsMetadata::get(NewFunc)}); NewFunc->setMetadata(ContHelper::MDContinuationName, ContMDTuple); ContProtoFunc->setMetadata(ContHelper::MDContinuationName, ContMDTuple); @@ -202,28 +177,21 @@ void LowerAwaitPassImpl::processContinuations(bool IsLgcCpsMode) { // Alloc and free prototypes too auto *ContMallocTy = FunctionType::get(I8Ptr, {I32}, false); - auto *ContMalloc = dyn_cast( - Mod.getOrInsertFunction("continuation.malloc", ContMallocTy) - .getCallee()); + auto *ContMalloc = dyn_cast(Mod.getOrInsertFunction("continuation.malloc", ContMallocTy).getCallee()); auto *ContMallocPtr = ConstantExpr::getBitCast(ContMalloc, I8Ptr); - auto *ContDeallocTy = - FunctionType::get(Type::getVoidTy(Context), {I8Ptr}, false); - auto *ContDealloc = dyn_cast( - Mod.getOrInsertFunction("continuation.free", ContDeallocTy) - .getCallee()); + auto *ContDeallocTy = FunctionType::get(Type::getVoidTy(Context), {I8Ptr}, false); + auto *ContDealloc = dyn_cast(Mod.getOrInsertFunction("continuation.free", ContDeallocTy).getCallee()); auto *ContDeallocPtr = ConstantExpr::getBitCast(ContDealloc, I8Ptr); - llvm_dialects::Builder B( - &*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); + llvm_dialects::Builder B(&*NewFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); // Claim that the buffer has the minimum required size of a pointer Value *BufSize = ConstantInt::get(I32, MinimumContinuationStateBytes); Value *BufAlign = ConstantInt::get(I32, 4); Value *const CoroId = B.CreateIntrinsic(Intrinsic::coro_id_retcon, {}, - {BufSize, BufAlign, StorageArg, ContProtoFuncPtr, - ContMallocPtr, ContDeallocPtr}); + {BufSize, BufAlign, StorageArg, ContProtoFuncPtr, ContMallocPtr, ContDeallocPtr}); auto *CPN = ConstantPointerNull::get(I8Ptr); B.CreateIntrinsic(Intrinsic::coro_begin, {}, {CoroId, CPN}); @@ -241,15 +209,13 @@ void LowerAwaitPassImpl::processContinuations(bool IsLgcCpsMode) { // Insert a dummy call to remember the arguments to lgc.cps.await. 
auto *ShaderTy = FunctionType::get(TokenTy, ArgTys, false); - auto *ShaderFun = - B.CreateIntToPtr(CI->getArgOperand(0), ShaderTy->getPointerTo()); + auto *ShaderFun = B.CreateIntToPtr(CI->getArgOperand(0), ShaderTy->getPointerTo()); SuspendRetconArg = B.CreateCall(ShaderTy, ShaderFun, Args); cast(SuspendRetconArg)->copyMetadata(*CI); } else { SuspendRetconArg = CI->getArgOperand(0); } - B.CreateIntrinsic(Intrinsic::coro_suspend_retcon, {B.getInt1Ty()}, - SuspendRetconArg); + B.CreateIntrinsic(Intrinsic::coro_suspend_retcon, {B.getInt1Ty()}, SuspendRetconArg); auto *RetTy = CI->getType(); if (!RetTy->isVoidTy()) { auto *RetVal = B.create(RetTy); @@ -305,13 +271,12 @@ PreservedAnalyses LowerAwaitPassImpl::run() { bool HasCpsAwaitCalls = false; }; - static auto Visitor = - llvm_dialects::VisitorBuilder() - .add([](VisitorPayload &Payload, auto &Op) { - Payload.Self.ToProcess[Op.getFunction()].push_back(&Op); - Payload.HasCpsAwaitCalls = true; - }) - .build(); + static auto Visitor = llvm_dialects::VisitorBuilder() + .add([](VisitorPayload &Payload, auto &Op) { + Payload.Self.ToProcess[Op.getFunction()].push_back(&Op); + Payload.HasCpsAwaitCalls = true; + }) + .build(); VisitorPayload P{*this}; Visitor.visit(P, Mod); @@ -328,11 +293,10 @@ PreservedAnalyses LowerAwaitPassImpl::run() { return PreservedAnalyses::all(); } -LowerAwaitPass::LowerAwaitPass() {} +LowerAwaitPass::LowerAwaitPass() { +} -llvm::PreservedAnalyses -LowerAwaitPass::run(llvm::Module &M, - llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses LowerAwaitPass::run(llvm::Module &M, llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the lower-await pass\n"); AnalysisManager.getResult(M); diff --git a/llvmraytracing/lib/LowerRayQuery.cpp b/llvmraytracing/lib/LowerRayQuery.cpp new file mode 100644 index 0000000000..b472b31a32 --- /dev/null +++ b/llvmraytracing/lib/LowerRayQuery.cpp @@ -0,0 +1,765 @@ +/* + 
*********************************************************************************************************************** + * + * Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + **********************************************************************************************************************/ + +// LowerRayQuery.cpp : Pass to lower rayQuery ops by inlining GPURT functions. +// Typically used by running a pass class that derives from this one, setting m_staticFlags and setting up +// a GpurtContext as appropriate. 
+ +#include "llvmraytracing/LowerRayQuery.h" +#include "compilerutils/CompilerUtils.h" +#include "compilerutils/TypeLowering.h" +#include "llvmraytracing/GpurtContext.h" +#include "lgc/GpurtDialect.h" +#include "lgc/LgcRtDialect.h" +#include "lgc/LgcRtqDialect.h" +#include "llvm-dialects/Dialect/Builder.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#define DEBUG_TYPE "lgc-lower-rayquery" +using namespace lgc; +using namespace lgc::rt; +using namespace llvm; +using namespace llvm_dialects; +using namespace CompilerUtils; + +// Table of GPURT function names. Must match the order of enum GpurtFunc. +static const char *const GpurtFuncNames[] = { + "_RayQuery_Abort", + "_RayQuery_Allocate", + "_RayQuery_CandidateAabbOpaque", + "_RayQuery_CommitNonOpaqueTriangleHit", + "_RayQuery_CommitProceduralPrimitiveHit", + "_RayQuery_EndInterleavedProceed", + "FetchTrianglePositionFromRayQuery", + "_RayQuery_GeometryIndex", + "_RayQuery_GetObjId", + "_RayQuery_InstanceContributionToHitGroupIndex", + "_RayQuery_InstanceID", + "_RayQuery_InstanceIndex", + "_RayQuery_IntersectionType", + "LongRayQueryProceedAMD", + "_RayQuery_ObjectRayDirection", + "_RayQuery_ObjectRayOrigin", + "_RayQuery_ObjectToWorld4x3", + "_RayQuery_PrimitiveIndex", + "_RayQuery_RayFlags", + "RayQueryProceed", + "_RayQuery_RayT", + "_RayQuery_RayTMin", + "_RayQuery_SetObjId", + "TraceRayInline", + "_RayQuery_TriangleBarycentrics", + "_RayQuery_TriangleFrontFace", + "_RayQuery_WorldRayDirection", + "_RayQuery_WorldRayOrigin", + "_RayQuery_WorldToObject4x3", +}; +static_assert(sizeof(GpurtFuncNames) / sizeof(GpurtFuncNames[0]) == unsigned(LowerRayQuery::GpurtFunc::Count), + "Table size mismatch"); + +namespace RtqAlloc { +enum : unsigned { + RayQueryId, // Rayquery Id + PrevRayQueryId, // Previous rayquery Id + BoolP, // Committed condition + Count +}; +} + +struct LoweringVisitorRtqType { + LowerRayQuery *pass; + TypeLowering typeLower; + explicit 
LoweringVisitorRtqType(Type *rtqType, LowerRayQuery *pass) : pass(pass), typeLower(rtqType->getContext()) { + typeLower.addRule([pass](TypeLowering &, Type * type) -> auto { + SmallVector loweredTy; + if (pass->hasRtqOpaqueType(type)) { + loweredTy.push_back(pass->replaceRayQueryType(type)); + } + return loweredTy; + }); + } +}; + +template <> struct llvm_dialects::VisitorPayloadProjection { + static LowerRayQuery &project(LoweringVisitorRtqType &payload) { return *payload.pass; } +}; + +LLVM_DIALECTS_VISITOR_PAYLOAD_PROJECT_FIELD(LoweringVisitorRtqType, typeLower) + +// ===================================================================================================================== +// Lower InitializeOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitInitializeOp(rtq::InitializeOp &inst) { + m_builder->SetInsertPoint(&inst); + SmallVector args; + auto rayQuery = getRayQuery(inst.getRayQuery()); + Function *traceRayInlineFunc = getGpurtFunc(GpurtFunc::TraceRayInline); + Type *rayDescTy = traceRayInlineFunc->getFunctionType()->getParamType(6); + // 0, rayQuery + args.push_back(rayQuery); + // 1, Scene addr low, + // 2, Scene addr high + Type *int32x2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); + Value *scene = m_builder->CreateBitCast(inst.getAccelerationStructure(), int32x2Ty); + Value *sceneAddLow = m_builder->CreateExtractElement(scene, uint64_t(0)); + Value *sceneAddHigh = m_builder->CreateExtractElement(scene, 1); + args.push_back(sceneAddLow); + args.push_back(sceneAddHigh); + // 3, Const ray flags + args.push_back(m_builder->getInt32(0)); + // 4, Ray flags + args.push_back(inst.getRayFlags()); + // 5 instance mask + args.push_back(inst.getInstanceInclusionMask()); + // 6 RayDesc + Value *rayDesc = PoisonValue::get(rayDescTy); + // Origin + rayDesc = m_builder->CreateInsertValue(rayDesc, inst.getRayOrigin(), 0u); + // TMin + rayDesc = m_builder->CreateInsertValue(rayDesc, inst.getTMin(), 1u); + // Direction + 
rayDesc = m_builder->CreateInsertValue(rayDesc, inst.getDirection(), 2u); + // TMax + rayDesc = m_builder->CreateInsertValue(rayDesc, inst.getTMax(), 3u); + args.push_back(rayDesc); + // 7 dispatchId + args.push_back(m_builder->create()); + CrossModuleInliner inliner; + inliner.inlineCall(*m_builder, traceRayInlineFunc, args); + setRtqObjId(inst, rayQuery); + + m_typeLowering->eraseInstruction(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// ===================================================================================================================== +// Lower TerminateOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitTerminateOp(rtq::TerminateOp &inst) { + m_builder->SetInsertPoint(&inst); + auto rayQuery = getRayQuery(inst.getRayQuery()); + CrossModuleInliner inliner; + inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::Abort), {rayQuery}); + m_typeLowering->eraseInstruction(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// ===================================================================================================================== +// Lower ProceedOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitProceedOp(rtq::ProceedOp &inst) { + m_builder->SetInsertPoint(&inst); + auto rayQuery = getRayQuery(inst.getRayQuery()); + CrossModuleInliner inliner; + // Only use GetObjId if GPURT has it. 
+ if (Function *getObjIdFunc = getGpurtFunc(GpurtFunc::GetObjId, /*optional=*/true)) { + Value *rayQueryObj = inliner.inlineCall(*m_builder, getObjIdFunc, {rayQuery}).returnValue; + // Check interleaved proceed, aka, proceed on the same rayquery object + Value *notEqual = m_builder->CreateICmpNE( + rayQueryObj, m_builder->CreateLoad(m_builder->getInt32Ty(), m_rtqAlloc[RtqAlloc::PrevRayQueryId])); + Instruction *terminator = SplitBlockAndInsertIfThen(notEqual, m_builder->GetInsertPoint(), false); + m_builder->SetInsertPoint(terminator); + inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::EndInterleavedProceed), {rayQuery}); + m_builder->SetInsertPoint(&inst); + m_builder->CreateStore(rayQueryObj, m_rtqAlloc[RtqAlloc::PrevRayQueryId]); + } else { + // If GPURT does not have GetObjId, we have to assume always interleaved, + // which is suboptimal. + inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::EndInterleavedProceed), {rayQuery}); + } + + // Call proceed function. Use LongRay version if available. 
+ Value *zero = m_builder->getInt32(0); + Value *proceedResult = nullptr; + if (Function *proceedFunc = getGpurtFunc(GpurtFunc::LongRayQueryProceed, /*optional=*/true)) { + auto earlyRayThreshold = ConstantFP::get(m_builder->getFloatTy(), 0.0); + Value *args[] = {rayQuery, zero, earlyRayThreshold, m_builder->create()}; + proceedResult = inliner.inlineCall(*m_builder, proceedFunc, args).returnValue; + } else { + Value *args[] = {rayQuery, zero, m_builder->create()}; + proceedResult = inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::RayQueryProceed), args).returnValue; + } + inst.replaceAllUsesWith(proceedResult); + m_typeLowering->eraseInstruction(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// ===================================================================================================================== +// Lower IntersectionCommitAabbOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionCommitAabbOp(rtq::IntersectionCommitAabbOp &inst) { + m_builder->SetInsertPoint(&inst); + auto rayQuery = getRayQuery(inst.getRayQuery()); + CrossModuleInliner inliner; + inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::CommitProceduralPrimitiveHit), {rayQuery, inst.getTHit()}); + m_typeLowering->eraseInstruction(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// ===================================================================================================================== +// Lower IntersectionCommitTriangleOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionCommitTriangleOp(rtq::IntersectionCommitTriangleOp &inst) { + m_builder->SetInsertPoint(&inst); + auto rayQuery = getRayQuery(inst.getRayQuery()); + CrossModuleInliner inliner; + inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::CommitNonOpaqueTriangleHit), {rayQuery}); + m_typeLowering->eraseInstruction(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// 
===================================================================================================================== +// Lower IntersectionTypeOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionTypeOp(rtq::IntersectionTypeOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::IntersectionType, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower RayTMinOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitRayTMinOp(rtq::RayTMinOp &inst) { + m_builder->SetInsertPoint(&inst); + visitAccessor(GpurtFunc::RayTMin, inst.getRayQuery(), &inst); +} + +// ===================================================================================================================== +// Lower RayFlagsOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitRayFlagsOp(rtq::RayFlagsOp &inst) { + m_builder->SetInsertPoint(&inst); + visitAccessor(GpurtFunc::RayFlags, inst.getRayQuery(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionTOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionTOp(rtq::IntersectionTOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::RayT, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionInstanceIdOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionInstanceIdOp(rtq::IntersectionInstanceIdOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::InstanceID, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// 
===================================================================================================================== +// Lower IntersectionInstanceIndexOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionInstanceIndexOp(rtq::IntersectionInstanceIndexOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::InstanceIndex, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionContributionToHitGroupIndexOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionContributionToHitGroupIndexOp( + rtq::IntersectionContributionToHitGroupIndexOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::InstanceContributionToHitGroupIndex, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionGeometryIndexOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionGeometryIndexOp(rtq::IntersectionGeometryIndexOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::GeometryIndex, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionPrimitiveIndexOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionPrimitiveIndexOp(rtq::IntersectionPrimitiveIndexOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::PrimitiveIndex, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower 
IntersectionBarycentricsOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionBarycentricsOp(rtq::IntersectionBarycentricsOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::TriangleBarycentrics, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionFrontFaceOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionFrontFaceOp(rtq::IntersectionFrontFaceOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::TriangleFrontFace, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionCandidateAabbOpaqueOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionCandidateAabbOpaqueOp(rtq::IntersectionCandidateAabbOpaqueOp &inst) { + m_builder->SetInsertPoint(&inst); + visitAccessor(GpurtFunc::CandidateAabbOpaque, inst.getRayQuery(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionObjectRayDirectionOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionObjectRayDirectionOp(rtq::IntersectionObjectRayDirectionOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::ObjectRayDirection, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionObjectRayOriginOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionObjectRayOriginOp(rtq::IntersectionObjectRayOriginOp &inst) { +
m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::ObjectRayOrigin, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionWorldRayDirectionOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionWorldRayDirectionOp(rtq::IntersectionWorldRayDirectionOp &inst) { + m_builder->SetInsertPoint(&inst); + visitAccessor(GpurtFunc::WorldRayDirection, inst.getRayQuery(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionWorldRayOriginOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionWorldRayOriginOp(rtq::IntersectionWorldRayOriginOp &inst) { + m_builder->SetInsertPoint(&inst); + visitAccessor(GpurtFunc::WorldRayOrigin, inst.getRayQuery(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionObjectToWorldOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionObjectToWorldOp(rtq::IntersectionObjectToWorldOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::ObjectToWorld4x3, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// ===================================================================================================================== +// Lower IntersectionWorldToObjectOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionWorldToObjectOp(rtq::IntersectionWorldToObjectOp &inst) { + m_builder->SetInsertPoint(&inst); + visitHitAccessor(GpurtFunc::WorldToObject4x3, inst.getRayQuery(), inst.getCommitted(), &inst); +} + +// 
===================================================================================================================== +// Lower IntersectionTriangleVertexPositionsOp dialect +// +// @param inst : the instruction to lower +void LowerRayQuery::visitIntersectionTriangleVertexPositionsOp(rtq::IntersectionTriangleVertexPositionsOp &inst) { + m_builder->SetInsertPoint(&inst); + Value *rayQuery = getRayQuery(inst.getRayQuery()); + CrossModuleInliner inliner; + auto call = inliner.inlineCall(*m_builder, getGpurtFunc(GpurtFunc::FetchTrianglePositionFromRayQuery), + {rayQuery, m_builder->getInt1(inst.getCommitted())}); + auto triangleData = call.returnValue; + auto floatx3Ty = FixedVectorType::get(m_builder->getFloatTy(), 3); + auto retType = ArrayType::get(floatx3Ty, 3); + // Convert from struct TriangleData to the array of vec3 + Value *vertexPos = PoisonValue::get(retType); + for (unsigned i = 0; i < 3; i++) + vertexPos = m_builder->CreateInsertValue(vertexPos, m_builder->CreateExtractValue(triangleData, {i}), {i}); + inst.replaceAllUsesWith(vertexPos); + m_typeLowering->eraseInstruction(&inst); + m_funcsToLower.insert(inst.getCalledFunction()); +} + +// ===================================================================================================================== +// Lower AllocaInst instruction +// +// @param inst : the instruction to lower +VisitorResult LowerRayQuery::visitAlloca(AllocaInst &inst) { + auto types = m_typeLowering->convertType(inst.getAllocatedType()); + if (!types.empty() && types[0] != inst.getAllocatedType()) { + m_builder->SetInsertPoint(&inst); + auto newAllocRtq = m_builder->CreateAlloca(types[0]); + m_typeLowering->replaceInstruction(&inst, newAllocRtq); + } + return VisitorResult::Stop; +} + +// ===================================================================================================================== +// Set RayQuery ObjectID +// +// @param inst : the instruction to lower +// @param rtq : the rayquery object +void
LowerRayQuery::setRtqObjId(rtq::InitializeOp &inst, Value *rtq) { + // Only use SetObjId if GPURT has it. + if (Function *setObjIdFunc = getGpurtFunc(GpurtFunc::SetObjId, /*optional=*/true)) { + CrossModuleInliner inliner; + inliner.inlineCall(*m_builder, setObjIdFunc, {rtq, m_rtqAlloc[RtqAlloc::RayQueryId]}); + } + Value *rayQueryObjId = m_builder->CreateLoad(m_builder->getInt32Ty(), m_rtqAlloc[RtqAlloc::RayQueryId]); + m_builder->CreateStore(m_builder->CreateAdd(rayQueryObjId, m_builder->getInt32(1)), m_rtqAlloc[RtqAlloc::RayQueryId]); +} + +// ===================================================================================================================== +// Visit ptrtoint instruction, in case its input is a pointer that we lowered. +void LowerRayQuery::visitPtrToInt(PtrToIntInst &inst) { + auto loweredVals = m_typeLowering->getValueOptional(inst.getOperand(0)); + if (!loweredVals.empty()) + inst.setOperand(0, loweredVals[0]); +} + +// ===================================================================================================================== +// Visit lgc.GepOpaqueOp instruction +// +// @param inst : the instruction to lower + +void LowerRayQuery::visitGepOpaqueOp(rtq::GepOpaqueOp &inst) { + m_builder->SetInsertPoint(&inst); + Type *gepTy = replaceRayQueryType(inst.getBaseType()); + Value *srcElement = m_typeLowering->getValue(inst.getBasePointer())[0]; + Value *newGep = nullptr; + SmallVector indices; + indices.insert(indices.end(), inst.getOffsets().begin(), inst.getOffsets().end()); + if (inst.getInbound()) + newGep = m_builder->CreateInBoundsGEP(gepTy, srcElement, indices); + else + newGep = m_builder->CreateGEP(gepTy, srcElement, indices); + // If the result of the GEP is not a type that we lower (is not and does not + // contain i127), then manually replace uses here. 
+ SmallVector offsets; + for (Value *offset : inst.getOffsets()) + offsets.push_back(offset); + Type *elementTy = GetElementPtrInst::getIndexedType(inst.getBaseType(), offsets); + if (m_typeLowering->convertType(elementTy)[0] == elementTy) + inst.replaceAllUsesWith(newGep); + // Replace with the new GEP. + m_typeLowering->replaceInstruction(&inst, newGep); +} + +// ===================================================================================================================== +// Lower LifetimeIntrinsic instruction +// +// @param inst : the instruction to lower +VisitorResult LowerRayQuery::visitLifetimeIntrinsic(LifetimeIntrinsic &inst) { + Value *arg = inst.getArgOperand(1); + if (m_typeLowering->getValueOptional(arg).size()) + m_typeLowering->eraseInstruction(&inst); + + return VisitorResult::Stop; +} + +// ===================================================================================================================== +// Initialize alloc used later for gpurt functions calling +// +// @param func : the function to create alloc +void LowerRayQuery::initializeAlloc(Function *func) { + assert(m_rtqAlloc.empty()); + Type *funcArgTys[RtqAlloc::Count] = { + m_builder->getInt32Ty(), // RayQueryId + m_builder->getInt32Ty(), // PreviousRayQueryId + m_builder->getInt1Ty(), // bool committed + }; + m_builder->SetInsertPointPastAllocas(func); + for (auto ty : funcArgTys) + m_rtqAlloc.push_back(m_builder->CreateAlloca(ty, func->getParent()->getDataLayout().getAllocaAddrSpace())); + + m_builder->CreateStore(m_builder->getInt32(0), m_rtqAlloc[RtqAlloc::RayQueryId]); + m_builder->CreateStore(m_builder->getInt32(UINT32_MAX), m_rtqAlloc[RtqAlloc::PrevRayQueryId]); +} + +// ===================================================================================================================== +// Visit RayQueryInternal commit/candidate RaySystemData member +// +// @param funcType : the gpurt function to access member +// @param rayQuery : the internal rayquery structure +// 
@param committed : commit or candidate member +// @param inst : instruction to lower +void LowerRayQuery::visitHitAccessor(GpurtFunc funcType, Value *rayQuery, bool committed, CallBase *inst) { + rayQuery = getRayQuery(rayQuery); + Function *gpurtFunc = getGpurtFunc(funcType); + Value *committedArg = m_builder->getInt1(committed); + + // We need to cope with arg 1 (committed) being either an i1 or a pointer to + // i1. Pointer to i1 happens when GPURT is compiled to SPIR-V by DXC. A more + // correct fix would be to get llpcSpirvProcessGpurt to promote the arg, + // but there are 13 separate GPURT rayQuery functions involved, and building + // knowledge of that into llpcSpirvProcessGpurt would be too fiddly. + if (isa(gpurtFunc->getFunctionType()->getParamType(1))) { + m_builder->CreateStore(committedArg, m_rtqAlloc[RtqAlloc::BoolP]); + committedArg = m_rtqAlloc[RtqAlloc::BoolP]; + } + + CrossModuleInliner inliner; + auto call = inliner.inlineCall(*m_builder, gpurtFunc, {rayQuery, committedArg}); + inst->replaceAllUsesWith(call.returnValue); + m_typeLowering->eraseInstruction(inst); + m_funcsToLower.insert(inst->getCalledFunction()); +} + +// ===================================================================================================================== +// Visit RayQueryInternal member +// +// @param funcType : the gpurt function to access member +// @param rayQuery : the internal rayquery structure +// @param inst : instruction to lower +void LowerRayQuery::visitAccessor(GpurtFunc funcType, Value *rayQuery, CallBase *inst) { + rayQuery = getRayQuery(rayQuery); + CrossModuleInliner inliner; + auto call = inliner.inlineCall(*m_builder, getGpurtFunc(funcType), {rayQuery}); + inst->replaceAllUsesWith(call.returnValue); + m_typeLowering->eraseInstruction(inst); + m_funcsToLower.insert(inst->getCalledFunction()); +} + +// ===================================================================================================================== +// Visit 
lgc.gpurt.get.static.flags op +void LowerRayQuery::visitGetStaticFlagsOp(GpurtGetStaticFlagsOp &inst) { + inst.replaceAllUsesWith(m_builder->getInt32(m_staticFlags)); +} + +// ===================================================================================================================== +// Visits "lgc.gpurt.stack.read" instructions +// +// @param inst : The instruction +void LowerRayQuery::visitStackReadOp(GpurtStackReadOp &inst) { + auto stage = getLgcRtShaderStage(inst.getFunction()); + if (stage == RayTracingShaderStage::AnyHit || stage == RayTracingShaderStage::Intersection) + inst.setUseExtraStack(true); +} + +// ===================================================================================================================== +// Visits "lgc.gpurt.stack.write" instructions +// +// @param inst : The instruction +void LowerRayQuery::visitStackWriteOp(GpurtStackWriteOp &inst) { + auto stage = getLgcRtShaderStage(inst.getFunction()); + if (stage == RayTracingShaderStage::AnyHit || stage == RayTracingShaderStage::Intersection) + inst.setUseExtraStack(true); +} + +// ===================================================================================================================== +// Visits "lgc.gpurt.stack.init" instructions +// +// @param inst : The instruction +void LowerRayQuery::visitLdsStackInitOp(GpurtLdsStackInitOp &inst) { + auto stage = getLgcRtShaderStage(inst.getFunction()); + if (stage == RayTracingShaderStage::AnyHit || stage == RayTracingShaderStage::Intersection) + inst.setUseExtraStack(true); +} + +// ===================================================================================================================== +// Executes this LowerRayquery pass on the specified LLVM module. 
+// +// @param [in/out] module : LLVM module to be run on +// @param [in/out] analysisManager : Analysis manager to use for this transformation +PreservedAnalyses LowerRayQuery::run(Module &module, ModuleAnalysisManager &analysisManager) { + llvm_dialects::Builder builderImpl(module.getContext()); + m_builder = &builderImpl; + + Function *gpurtFuncs[unsigned(GpurtFunc::Count)] = {}; + m_gpurtFuncs = gpurtFuncs; + m_gpurtModule = GpurtContext::get(module.getContext()).theModule; + + typedef SmallSetVector FuncSet; + FuncSet rayQueryFuncs; + + static auto findRayqueryDialect = + llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](FuncSet &funcSet, auto &inst) { funcSet.insert(inst.getFunction()); }) + .build(); + findRayqueryDialect.visit(rayQueryFuncs, module); + + if (rayQueryFuncs.empty()) + return PreservedAnalyses::all(); + + // Get the ray-query object type from the return type of the GPURT _rayquery_allocate function; we do not + // otherwise use that function. 
+ Function *allocateFunc = getGpurtFunc(GpurtFunc::Allocate); + m_rtqType = allocateFunc->getFunctionType()->getReturnType(); + LoweringVisitorRtqType payload(m_rtqType, this); + m_typeLowering = &payload.typeLower; + + static const auto visitor = llvm_dialects::VisitorBuilder() + .nest([](auto &b) { + b.add(&LowerRayQuery::visitAlloca); + b.add(&LowerRayQuery::visitPtrToInt); + b.add(&LowerRayQuery::visitLifetimeIntrinsic); + b.add(&LowerRayQuery::visitInitializeOp); + b.add(&LowerRayQuery::visitTerminateOp); + b.add(&LowerRayQuery::visitProceedOp); + b.add(&LowerRayQuery::visitIntersectionCommitAabbOp); + b.add(&LowerRayQuery::visitIntersectionCommitTriangleOp); + b.add(&LowerRayQuery::visitIntersectionTypeOp); + b.add(&LowerRayQuery::visitRayTMinOp); + b.add(&LowerRayQuery::visitRayFlagsOp); + b.add(&LowerRayQuery::visitIntersectionTOp); + b.add(&LowerRayQuery::visitIntersectionInstanceIdOp); + b.add(&LowerRayQuery::visitIntersectionInstanceIndexOp); + b.add(&LowerRayQuery::visitIntersectionContributionToHitGroupIndexOp); + b.add(&LowerRayQuery::visitIntersectionGeometryIndexOp); + b.add(&LowerRayQuery::visitIntersectionPrimitiveIndexOp); + b.add(&LowerRayQuery::visitIntersectionBarycentricsOp); + b.add(&LowerRayQuery::visitIntersectionFrontFaceOp); + b.add(&LowerRayQuery::visitIntersectionCandidateAabbOpaqueOp); + b.add(&LowerRayQuery::visitIntersectionObjectRayDirectionOp); + b.add(&LowerRayQuery::visitIntersectionObjectRayOriginOp); + b.add(&LowerRayQuery::visitIntersectionTriangleVertexPositionsOp); + b.add(&LowerRayQuery::visitIntersectionWorldRayDirectionOp); + b.add(&LowerRayQuery::visitIntersectionWorldRayOriginOp); + b.add(&LowerRayQuery::visitIntersectionObjectToWorldOp); + b.add(&LowerRayQuery::visitIntersectionWorldToObjectOp); + b.add(&LowerRayQuery::visitGepOpaqueOp); + }) + .nest(&TypeLowering::registerVisitors) + .build(); + + for (auto func : rayQueryFuncs) { + initializeAlloc(func); + visitor.visit(payload, *func); + m_rtqAlloc.clear(); + } + + 
payload.typeLower.finishPhis(); + payload.typeLower.finishCleanup(); + + static auto postVisit = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add(&LowerRayQuery::visitGetStaticFlagsOp) + .add(&LowerRayQuery::visitStackReadOp) + .add(&LowerRayQuery::visitStackWriteOp) + .add(&LowerRayQuery::visitLdsStackInitOp) + .build(); + postVisit.visit(*this, module); + + m_typeLowering = nullptr; + for (Function *func : m_funcsToLower) { + func->dropAllReferences(); + func->eraseFromParent(); + } + return PreservedAnalyses::none(); +} + +// ===================================================================================================================== +// Recursive replace i127 opaque to the rayQueryInternal in the aggregation type +// +// @param ty : The type to replace +Type *LowerRayQuery::replaceRayQueryType(Type *ty) { + if (rtq::isRayQueryType(ty)) + return m_rtqType; + if (ty->isStructTy()) { + SmallVector elemTys; + for (unsigned i = 0; i < ty->getStructNumElements(); ++i) + elemTys.push_back(replaceRayQueryType(ty->getStructElementType(i))); + return StructType::get(m_rtqType->getContext(), elemTys); + } + if (ty->isArrayTy()) + return ArrayType::get(replaceRayQueryType(ty->getArrayElementType()), ty->getArrayNumElements()); + + return ty; +} + +// ===================================================================================================================== +// Recursive find i127 opaque in the aggregation type +// +// @param ty : The type to find +bool LowerRayQuery::hasRtqOpaqueType(Type *ty) { + if (rtq::isRayQueryType(ty)) + return true; + if (ty->isStructTy()) { + bool isMemberRtq = false; + for (unsigned i = 0; i < ty->getStructNumElements(); ++i) { + if ((isMemberRtq = hasRtqOpaqueType(ty->getStructElementType(i)))) + break; + } + return isMemberRtq; + } + if (ty->isArrayTy()) + return hasRtqOpaqueType(ty->getArrayElementType()); + + return false; +} + +// 
===================================================================================================================== +// Given a pointer to an i127 rayQuery object, get the pointer to its actual lowered rayQuery object. +Value *LowerRayQuery::getRayQuery(Value *rayQuery) { + llvm::SmallVector loweredVals = m_typeLowering->getValueOptional(rayQuery); + if (!loweredVals.empty()) { + // This is the case that the value is the alloca or a GEP from it. That was + // lowered earlier. + return loweredVals[0]; + } + // This is the case that the value is something that generates an opaque + // pointer (e.g. inttoptr), so we just use the original value. + assert(!isa(rayQuery)); + return rayQuery; +} + +// ===================================================================================================================== +// Get GPURT function given its GpurtFunc::* enum value. The first time a particular function is requested, it +// is lazily found in the GPURT module. +// +// @param gpurtFunc : Enum value for which GPURT function we want +// @param optional : Return nullptr instead of throwing an error if the GPURT function is not found +Function *LowerRayQuery::getGpurtFunc(GpurtFunc gpurtFunc, bool optional) { + if (m_gpurtFuncs[unsigned(gpurtFunc)]) + return m_gpurtFuncs[unsigned(gpurtFunc)]; + StringRef name = GpurtFuncNames[unsigned(gpurtFunc)]; + m_gpurtFuncs[unsigned(gpurtFunc)] = m_gpurtModule->getFunction(name); + if (!m_gpurtFuncs[unsigned(gpurtFunc)]) { + if (!optional) + report_fatal_error(Twine("GPURT function '") + name + "' not found"); + return nullptr; + } + return m_gpurtFuncs[unsigned(gpurtFunc)]; +} diff --git a/llvmraytracing/lib/LowerRaytracingPipeline.cpp b/llvmraytracing/lib/LowerRaytracingPipeline.cpp index 7d662c5876..655f558fc4 100644 --- a/llvmraytracing/lib/LowerRaytracingPipeline.cpp +++ b/llvmraytracing/lib/LowerRaytracingPipeline.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished 
to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -39,15 +39,16 @@ //===----------------------------------------------------------------------===// #include "compilerutils/CompilerUtils.h" +#include "llpc/GpurtEnums.h" +#include "llvmraytracing/Continuations.h" +#include "llvmraytracing/ContinuationsUtil.h" +#include "llvmraytracing/GpurtContext.h" +#include "llvmraytracing/PayloadAccessQualifiers.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcIlCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "llvm-dialects/Dialect/OpSet.h" #include "llvm-dialects/Dialect/Visitor.h" -#include "llvmraytracing/Continuations.h" -#include "llvmraytracing/ContinuationsUtil.h" -#include "llvmraytracing/GpurtContext.h" -#include "llvmraytracing/PayloadAccessQualifiers.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -82,16 +83,14 @@ using namespace lgc::rt; namespace { // Create a GEP if I is non-null, otherwise return the pointer. 
-static Value *SimplifyingCreateConstGEP1_32(IRBuilder<> &B, Type *Ty, - Value *Ptr, uint32_t I) { +static Value *SimplifyingCreateConstGEP1_32(IRBuilder<> &B, Type *Ty, Value *Ptr, uint32_t I) { // A GEP with a single zero index is redundant with opaque pointers if (I == 0) return Ptr; return B.CreateConstGEP1_32(Ty, Ptr, I); } -static Value *SimplifyingCreateConstInBoundsGEP1_32(IRBuilder<> &B, Type *Ty, - Value *Ptr, uint32_t I) { +static Value *SimplifyingCreateConstInBoundsGEP1_32(IRBuilder<> &B, Type *Ty, Value *Ptr, uint32_t I) { // A GEP with a single zero index is redundant with opaque pointers if (I == 0) return Ptr; @@ -153,24 +152,20 @@ struct PayloadCopyHelper { // storage memory) void copyField(Type *FieldTy, const PAQIndexIntervals &Intervals) { // Pointer to the node field in the local payload - auto *LocalFieldPtr = - B.CreateInBoundsGEP(&PayloadTy, LocalPayload, PayloadIdxList); + auto *LocalFieldPtr = B.CreateInBoundsGEP(&PayloadTy, LocalPayload, PayloadIdxList); // Counts how many bytes have already been copied unsigned FieldByteOffset = 0; - unsigned FieldNumBytes = - M.getDataLayout().getTypeStoreSize(FieldTy).getFixedValue(); + unsigned FieldNumBytes = M.getDataLayout().getTypeStoreSize(FieldTy).getFixedValue(); for (auto [IntervalIdx, CompleteInterval] : enumerate(Intervals)) { - copyFieldInterval(LocalFieldPtr, &FieldByteOffset, FieldNumBytes, - CompleteInterval); + copyFieldInterval(LocalFieldPtr, &FieldByteOffset, FieldNumBytes, CompleteInterval); } assert(FieldByteOffset == FieldNumBytes && "Inconsistent storage size!"); } - void copyFieldInterval(Value *LocalFieldPtr, unsigned *FieldByteOffset, - unsigned FieldNumBytes, + void copyFieldInterval(Value *LocalFieldPtr, unsigned *FieldByteOffset, unsigned FieldNumBytes, const PAQIndexInterval CompleteInterval) { auto *I32 = Type::getInt32Ty(M.getContext()); // Split interval into registers and memory part. 
@@ -178,21 +173,16 @@ struct PayloadCopyHelper { SmallVector, 2> TmpIntervals; if (CompleteInterval.Begin < PayloadRegisterCount) { - PAQIndexInterval Interval = { - CompleteInterval.Begin, - std::min(CompleteInterval.End, PayloadRegisterCount)}; + PAQIndexInterval Interval = {CompleteInterval.Begin, std::min(CompleteInterval.End, PayloadRegisterCount)}; // Pointer to start of current interval in global payload - auto *GlobalIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32( - B, I32, Serialization, Interval.Begin); + auto *GlobalIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32(B, I32, Serialization, Interval.Begin); TmpIntervals.push_back({Interval, GlobalIntervalI32Ptr}); } if (CompleteInterval.End > PayloadRegisterCount) { - PAQIndexInterval Interval = { - std::max(CompleteInterval.Begin, PayloadRegisterCount), - CompleteInterval.End}; + PAQIndexInterval Interval = {std::max(CompleteInterval.Begin, PayloadRegisterCount), CompleteInterval.End}; // Pointer to start of current interval in global payload - auto *GlobalIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32( - B, I32, SpilledPayloadPtr, Interval.Begin - PayloadRegisterCount); + auto *GlobalIntervalI32Ptr = + SimplifyingCreateConstInBoundsGEP1_32(B, I32, SpilledPayloadPtr, Interval.Begin - PayloadRegisterCount); TmpIntervals.push_back({Interval, GlobalIntervalI32Ptr}); } @@ -203,8 +193,7 @@ struct PayloadCopyHelper { unsigned FieldI32Offset = *FieldByteOffset / RegisterBytes; assert(*FieldByteOffset == FieldI32Offset * RegisterBytes); // I32 pointer into field, offset by FieldI32Offset - auto *FieldIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32( - B, I32, LocalFieldPtr, FieldI32Offset); + auto *FieldIntervalI32Ptr = SimplifyingCreateConstInBoundsGEP1_32(B, I32, LocalFieldPtr, FieldI32Offset); // Determine Src and Dst auto *Src = FieldIntervalI32Ptr; @@ -237,25 +226,17 @@ class ModuleMetadataState final { ModuleMetadataState(const ModuleMetadataState &) = delete; 
ModuleMetadataState(ModuleMetadataState &&) = default; - uint32_t getMaxPayloadRegisterCount() const { - return MaxPayloadRegisterCount; - } + uint32_t getMaxPayloadRegisterCount() const { return MaxPayloadRegisterCount; } - std::optional tryGetPreservedPayloadRegisterCount() const { - return PreservedPayloadRegisterCount; - } + std::optional tryGetPreservedPayloadRegisterCount() const { return PreservedPayloadRegisterCount; } void updateMaxUsedPayloadRegisterCount(uint32_t Count) { MaxUsedPayloadRegisterCount = std::max(Count, MaxUsedPayloadRegisterCount); } - uint32_t getMaxUsedPayloadRegisterCount() const { - return MaxUsedPayloadRegisterCount; - } + uint32_t getMaxUsedPayloadRegisterCount() const { return MaxUsedPayloadRegisterCount; } - uint32_t getMaxHitAttributeByteCount() const { - return MaxHitAttributeByteCount; - } + uint32_t getMaxHitAttributeByteCount() const { return MaxHitAttributeByteCount; } bool isInLgcCpsMode() const { return IsInLgcCpsMode; } @@ -331,8 +312,13 @@ class LowerRaytracingPipelinePassImpl final { /// The payload storage and its type belongs to this function. Value *PayloadStorage = nullptr; Type *PayloadStorageTy = nullptr; - /// The starting dword of payload storage argument - uint32_t FirstPayloadArgumentDword = 0; + /// The starting dword of payload storage argument. If there is no payload + /// argument, this is std::nullopt. + std::optional FirstPayloadArgumentDword = std::nullopt; + // For shaders that pass through a payload (e. g. intersection and + // traversal), use this value to indicate the number of passed-through + // payload dwords. + std::optional NumPassedThroughPayloadDwords; }; /// Needed data for handling the end of a function @@ -347,18 +333,16 @@ class LowerRaytracingPipelinePassImpl final { Type *NewRetTy = nullptr; }; - // Simplify some code used to handle padding and payload computation and - // related things. 
+ // Simplify some code used to compute and append padding and payload on + // function signatures and continue / jump calls. class PayloadHelper final { public: - PayloadHelper(Module &Mod, const DataLayout &DL, - llvm_dialects::Builder &Builder, bool CpsMode) + PayloadHelper(Module &Mod, const DataLayout &DL, llvm_dialects::Builder &Builder, bool CpsMode) : Mod{Mod}, DL{DL}, Builder{Builder}, IsCpsMode{CpsMode} {} /// Append padding and payload to lgc.cps.jump calls. - void patchJumpCalls(Function *Parent, ArrayRef JumpCalls, - uint32_t PayloadStartDword) { - if (!IsCpsMode) + void patchJumpCalls(Function *Parent, ArrayRef JumpCalls, std::optional PayloadStartDword) { + if (!IsCpsMode || !PayloadStartDword.has_value()) return; for (auto *Jump : JumpCalls) { @@ -366,87 +350,138 @@ class LowerRaytracingPipelinePassImpl final { SmallVector NewTailArgs(Jump->getTail()); // Add padding so that payload starts at a fixed dword. - ContHelper::addPaddingValue(DL, Parent->getContext(), NewTailArgs, - PayloadStartDword); - + ContHelper::addPaddingValue(DL, Parent->getContext(), NewTailArgs, PayloadStartDword.value()); // Insert payload into tail args. NewTailArgs.push_back(Parent->getArg(CpsArgIdxPayload)); - Builder.create(Jump->getTarget(), Jump->getLevels(), - Jump->getState(), NewTailArgs); - Jump->dropAllReferences(); - Jump->eraseFromParent(); + Jump->replaceTail(NewTailArgs); } } - /// Create and initialize payload serialization storage from the incoming - /// payload argument. - void initializePayloadSerializationStorage(Function *Parent, - FunctionData &Data) { + /// Find a continue call starting from the terminator of a given basic + /// block. + /// Returns a pair containing a pointer to the call, and the iterator range + /// containing the tail argument list used, for computing the padding at the + /// callsite. 
+ std::pair> + getContinueCallFromTerminator(Instruction *Terminator) { + assert((isa(Terminator))); + auto RIt = Terminator->getReverseIterator(); + + // We technically could have an eligible terminator + // as the single instruction of a BB, so we don't want to assert here. + BasicBlock *BB = Terminator->getParent(); + + // Find a continue call starting from the unreachable. + // Don't single-step because at this point the caller + // has created the payload load before the terminator, + // and re-creating the continue call will fix up the order again. + CallInst *CInst = nullptr; + while (RIt != BB->rend()) { + CInst = dyn_cast(&*RIt); + + if (CInst) + break; + + ++RIt; + } + + assert(CInst); + + if (auto *Continue = dyn_cast(CInst)) + return {Continue, Continue->getTail()}; + + if (auto *WaitContinue = dyn_cast(CInst)) + return {WaitContinue, WaitContinue->getTail()}; + + report_fatal_error("LowerRaytracingPipelinePassImpl::PayloadHelper::" + "getContinueCallFromTerminator: expected either a " + "lgc.ilcps.continue or a lgc.ilcps.waitContinue op!"); + } + + /// Create and initialize payload serialization storage for non-Traversal + /// shader. + void initializePayloadSerializationStorage(Function *Parent, FunctionData &Data) { llvm_dialects::Builder::InsertPointGuard Guard{Builder}; Builder.SetInsertPointPastAllocas(Parent); Data.PayloadStorage = Builder.CreateAlloca(Data.PayloadStorageTy); Data.PayloadStorage->setName("payload.serialization.alloca"); // TODO: We shouldn't need to create the alloca for RGS. 
- if (Data.Kind != RayTracingShaderStage::RayGeneration) - Builder.CreateStore(Parent->getArg(Parent->arg_size() - 1), - Data.PayloadStorage); + if (Data.Kind != RayTracingShaderStage::RayGeneration && Data.FirstPayloadArgumentDword.has_value()) + Builder.CreateStore(Parent->getArg(Parent->arg_size() - 1), Data.PayloadStorage); } - Type *getPayloadStorageTy(uint32_t MaxPayloadRegisterCount, - FunctionData &Data) { - uint32_t PayloadStorageI32s = - std::max(MaxPayloadRegisterCount, Data.MaxOutgoingPayloadI32s); - if (Data.IncomingPayloadSerializationInfo) - PayloadStorageI32s = - std::max(PayloadStorageI32s, - Data.IncomingPayloadSerializationInfo->MaxStorageI32s); - + Type *getPayloadStorageTy(uint32_t MaxPayloadRegisterCount, FunctionData &Data) { + uint32_t PayloadStorageI32s = 0; + if (Data.NumPassedThroughPayloadDwords.has_value()) { + PayloadStorageI32s = Data.NumPassedThroughPayloadDwords.value(); + } else { + // Take (for RGS) the maximum outgoing payload, otherwise take the max + // with the serialized incoming payload info. + PayloadStorageI32s = Data.MaxOutgoingPayloadI32s; + if (Data.IncomingPayloadSerializationInfo) + PayloadStorageI32s = std::max(PayloadStorageI32s, Data.IncomingPayloadSerializationInfo->MaxStorageI32s); + } return ArrayType::get(Builder.getInt32Ty(), PayloadStorageI32s); } - // Compute the dword at which payload starts in the argument at most in the - // argument list. Only valid for lgc.cps mode since we only compute padding - // there. - uint32_t getPayloadStartDword(FunctionData &Data, - uint32_t MaxHitAttributeBytes, - Type *TraversalDataTy) { + /// Compute the dword at which payload starts in the argument at most in the + /// argument list. 
+ std::optional getPayloadStartDword(FunctionData &Data, uint32_t MaxHitAttributeBytes, + Type *TraversalDataTy) { + if (Data.PayloadStorageTy->getArrayNumElements() == 0) + return std::nullopt; + assert(TraversalDataTy && "Failed to detect traversal system data type"); - assert(IsCpsMode); // For lgc.cps mode, take into account that the return address and shader // index dwords are inserted at a later stage. - // For non-lgc.cps mode, we do not use padding yet. - return 1 + 1 + getArgumentDwordCount(DL, TraversalDataTy) + - std::max(divideCeil(MaxHitAttributeBytes, RegisterBytes), - uint64_t(2)); + // Always ensure that we consider the two dword barycentric coordinates + // passed as argument for _AmdEnqueueAnyHit calls. + return (IsCpsMode ? 1 + 1 : 0) + getArgumentDwordCount(DL, TraversalDataTy) + +#if LLVM_MAIN_REVISION && LLVM_MAIN_REVISION < 503627 + // Old version of the code + std::max(divideCeil(MaxHitAttributeBytes, RegisterBytes), uint64_t(2)); +#else + // New version of the code (also handles unknown version, which we + // treat as latest) + std::max(divideCeil(MaxHitAttributeBytes, RegisterBytes), 2u); +#endif } /// Compute padding and payload arguments based on the passed arguments and /// append them to ArgTys. /// Returns a pair (paddingType, payloadType). - std::pair - computePaddingAndPayloadArgTys(SmallVectorImpl &ArgTys, - uint32_t PayloadSizeDwords, - uint32_t PayloadStartDword) { - // Compute padding type so that payload starts at a fixed dword. - Type *PaddingTy = ContHelper::getPaddingType(DL, Mod.getContext(), ArgTys, - PayloadStartDword); - Type *PayloadTy = ArrayType::get(Builder.getInt32Ty(), PayloadSizeDwords); + std::pair computePaddingAndPayloadArgTys(SmallVectorImpl &ArgTys, + uint32_t PayloadSizeDwords, + std::optional PayloadStartDword, + uint32_t Offset = 0) { + Type *PaddingTy = nullptr; + const uint32_t ShiftedStartDword = PayloadStartDword.has_value() ? 
PayloadStartDword.value() - Offset : 0; #ifndef NDEBUG - LLVM_DEBUG( - dbgs() << "Computing padding and payload based on following data:\n" - << "Payload size: " << PayloadSizeDwords << " dwords\n" - << "Payload start dword: " << PayloadStartDword - << "\nArgument types:\n"); + LLVM_DEBUG(dbgs() << "Computing padding and payload based on following data:\n" + << "Payload size: " << PayloadSizeDwords << " dwords\n" + << "Payload start dword: " << ShiftedStartDword << "\nArgument types:\n"); for (Type *Ty : ArgTys) - LLVM_DEBUG(dbgs() << *Ty << ": " - << lgc::cps::getArgumentDwordCount(DL, Ty) - << " dwords\n"); + LLVM_DEBUG(dbgs() << *Ty << ": " << lgc::cps::getArgumentDwordCount(DL, Ty) << " dwords\n"); +#endif + + // Compute padding type so that payload starts at a fixed dword. + // If PayloadStartDword is set to std::nullopt, then we don't pass + // payload, thus we don't need padding. + if (PayloadStartDword.has_value()) { + PaddingTy = ContHelper::getPaddingType(DL, Mod.getContext(), ArgTys, ShiftedStartDword); + } else { + assert(PayloadSizeDwords == 0 && "PayloadHelper::computePaddingAndPayloadArgTys: Expected zero " + "payload dwords!"); + PaddingTy = ArrayType::get(Builder.getInt32Ty(), 0); + } + + Type *PayloadTy = ArrayType::get(Builder.getInt32Ty(), PayloadSizeDwords); - LLVM_DEBUG(dbgs() << "Resulting padding type: " << *PaddingTy - << "\nResulting payload type: " << *PayloadTy +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "Resulting padding type: " << *PaddingTy << "\nResulting payload type: " << *PayloadTy << "\n---\n"); #endif @@ -456,6 +491,24 @@ class LowerRaytracingPipelinePassImpl final { return {PaddingTy, PayloadTy}; } + /// Append the actual padding and payload arguments to a jump or continue + /// call. Uses PaddingArgs to compute the padding, loads the payload from + /// the PayloadSerializationStorage and appends both to the OutArgList. 
+ void appendPaddingAndPayloadValues(SmallVectorImpl &PaddingArgs, SmallVectorImpl &OutArgList, + uint32_t OutgoingPayloadRegisterCount, std::optional PayloadStartDword, + Value *PayloadSerializationStorage) { + + if (!PayloadStartDword.has_value()) + return; + + ContHelper::addPaddingValue(DL, Mod.getContext(), PaddingArgs, PayloadStartDword.value()); + + OutArgList.push_back(PaddingArgs.back()); + + OutArgList.push_back(Builder.CreateLoad(ArrayType::get(Builder.getInt32Ty(), OutgoingPayloadRegisterCount), + PayloadSerializationStorage)); + } + private: Module &Mod; const DataLayout &DL; @@ -463,19 +516,16 @@ class LowerRaytracingPipelinePassImpl final { bool IsCpsMode = false; }; - void replaceCall(FunctionData &Data, CallInst *Call, Function *Func, - ContinuationCallType CallType); + void replaceCall(FunctionData &Data, CallInst *Call, Function *Func, ContinuationCallType CallType); void handleRestoreSystemData(CallInst *Call); void handleExitRayGen(const FunctionData &Data); - void replaceContinuationCall(ContinuationCallType CallType, CallInst *Call, - const FunctionData &Data, Value *PayloadOrAttrs, - Type *PayloadOrAttrsTy); + void replaceContinuationCall(ContinuationCallType CallType, CallInst *Call, const FunctionData &Data, + Value *PayloadOrAttrs, Type *PayloadOrAttrsTy); void replaceReportHitCall(FunctionData &Data, CallInst *Call); void replaceShaderIndexCall(FunctionData &Data, CallInst *Call); void replaceShaderRecordBufferCall(FunctionData &Data, CallInst *Call); - void handleGetFuncAddr(Function &Func); void handleGetShaderKind(Function &Func); void handleGetCurrentFuncAddr(Function &Func); @@ -500,10 +550,8 @@ class LowerRaytracingPipelinePassImpl final { // are copied. Used for CallShader accesses which are not PAQ qualified and do // not have PAQShaderStage values. If CopiedNodes is set, nodes contained will // not be copied, and all copied nodes are added to it. 
- void copyPayload(Type &PayloadTy, Value *LocalPayload, Value *PayloadStorage, - std::optional Stage, - PAQAccessKind GlobalAccessKind, - const PAQSerializationLayout &Layout, + void copyPayload(Type &PayloadTy, Value *LocalPayload, Value *PayloadStorage, std::optional Stage, + PAQAccessKind GlobalAccessKind, const PAQSerializationLayout &Layout, SmallDenseSet *CopiedNodes = nullptr); // Special handling for case of copying the result payload of a traceray call @@ -512,50 +560,41 @@ class LowerRaytracingPipelinePassImpl final { // We first perform a copy using the ClosestHitOut layout, and then perform an // additional copy using the MissOut layout, skipping any fields already // copied (i.e. only copying write(miss) : read(caller) fields). - void copyTraceRayPayloadIncomingToCaller( - const PAQTraceRaySerializationInfo &PAQSerializationInfo, - Value *LocalPayload, Value *PayloadStorage); + void copyTraceRayPayloadIncomingToCaller(const PAQTraceRaySerializationInfo &PAQSerializationInfo, + Value *LocalPayload, Value *PayloadStorage); // Caller-save payload registers before CallShader() or TraceRay(), // which can override payload registers. A register needs to be saved // if it is live in OutgoingLayout, and not written in OutgoingLayout. // This includes the payload memory pointer if present. // SavedRegisters maps indices of payload registers to their saved values. - void savePayloadRegistersBeforeRecursion( - Value *PayloadStorage, RayTracingShaderStage Kind, - const PAQSerializationLayout &IncomingLayout, - const PAQSerializationLayout &OutgoingLayout, - SmallVectorImpl &SavedRegisterValues); + void savePayloadRegistersBeforeRecursion(Value *PayloadStorage, RayTracingShaderStage Kind, + const PAQSerializationLayout &IncomingLayout, + const PAQSerializationLayout &OutgoingLayout, + SmallVectorImpl &SavedRegisterValues); // Restore previously saved registers. 
- void restorePayloadRegistersAfterRecursion( - Value *PayloadStorage, - const SmallVectorImpl &SavedRegisterValues); - - void createPayloadGlobal(); + void restorePayloadRegistersAfterRecursion(Value *PayloadStorage, + const SmallVectorImpl &SavedRegisterValues); // Sets register count metadata (incoming on entry functions, outgoing on // continue calls) in GpuRt entries (Traversal and launch kernel). void setGpurtEntryRegisterCountMetadata(); - void copyHitAttributes(FunctionData &Data, Value *SystemData, - Type *SystemDataTy, Value *LocalHitAttributes, - bool GlobalToLocal, - const PAQSerializationLayout *Layout); + void copyHitAttributes(FunctionData &Data, Value *SystemData, Type *SystemDataTy, Value *LocalHitAttributes, + bool GlobalToLocal, const PAQSerializationLayout *Layout); void processContinuations(); void processFunctionEntry(FunctionData &Data, Argument *SystemDataArgument); void processFunctionEnd(FunctionData &Data, FunctionEndData &EData); void processFunction(Function *F, FunctionData &FuncData); void handleContPayloadRegisterI32Count(Function &F); - void handleContPayloadRegistersGetI32(Function &F); - void handleContPayloadRegistersSetI32(Function &F); + void handleContPayloadRegistersGetI32(Function &F, Function &Parent, FunctionData &Data); + void handleContPayloadRegistersSetI32(Function &F, Function &Parent, FunctionData &Data); void collectProcessableFunctions(); - Instruction *insertCpsAwait(Type *ReturnTy, Value *ShaderAddr, - Instruction *Call, ArrayRef Args, - ContinuationCallType CallType, - RayTracingShaderStage ShaderStage); + Instruction *insertCpsAwait(Type *ReturnTy, Value *ShaderAddr, Instruction *Call, ArrayRef Args, + ContinuationCallType CallType, RayTracingShaderStage ShaderStage); MapVector ToProcess; Module *Mod; @@ -576,8 +615,6 @@ class LowerRaytracingPipelinePassImpl final { /// Dispatch system data type passed to RayGen and others Type *DispatchSystemDataTy; - GlobalVariable *PayloadStorageGlobal; - // Function 
definitions and declarations from HLSL // Driver implementation that returns if AcceptHitAndEndSearch was called Function *IsEndSearch; @@ -604,33 +641,25 @@ constexpr unsigned ModuleMetadataState::DefaultPayloadRegisterCount; ModuleMetadataState::ModuleMetadataState(Module &Module) : Mod{Module} { // Import PayloadRegisterCount from metadata if set, // otherwise from default - auto RegisterCountFromMD = ContHelper::tryGetMaxPayloadRegisterCount(Module); - MaxPayloadRegisterCount = - RegisterCountFromMD.value_or(DefaultPayloadRegisterCount); + auto RegisterCountFromMD = ContHelper::MaxPayloadRegisterCount::tryGetValue(&Module); + MaxPayloadRegisterCount = RegisterCountFromMD.value_or(DefaultPayloadRegisterCount); // Check that if there is a required minimum number of payload registers, // it is compatible - PreservedPayloadRegisterCount = - ContHelper::tryGetPreservedPayloadRegisterCount(Module); - assert(PreservedPayloadRegisterCount.value_or(MaxPayloadRegisterCount) <= - MaxPayloadRegisterCount); + PreservedPayloadRegisterCount = ContHelper::PreservedPayloadRegisterCount::tryGetValue(&Module); + assert(PreservedPayloadRegisterCount.value_or(MaxPayloadRegisterCount) <= MaxPayloadRegisterCount); - MaxUsedPayloadRegisterCount = - ContHelper::tryGetMaxUsedPayloadRegisterCount(Module).value_or(0); + MaxUsedPayloadRegisterCount = ContHelper::MaxUsedPayloadRegisterCount::tryGetValue(&Module).value_or(0); if (PreservedPayloadRegisterCount.has_value()) - MaxUsedPayloadRegisterCount = std::max( - MaxUsedPayloadRegisterCount, PreservedPayloadRegisterCount.value()); + MaxUsedPayloadRegisterCount = std::max(MaxUsedPayloadRegisterCount, PreservedPayloadRegisterCount.value()); // Use max hit attribute size from metadata, or use globally max allowed // value for the max if metadata is not set - MaxHitAttributeByteCount = - getMaxHitAttributeSize(&Mod).value_or(GlobalMaxHitAttributeBytes); + MaxHitAttributeByteCount = 
getMaxHitAttributeSize(&Mod).value_or(GlobalMaxHitAttributeBytes); if (MaxHitAttributeByteCount % RegisterBytes != 0) { - auto AlignedMaxHitAttributeSize = - alignTo(MaxHitAttributeByteCount, RegisterBytes); - LLVM_DEBUG(dbgs() << "Aligning misaligned max hit attribute size " - << MaxHitAttributeByteCount << " to " + auto AlignedMaxHitAttributeSize = alignTo(MaxHitAttributeByteCount, RegisterBytes); + LLVM_DEBUG(dbgs() << "Aligning misaligned max hit attribute size " << MaxHitAttributeByteCount << " to " << AlignedMaxHitAttributeSize << "\n"); MaxHitAttributeByteCount = AlignedMaxHitAttributeSize; } @@ -645,16 +674,15 @@ ModuleMetadataState::ModuleMetadataState(Module &Module) : Mod{Module} { /// Write the previously derived information about max payload registers and /// stack address space that was derived by metadata as global state. void ModuleMetadataState::updateModuleMetadata() const { - ContHelper::setMaxPayloadRegisterCount(Mod, MaxPayloadRegisterCount); - ContHelper::setMaxUsedPayloadRegisterCount(Mod, MaxUsedPayloadRegisterCount); + ContHelper::MaxPayloadRegisterCount::setValue(&Mod, MaxPayloadRegisterCount); + ContHelper::MaxUsedPayloadRegisterCount::setValue(&Mod, MaxUsedPayloadRegisterCount); ContHelper::setStackAddrspace(Mod, StackAddrspace); } // Create a lgc.cps.await operation for a given shader address. 
-Instruction *LowerRaytracingPipelinePassImpl::insertCpsAwait( - Type *ReturnTy, Value *ShaderAddr, Instruction *Call, - ArrayRef Args, ContinuationCallType CallType, - RayTracingShaderStage ShaderStage) { +Instruction *LowerRaytracingPipelinePassImpl::insertCpsAwait(Type *ReturnTy, Value *ShaderAddr, Instruction *Call, + ArrayRef Args, ContinuationCallType CallType, + RayTracingShaderStage ShaderStage) { Builder.SetInsertPoint(Call); Value *CR = nullptr; @@ -671,13 +699,10 @@ Instruction *LowerRaytracingPipelinePassImpl::insertCpsAwait( else if (CallType == ContinuationCallType::AnyHit) CallStage = RayTracingShaderStage::AnyHit; - assert(CallStage != RayTracingShaderStage::Count && - "LowerRaytracingPipelinePassImpl::insertCpsAwait: Invalid " - "call stage before inserting lgc.cps.await operation!"); + assert(CallStage != RayTracingShaderStage::Count && "LowerRaytracingPipelinePassImpl::insertCpsAwait: Invalid " + "call stage before inserting lgc.cps.await operation!"); - return Builder.create( - ReturnTy, CR, - 1 << static_cast(getCpsLevelForShaderStage(CallStage)), Args); + return Builder.create(ReturnTy, CR, 1 << static_cast(getCpsLevelForShaderStage(CallStage)), Args); } Function *llvm::getSetLocalRootIndex(Module &M) { @@ -688,30 +713,26 @@ Function *llvm::getSetLocalRootIndex(Module &M) { auto *Void = Type::getVoidTy(C); auto *I32 = Type::getInt32Ty(C); auto *FuncTy = FunctionType::get(Void, {I32}, false); - AttributeList AL = AttributeList::get( - C, AttributeList::FunctionIndex, - {Attribute::NoFree, Attribute::NoUnwind, Attribute::WillReturn}); + AttributeList AL = AttributeList::get(C, AttributeList::FunctionIndex, + {Attribute::NoFree, Attribute::NoUnwind, Attribute::WillReturn}); return cast(M.getOrInsertFunction(Name, FuncTy, AL).getCallee()); } // Set maximum continuation stack size metadata static void setStacksizeMetadata(Function &F, uint64_t NeededStackSize) { - uint64_t CurStackSize = ContHelper::tryGetStackSize(&F).value_or(0); + uint64_t 
CurStackSize = ContHelper::StackSize::tryGetValue(&F).value_or(0); if (NeededStackSize > CurStackSize) - ContHelper::setStackSize(&F, NeededStackSize); + ContHelper::StackSize::setValue(&F, NeededStackSize); } // Create an ExtractElement instruction for each index of a FixedVector @Vector // and return it. -static SmallVector flattenVectorArgument(IRBuilder<> &B, - Value *Vector) { +static SmallVector flattenVectorArgument(IRBuilder<> &B, Value *Vector) { assert(isa(Vector->getType()) && "Not a FixedVectorType!"); SmallVector Arguments; - for (unsigned Idx = 0; - Idx < cast(Vector->getType())->getNumElements(); - ++Idx) { + for (unsigned Idx = 0; Idx < cast(Vector->getType())->getNumElements(); ++Idx) { Arguments.push_back(B.CreateExtractElement(Vector, B.getInt32(Idx))); } @@ -721,8 +742,7 @@ static SmallVector flattenVectorArgument(IRBuilder<> &B, // Check if @Arg is of fixed vector type. If yes, flatten it into extractelement // instructions and append them to @Arguments. Return true if @Arguments // changed, false otherwise. 
-static bool flattenVectorArgument(IRBuilder<> &B, Value *Arg, - SmallVectorImpl &Arguments) { +static bool flattenVectorArgument(IRBuilder<> &B, Value *Arg, SmallVectorImpl &Arguments) { if (isa(Arg->getType())) { const auto &FlattenedArguments = flattenVectorArgument(B, Arg); if (!FlattenedArguments.empty()) { @@ -736,18 +756,15 @@ static bool flattenVectorArgument(IRBuilder<> &B, Value *Arg, } /// Clone a function and replace a call with a call to the cloned function -void LowerRaytracingPipelinePassImpl::replaceCall( - FunctionData &Data, CallInst *Call, Function *Func, - ContinuationCallType CallType) { +void LowerRaytracingPipelinePassImpl::replaceCall(FunctionData &Data, CallInst *Call, Function *Func, + ContinuationCallType CallType) { Builder.SetInsertPoint(Call); auto *AfterCall = &*++Builder.GetInsertPoint(); auto *SystemDataTy = getFuncArgPtrElementType(Func, 0); Value *PayloadOrAttrs = nullptr; SmallVector Arguments; - Arguments.push_back(getDXILSystemData(Builder, Data.SystemData, - Data.SystemDataTy, - cast(SystemDataTy))); + Arguments.push_back(getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, cast(SystemDataTy))); // Construct the new argument list for the driver-side call from a lgc.rt // dialect op. 
This requires some special handling since we cannot pass all @@ -802,8 +819,7 @@ void LowerRaytracingPipelinePassImpl::replaceCall( if (!Callee) continue; auto FuncName = Callee->getName(); - if (FuncName.starts_with("_AmdAwait") || - FuncName.starts_with("_AmdWaitAwait")) { + if (FuncName.starts_with("_AmdAwait") || FuncName.starts_with("_AmdWaitAwait")) { AwaitCalls.push_back(CI); } else if (FuncName.starts_with("_AmdAcceptHitAttributes")) { AcceptHitAttrsCalls.push_back(CI); @@ -814,21 +830,18 @@ void LowerRaytracingPipelinePassImpl::replaceCall( for (auto *CI : AwaitCalls) { Builder.SetInsertPoint(CI); - replaceContinuationCall(CallType, CI, Data, PayloadOrAttrs, - PayloadOrAttrsTy); + replaceContinuationCall(CallType, CI, Data, PayloadOrAttrs, PayloadOrAttrsTy); } for (auto *CI : AcceptHitAttrsCalls) { // Commit hit attributes Builder.SetInsertPoint(CI); assert(TraversalDataTy != 0 && "Missing traversal system data!"); - copyHitAttributes(Data, CI->getArgOperand(0), TraversalDataTy, - PayloadOrAttrs, false, nullptr); + copyHitAttributes(Data, CI->getArgOperand(0), TraversalDataTy, PayloadOrAttrs, false, nullptr); // Make sure that we store the hit attributes into the correct system // data (just in case dxc copied them around). 
- assert(CI->getArgOperand(0) == Arguments[0] && - "AcceptHitAttributes does not take the correct system data as " - "argument!"); + assert(CI->getArgOperand(0) == Arguments[0] && "AcceptHitAttributes does not take the correct system data as " + "argument!"); CI->eraseFromParent(); } Builder.SetInsertPoint(AfterCall); @@ -836,33 +849,26 @@ void LowerRaytracingPipelinePassImpl::replaceCall( void LowerRaytracingPipelinePassImpl::handleRestoreSystemData(CallInst *Call) { // Store system data - auto *SystemDataTy = - cast(getFuncArgPtrElementType(Call->getCalledFunction(), 0)); + auto *SystemDataTy = cast(getFuncArgPtrElementType(Call->getCalledFunction(), 0)); auto *SystemData = Call->getArgOperand(0); // Set local root signature on re-entry - auto *LocalIndexSystemDataTy = - cast(getFuncArgPtrElementType(GetLocalRootIndex, 0)); - auto *LocalIndexSystemData = getDXILSystemData( - Builder, SystemData, SystemDataTy, LocalIndexSystemDataTy); + auto *LocalIndexSystemDataTy = cast(getFuncArgPtrElementType(GetLocalRootIndex, 0)); + auto *LocalIndexSystemData = getDXILSystemData(Builder, SystemData, SystemDataTy, LocalIndexSystemDataTy); auto Stage = getLgcRtShaderStage(Call->getFunction()); Value *LocalIndex = nullptr; if (Stage == RayTracingShaderStage::RayGeneration) LocalIndex = Builder.getInt32(0); else - LocalIndex = - CrossInliner - .inlineCall(Builder, GetLocalRootIndex, LocalIndexSystemData) - .returnValue; + LocalIndex = CrossInliner.inlineCall(Builder, GetLocalRootIndex, LocalIndexSystemData).returnValue; LocalIndex->setName("local.root.index"); Builder.CreateCall(SetLocalRootIndex, LocalIndex); } /// Replace a call to lgc.rt.report.hit with a call to the driver /// implementation. 
-void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, - CallInst *Call) { +void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, CallInst *Call) { assert(ReportHit && "ReportHit not found"); replaceCall(Data, Call, ReportHit, ContinuationCallType::AnyHit); @@ -870,12 +876,9 @@ void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, // Check if the search ended and return from Intersection if this is the case assert(IsEndSearch && "IsEndSearch not found"); auto *SystemDataTy = getFuncArgPtrElementType(IsEndSearch, 0); - auto *SystemData = getDXILSystemData(Builder, Data.SystemData, - Data.SystemDataTy, SystemDataTy); - auto *IsEnd = - CrossInliner.inlineCall(Builder, IsEndSearch, SystemData).returnValue; - Instruction *Then = - SplitBlockAndInsertIfThen(IsEnd, &*Builder.GetInsertPoint(), true); + auto *SystemData = getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, SystemDataTy); + auto *IsEnd = CrossInliner.inlineCall(Builder, IsEndSearch, SystemData).returnValue; + Instruction *Then = SplitBlockAndInsertIfThen(IsEnd, &*Builder.GetInsertPoint(), true); Builder.SetInsertPoint(Then); FunctionEndData EData; @@ -883,14 +886,11 @@ void LowerRaytracingPipelinePassImpl::replaceReportHitCall(FunctionData &Data, processFunctionEnd(Data, EData); } -/// Replace a call to Await with -/// - Allocate space for the passed payload -/// - Store payload into the global -/// - Call given address and pass generated token into an await call -/// - Read payload from global -void LowerRaytracingPipelinePassImpl::replaceContinuationCall( - ContinuationCallType CallType, CallInst *Call, const FunctionData &Data, - Value *PayloadOrAttrs, Type *PayloadOrAttrsTy) { +/// Replace a call to Await with a call to a given address and pass generated +/// token into an await call +void LowerRaytracingPipelinePassImpl::replaceContinuationCall(ContinuationCallType CallType, CallInst *Call, + const FunctionData &Data, 
Value *PayloadOrAttrs, + Type *PayloadOrAttrsTy) { Builder.SetInsertPoint(Call); const PAQSerializationLayout *OutgoingSerializationLayout = nullptr; @@ -902,37 +902,26 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( if (CallType != ContinuationCallType::AnyHit) { // Specify hit attribute size also in case it is used for CallShader. // It is ignored by the implementation in that case. - PAQPayloadConfig PAQConfig = {PayloadOrAttrsTy, - MetadataState.getMaxHitAttributeByteCount()}; + PAQPayloadConfig PAQConfig = {PayloadOrAttrsTy, MetadataState.getMaxHitAttributeByteCount()}; if (CallType == ContinuationCallType::Traversal) { - const auto *TraceRayInfo = - &PAQManager.getOrCreateTraceRaySerializationInfo(PAQConfig); + const auto *TraceRayInfo = &PAQManager.getOrCreateTraceRaySerializationInfo(PAQConfig); OutgoingSerializationInfo = TraceRayInfo; - OutgoingSerializationLayout = - &TraceRayInfo->LayoutsByKind[PAQSerializationLayoutKind::CallerOut]; + OutgoingSerializationLayout = &TraceRayInfo->LayoutsByKind[PAQSerializationLayoutKind::CallerOut]; ShaderStage = PAQShaderStage::Caller; // determine ReturnedRegisterCount - ReturnedRegisterCount = std::min( - std::max( - TraceRayInfo - ->LayoutsByKind[PAQSerializationLayoutKind::ClosestHitOut] - .NumStorageI32s, - TraceRayInfo->LayoutsByKind[PAQSerializationLayoutKind::MissOut] - .NumStorageI32s), - MetadataState.getMaxPayloadRegisterCount()); + ReturnedRegisterCount = + std::min(std::max(TraceRayInfo->LayoutsByKind[PAQSerializationLayoutKind::ClosestHitOut].NumStorageI32s, + TraceRayInfo->LayoutsByKind[PAQSerializationLayoutKind::MissOut].NumStorageI32s), + MetadataState.getMaxPayloadRegisterCount()); } else { - assert(CallType == ContinuationCallType::CallShader && - "Unexpected call type!"); - const auto *CallShaderInfo = - &PAQManager.getOrCreateCallShaderSerializationInfo(PAQConfig); - OutgoingSerializationLayout = - &CallShaderInfo->CallShaderSerializationLayout; + assert(CallType == 
ContinuationCallType::CallShader && "Unexpected call type!"); + const auto *CallShaderInfo = &PAQManager.getOrCreateCallShaderSerializationInfo(PAQConfig); + OutgoingSerializationLayout = &CallShaderInfo->CallShaderSerializationLayout; OutgoingSerializationInfo = CallShaderInfo; // For CallShader, incoming and outgoing layouts are the same ReturnedRegisterCount = - std::min(MetadataState.getMaxPayloadRegisterCount(), - OutgoingSerializationLayout->NumStorageI32s); + std::min(MetadataState.getMaxPayloadRegisterCount(), OutgoingSerializationLayout->NumStorageI32s); } assert(OutgoingSerializationLayout && "Missing serialization layout!"); } else { @@ -949,20 +938,16 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( Value *LocalPayloadMem = Builder.CreatePtrToInt(Data.SpilledPayload, I32); #ifndef NDEBUG // Check that payload pointer exists and is in first position - auto It = OutgoingSerializationLayout->NodeStorageInfos.find( - OutgoingSerializationLayout->PayloadMemPointerNode); + auto It = OutgoingSerializationLayout->NodeStorageInfos.find(OutgoingSerializationLayout->PayloadMemPointerNode); assert(It != OutgoingSerializationLayout->NodeStorageInfos.end() && (It->second.IndexIntervals == - PAQIndexIntervals{{FirstPayloadMemoryPointerRegister, - FirstPayloadMemoryPointerRegister + 1}}) && + PAQIndexIntervals{{FirstPayloadMemoryPointerRegister, FirstPayloadMemoryPointerRegister + 1}}) && "Payload memory pointer at unexpected location!"); #endif // Copy to payload storage Value *CastPayload = Builder.CreateBitCast( - Data.PayloadStorage, - I32->getPointerTo( - Data.PayloadStorage->getType()->getPointerAddressSpace())); + Data.PayloadStorage, I32->getPointerTo(Data.PayloadStorage->getType()->getPointerAddressSpace())); Builder.CreateStore(LocalPayloadMem, CastPayload); // Set stacksize metadata on F @@ -970,8 +955,7 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( } // Copy local payload to global payload, before await call (e.g. 
TraceRay, // CallShader) - copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, Data.PayloadStorage, - ShaderStage, PAQAccessKind::Write, + copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, Data.PayloadStorage, ShaderStage, PAQAccessKind::Write, *OutgoingSerializationLayout); } @@ -981,9 +965,10 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( SmallVector ArgTys; SmallVector Args; - bool IsWait = - (Call->getCalledFunction()->getName().starts_with("_AmdWaitAwait")); + bool IsWait = (Call->getCalledFunction()->getName().starts_with("_AmdWaitAwait")); + Value *WaitMask = nullptr; + Value *RetAddr = nullptr; if (MetadataState.isInLgcCpsMode()) { // For LgcCps, skip function-addr, the return address will be filled at late // stage of continuation transform. Add shader index so that the callee cps @@ -997,21 +982,35 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( } ArgTys.push_back(I32); - auto *ShaderIndex = - CrossInliner - .inlineCall(Builder, GetLocalRootIndex, - getDXILSystemData( - Builder, Data.SystemData, Data.SystemDataTy, - getFuncArgPtrElementType(GetLocalRootIndex, 0))) - .returnValue; + auto *ShaderIndex = CrossInliner + .inlineCall(Builder, GetLocalRootIndex, + getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, + getFuncArgPtrElementType(GetLocalRootIndex, 0))) + .returnValue; Args.push_back(ShaderIndex); ArgTys.append(FTy->param_begin() + 2, FTy->param_end()); Args.append(Call->arg_begin() + 2, Call->arg_end()); } else { - // Pass the given arguments, skipping the function address - ArgTys.append(FTy->param_begin() + 1, FTy->param_end()); - Args.append(Call->arg_begin() + 1, Call->arg_end()); + // We want to avoid having the return address included in the padding + // computation, since it is included nowhere else. This allows us to compute + // padding only on the actual tail arguments, which is the only varying part + // of the final continue call at the end. 
WaitAwaitTraversal calls don't + // have a return address, so keep that in mind here. + + if (IsWait) + WaitMask = Call->getArgOperand(1); + + uint32_t RetAddrArgIndex = IsWait ? 2 : 1; + if (CallType == ContinuationCallType::Traversal) { + RetAddr = PoisonValue::get(Builder.getInt64Ty()); + } else { + RetAddr = Call->getArgOperand(RetAddrArgIndex); + ++RetAddrArgIndex; + } + + ArgTys.append(FTy->param_begin() + RetAddrArgIndex, FTy->param_end()); + Args.append(Call->arg_begin() + RetAddrArgIndex, Call->arg_end()); } if (CallType == ContinuationCallType::AnyHit) { @@ -1024,79 +1023,97 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( Instruction *Annotatable = nullptr; Value *NewCall = nullptr; - if (MetadataState.isInLgcCpsMode()) { - // Put payload at last - auto OutgoingPayloadI32s = - std::min(OutgoingSerializationLayout - ? OutgoingSerializationLayout->NumStorageI32s - : MetadataState.getMaxPayloadRegisterCount(), - MetadataState.getMaxPayloadRegisterCount()); - - // Add padding so that payload starts at a fixed dword. - // NOTE: Minus 1 as return address is not included - const auto &[OutgoingPaddingTy, OutgoingPayloadTy] = - PayloadHelper.computePaddingAndPayloadArgTys( - ArgTys, OutgoingPayloadI32s, Data.FirstPayloadArgumentDword - 1); + uint32_t OutgoingPayloadDwords = 0; + if (Data.NumPassedThroughPayloadDwords.has_value()) { + OutgoingPayloadDwords = Data.NumPassedThroughPayloadDwords.value(); + } else { + OutgoingPayloadDwords = std::min(OutgoingSerializationLayout ? OutgoingSerializationLayout->NumStorageI32s + : MetadataState.getMaxPayloadRegisterCount(), + MetadataState.getMaxPayloadRegisterCount()); + } + + SmallVector ReturnedArgTys{Call->getType()}; + + const bool IsLgcCpsMode = MetadataState.isInLgcCpsMode(); + const bool HasPayload = Data.FirstPayloadArgumentDword.has_value(); + + // Add padding so that returned payload starts at a fixed dword. 
+ // NOTE: In lgc.cps mode, subtract 1 as return address is not + // included in the returned argument list. + if (HasPayload) { + const uint32_t PaddingOffset = IsLgcCpsMode ? 1 : 0; + const auto &[OutgoingPaddingTy, OutgoingPayloadTy] = PayloadHelper.computePaddingAndPayloadArgTys( + ArgTys, OutgoingPayloadDwords, Data.FirstPayloadArgumentDword, PaddingOffset); Args.push_back(PoisonValue::get(OutgoingPaddingTy)); Args.push_back(Builder.CreateLoad(OutgoingPayloadTy, Data.PayloadStorage)); + } - SmallVector ReturnedArgTys{Call->getType()}; + if (IsLgcCpsMode) { + if (HasPayload) { + // Compute padding for the resume function so that payload starts at a + // fixed dword. NOTE: Minus 2 as in lgc.cps mode, return address (i32) and + // shader index (i32) are not included. + PayloadHelper.computePaddingAndPayloadArgTys(ReturnedArgTys, ReturnedRegisterCount.value(), + Data.FirstPayloadArgumentDword, 2); + } - // Add padding so that returned payload starts at a fixed dword. - // NOTE: Minus 2 as return address and shader index are not included. - PayloadHelper.computePaddingAndPayloadArgTys( - ReturnedArgTys, ReturnedRegisterCount.value(), - Data.FirstPayloadArgumentDword - 2); auto *NewRetTy = StructType::get(Builder.getContext(), ReturnedArgTys); - Annotatable = - insertCpsAwait(NewRetTy, ShaderAddr, Call, Args, CallType, Data.Kind); + Annotatable = insertCpsAwait(NewRetTy, ShaderAddr, Call, Args, CallType, Data.Kind); NewCall = Annotatable; - - auto *ReturnedPayload = Builder.CreateExtractValue(NewCall, 2); - // Store returned payload to make the payload argument being updated. - Builder.CreateStore(ReturnedPayload, Data.PayloadStorage); - - NewCall = Builder.CreateExtractValue(NewCall, 0); } else { - // Patch the dummy return address into await calls resulting from - // WaitAwaitTraversal. Note: this needs to be removed once we have the - // TraversalEntry function. 
- if (CallType == ContinuationCallType::Traversal) { - ArgTys.insert(ArgTys.begin() + 1, Builder.getInt64Ty()); - Args.insert(Args.begin() + 1, PoisonValue::get(Builder.getInt64Ty())); + // The wait mask isn't part of regular arguments and thus shouldn't be + // considered for padding. Thus, we first compute padding, and then add the + // wait mask. + + // Patch the return address into the await call, since it got excluded for + // the padding computation previously. For WaitAwaitTraversal, this needs to + // be removed later once we have the TraversalEntry function. + ArgTys.insert(ArgTys.begin(), RetAddr->getType()); + Args.insert(Args.begin(), RetAddr); + + if (WaitMask) { + ArgTys.insert(ArgTys.begin(), WaitMask->getType()); + Args.insert(Args.begin(), WaitMask); } auto *ShaderTy = FunctionType::get(TokenTy, ArgTys, false); - auto *ShaderFun = - Builder.CreateIntToPtr(ShaderAddr, ShaderTy->getPointerTo()); + auto *ShaderFun = Builder.CreateIntToPtr(ShaderAddr, ShaderTy->getPointerTo()); auto *Token = Builder.CreateCall(ShaderTy, ShaderFun, Args); - auto *Await = - getContinuationAwait(*Mod, TokenTy, cast(Call->getType())); + + if (HasPayload) { + PayloadHelper.computePaddingAndPayloadArgTys(ReturnedArgTys, ReturnedRegisterCount.value(), + Data.FirstPayloadArgumentDword); + } + + auto *NewRetTy = StructType::get(Builder.getContext(), ReturnedArgTys); + auto *Await = getContinuationAwait(*Mod, TokenTy, NewRetTy); NewCall = Builder.CreateCall(Await, {Token}); Annotatable = Token; } + // Copy back returned payload to the payload serialization alloca as part of + // the payload copying. + if (HasPayload) + Builder.CreateStore(Builder.CreateExtractValue(NewCall, ReturnedArgTys.size() - 1), Data.PayloadStorage); + // For WaitAwait, add metadata indicating that we wait. After coroutine // passes, we then generate a waitContinue on the awaited function. 
if (IsWait) ContHelper::setIsWaitAwaitCall(*cast(Annotatable)); - ContHelper::setReturnedRegisterCount(Annotatable, - ReturnedRegisterCount.value()); + ContHelper::ReturnedRegisterCount::setValue(Annotatable, ReturnedRegisterCount.value()); - auto OutgoingRegisterCount = std::min( - OutgoingSerializationLayout ? OutgoingSerializationLayout->NumStorageI32s - : MetadataState.getMaxPayloadRegisterCount(), - MetadataState.getMaxPayloadRegisterCount()); + auto OutgoingRegisterCount = std::min(OutgoingSerializationLayout ? OutgoingSerializationLayout->NumStorageI32s + : MetadataState.getMaxPayloadRegisterCount(), + MetadataState.getMaxPayloadRegisterCount()); // Annotate call with the number of registers used for payload - ContHelper::setOutgoingRegisterCount(Annotatable, OutgoingRegisterCount); + ContHelper::OutgoingRegisterCount::setValue(Annotatable, OutgoingRegisterCount); if (OutgoingSerializationLayout) { MetadataState.updateMaxUsedPayloadRegisterCount(OutgoingRegisterCount); - MetadataState.updateMaxUsedPayloadRegisterCount( - ReturnedRegisterCount.value()); + MetadataState.updateMaxUsedPayloadRegisterCount(ReturnedRegisterCount.value()); } if (CallType != ContinuationCallType::AnyHit) { @@ -1108,25 +1125,27 @@ void LowerRaytracingPipelinePassImpl::replaceContinuationCall( if (CallType == ContinuationCallType::CallShader) { // For CallShader, there is only a single layout // Copy global payload to local payload, after CallShader call - copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, Data.PayloadStorage, - ShaderStage, PAQAccessKind::Read, + copyPayload(*PayloadOrAttrsTy, PayloadOrAttrs, Data.PayloadStorage, ShaderStage, PAQAccessKind::Read, *OutgoingSerializationLayout); } else { - copyTraceRayPayloadIncomingToCaller( - *cast(OutgoingSerializationInfo), - PayloadOrAttrs, Data.PayloadStorage); + copyTraceRayPayloadIncomingToCaller(*cast(OutgoingSerializationInfo), + PayloadOrAttrs, Data.PayloadStorage); } } - if (!Call->getType()->isVoidTy()) + if 
(!Call->getType()->isVoidTy()) { + // Extract the system data from the { %systemData, %padding, %payload } + // struct returned by the await call. + NewCall = Builder.CreateExtractValue(NewCall, 0); Call->replaceAllUsesWith(NewCall); + } + Call->eraseFromParent(); } /// Replace a call to lgc.rt.shader.index with the passed shader index argument /// for LgcCps mode or get the value from system data for non-LgcCps mode. -void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, - CallInst *Call) { +void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, CallInst *Call) { if (Data.Kind == RayTracingShaderStage::RayGeneration) { Call->replaceAllUsesWith(Builder.getInt32(0)); } else { @@ -1136,13 +1155,11 @@ void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, } else { assert(Data.SystemDataFirstStore != nullptr); Builder.SetInsertPoint(&*++Data.SystemDataFirstStore->getIterator()); - ShaderIndex = - CrossInliner - .inlineCall(Builder, GetLocalRootIndex, - getDXILSystemData( - Builder, Data.SystemData, Data.SystemDataTy, - getFuncArgPtrElementType(GetLocalRootIndex, 0))) - .returnValue; + ShaderIndex = CrossInliner + .inlineCall(Builder, GetLocalRootIndex, + getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, + getFuncArgPtrElementType(GetLocalRootIndex, 0))) + .returnValue; } Call->replaceAllUsesWith(ShaderIndex); } @@ -1150,18 +1167,15 @@ void LowerRaytracingPipelinePassImpl::replaceShaderIndexCall(FunctionData &Data, } /// Replace a call to lgc.rt.shader.record.buffer with loading the resource. 
-void LowerRaytracingPipelinePassImpl::replaceShaderRecordBufferCall( - FunctionData &Data, CallInst *Call) { +void LowerRaytracingPipelinePassImpl::replaceShaderRecordBufferCall(FunctionData &Data, CallInst *Call) { auto *ShaderRecordBuffer = cast(Call); auto *TableIndex = ShaderRecordBuffer->getShaderIndex(); assert(GetSbtAddress && "Could not find GetSbtAddress function"); assert(GetSbtStride && "Could not find GetSbtStride function"); - Value *TableAddr = - CrossInliner.inlineCall(Builder, GetSbtAddress).returnValue; - Value *TableStride = - CrossInliner.inlineCall(Builder, GetSbtStride).returnValue; + Value *TableAddr = CrossInliner.inlineCall(Builder, GetSbtAddress).returnValue; + Value *TableStride = CrossInliner.inlineCall(Builder, GetSbtStride).returnValue; // SBT starts with shader group handle (aka shader identifier), which is 32 // bytes, then the data for shader record buffer. @@ -1178,8 +1192,7 @@ void LowerRaytracingPipelinePassImpl::replaceShaderRecordBufferCall( // Final addr TableAddr = Builder.CreateAdd(TableAddr, Offset); - Type *GpuAddrAsPtrTy = - PointerType::get(Builder.getContext(), 1 /* ADDR_SPACE_GLOBAL */); + Type *GpuAddrAsPtrTy = PointerType::get(Builder.getContext(), 1 /* ADDR_SPACE_GLOBAL */); TableAddr = Builder.CreateIntToPtr(TableAddr, GpuAddrAsPtrTy); Call->replaceAllUsesWith(TableAddr); @@ -1199,59 +1212,31 @@ void LowerRaytracingPipelinePassImpl::handleGetShaderKind(Function &Func) { if (!Stage) return; - DXILShaderKind ShaderKind = - ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); - auto *ShaderKindVal = ConstantInt::get(Func.getReturnType(), - static_cast(ShaderKind)); + DXILShaderKind ShaderKind = ShaderStageHelper::rtShaderStageToDxilShaderKind(*Stage); + auto *ShaderKindVal = ConstantInt::get(Func.getReturnType(), static_cast(ShaderKind)); CInst.replaceAllUsesWith(ShaderKindVal); CInst.eraseFromParent(); }); } -void LowerRaytracingPipelinePassImpl::handleGetFuncAddr(Function &Func) { - assert(Func.arg_empty() - // 
returns i64 or i32 - && (Func.getFunctionType()->getReturnType()->isIntegerTy(64) || - Func.getFunctionType()->getReturnType()->isIntegerTy(32))); - - auto Name = Func.getName(); - [[maybe_unused]] bool Consumed = Name.consume_front("_AmdGetFuncAddr"); - assert(Consumed); - - Function *F = Mod->getFunction(Name); - if (!F) - report_fatal_error(Twine("Did not find function '") + Name + - "' requested by _AmdGetFuncAddr"); - - llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { - auto *RetTy = Func.getReturnType(); - Builder.SetInsertPoint(&CInst); - Value *AsContRef = Builder.create(RetTy, F); - CInst.replaceAllUsesWith(AsContRef); - CInst.eraseFromParent(); - }); -} - void LowerRaytracingPipelinePassImpl::handleGetCurrentFuncAddr(Function &Func) { assert(Func.empty() && // Returns an i32 or i64 - (Func.getReturnType()->isIntegerTy(32) || - Func.getReturnType()->isIntegerTy(64))); + (Func.getReturnType()->isIntegerTy(32) || Func.getReturnType()->isIntegerTy(64))); llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { auto *F = CInst.getFunction(); - auto *RetTy = Func.getReturnType(); + auto *RetTy = MetadataState.isInLgcCpsMode() ? Builder.getInt32Ty() : Builder.getInt64Ty(); Builder.SetInsertPoint(&CInst); Value *AsContRef = Builder.create(RetTy, F); + AsContRef = MetadataState.isInLgcCpsMode() ? 
Builder.CreateZExt(AsContRef, Builder.getInt64Ty()) : AsContRef; CInst.replaceAllUsesWith(AsContRef); CInst.eraseFromParent(); }); } -void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, - uint64_t NumBytes) { - assert(Dst->getType()->isPointerTy() && Src->getType()->isPointerTy() && - "Dst and Src must be pointers!"); +void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, uint64_t NumBytes) { + assert(Dst->getType()->isPointerTy() && Src->getType()->isPointerTy() && "Dst and Src must be pointers!"); auto *I32 = B.getInt32Ty(); uint64_t NumFullI32s = NumBytes / RegisterBytes; @@ -1270,8 +1255,7 @@ void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, // Create i8 loads and stores for the remaining bytes Type *I8 = B.getIntNTy(8); - for (uint64_t I8Index = NumFullI32s * RegisterBytes; I8Index < NumBytes; - ++I8Index) { + for (uint64_t I8Index = NumFullI32s * RegisterBytes; I8Index < NumBytes; ++I8Index) { auto *DstPtr = SimplifyingCreateConstGEP1_32(B, I8, Dst, I8Index); auto *SrcPtr = SimplifyingCreateConstGEP1_32(B, I8, Src, I8Index); auto *Val = B.CreateLoad(I8, SrcPtr); @@ -1279,28 +1263,23 @@ void llvm::copyBytes(IRBuilder<> &B, Value *Dst, Value *Src, } } -void LowerRaytracingPipelinePassImpl::copyPayload( - Type &PayloadTy, Value *LocalPayload, Value *PayloadStorage, - std::optional Stage, PAQAccessKind GlobalAccessKind, - const PAQSerializationLayout &Layout, - SmallDenseSet *CopiedNodes) { +void LowerRaytracingPipelinePassImpl::copyPayload(Type &PayloadTy, Value *LocalPayload, Value *PayloadStorage, + std::optional Stage, PAQAccessKind GlobalAccessKind, + const PAQSerializationLayout &Layout, + SmallDenseSet *CopiedNodes) { // Nothing to do if there is no serialization type, i.e. the layout is empty if (!Layout.SerializationTy) return; - LLVM_DEBUG(dbgs() << (GlobalAccessKind == PAQAccessKind::Read ? 
"Incoming" - : "Outgoing") - << " serialization layout of " - << cast(LocalPayload)->getFunction()->getName() - << ": " << *Layout.SerializationTy << "\n"); + LLVM_DEBUG(dbgs() << (GlobalAccessKind == PAQAccessKind::Read ? "Incoming" : "Outgoing") + << " serialization layout of " << cast(LocalPayload)->getFunction()->getName() << ": " + << *Layout.SerializationTy << "\n"); Value *SpilledPayloadPtr = nullptr; if (Layout.PayloadMemPointerNode) { - auto *SpillPtr = SimplifyingCreateConstInBoundsGEP1_32( - Builder, Builder.getInt8Ty(), PayloadStorage, - FirstPayloadMemoryPointerRegister); - SpilledPayloadPtr = Builder.CreateLoad( - Builder.getPtrTy(lgc::cps::stackAddrSpace), SpillPtr); + auto *SpillPtr = SimplifyingCreateConstInBoundsGEP1_32(Builder, Builder.getInt8Ty(), PayloadStorage, + FirstPayloadMemoryPointerRegister); + SpilledPayloadPtr = Builder.CreateLoad(Builder.getPtrTy(lgc::cps::stackAddrSpace), SpillPtr); } PayloadCopyHelper Helper{ @@ -1321,25 +1300,19 @@ void LowerRaytracingPipelinePassImpl::copyPayload( } void LowerRaytracingPipelinePassImpl::copyTraceRayPayloadIncomingToCaller( - const PAQTraceRaySerializationInfo &SerializationInfo, Value *LocalPayload, - Value *PayloadStorage) { + const PAQTraceRaySerializationInfo &SerializationInfo, Value *LocalPayload, Value *PayloadStorage) { SmallDenseSet CopiedNodes; - for (auto LayoutKind : {PAQSerializationLayoutKind::ClosestHitOut, - PAQSerializationLayoutKind::MissOut}) { - const PAQSerializationLayout &Layout = - SerializationInfo.LayoutsByKind[LayoutKind]; - copyPayload(*SerializationInfo.PayloadRootNode->Ty, LocalPayload, - PayloadStorage, PAQShaderStage::Caller, PAQAccessKind::Read, - Layout, &CopiedNodes); + for (auto LayoutKind : {PAQSerializationLayoutKind::ClosestHitOut, PAQSerializationLayoutKind::MissOut}) { + const PAQSerializationLayout &Layout = SerializationInfo.LayoutsByKind[LayoutKind]; + copyPayload(*SerializationInfo.PayloadRootNode->Ty, LocalPayload, PayloadStorage, 
PAQShaderStage::Caller, + PAQAccessKind::Read, Layout, &CopiedNodes); } } void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( - Value *PayloadStorage, RayTracingShaderStage Kind, - const PAQSerializationLayout &IncomingLayout, - const PAQSerializationLayout &OutgoingLayout, - SmallVectorImpl &SavedRegisterValues) { + Value *PayloadStorage, RayTracingShaderStage Kind, const PAQSerializationLayout &IncomingLayout, + const PAQSerializationLayout &OutgoingLayout, SmallVectorImpl &SavedRegisterValues) { if (!OutgoingLayout.SerializationTy) return; @@ -1362,17 +1335,12 @@ void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( continue; // A node that is not written should be live in the incoming layout. - assert(IncomingLayout.NodeStorageInfos.count(Node) && - "Unexpectedly dead node!"); + assert(IncomingLayout.NodeStorageInfos.count(Node) && "Unexpectedly dead node!"); for (const PAQIndexInterval &Interval : StorageInfo.IndexIntervals) { - for (unsigned I = Interval.Begin; - I < - std::min(Interval.End, MetadataState.getMaxPayloadRegisterCount()); - ++I) { + for (unsigned I = Interval.Begin; I < std::min(Interval.End, MetadataState.getMaxPayloadRegisterCount()); ++I) { // Create backup of the I-th payload register - auto *LoadPtr = - SimplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); + auto *LoadPtr = SimplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); auto *OldValue = Builder.CreateLoad(RegTy, LoadPtr); // As long as we keep a 32 bit alignment of all fields, all fields // get disjoint registers, and we should never save a register twice. 
@@ -1387,28 +1355,24 @@ void LowerRaytracingPipelinePassImpl::savePayloadRegistersBeforeRecursion( } } - assert((OutgoingLayout.PayloadMemPointerNode == nullptr || - SavedRegisterValues[FirstPayloadMemoryPointerRegister]) && + assert((OutgoingLayout.PayloadMemPointerNode == nullptr || SavedRegisterValues[FirstPayloadMemoryPointerRegister]) && "Payload mem pointer missing from saved registers!"); } void LowerRaytracingPipelinePassImpl::restorePayloadRegistersAfterRecursion( - Value *PayloadStorage, - const SmallVectorImpl &SavedRegisterValues) { + Value *PayloadStorage, const SmallVectorImpl &SavedRegisterValues) { for (unsigned I = 0; I < SavedRegisterValues.size(); ++I) { Value *OldValue = SavedRegisterValues[I]; if (OldValue) { - auto *StorePtr = - SimplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); + auto *StorePtr = SimplifyingCreateConstGEP1_32(Builder, I32, PayloadStorage, I); Builder.CreateStore(SavedRegisterValues[I], StorePtr); } } } -void LowerRaytracingPipelinePassImpl::copyHitAttributes( - FunctionData &Data, Value *SystemDataPtr, Type *SystemDataPtrTy, - Value *LocalHitAttributes, bool GlobalToLocal, - const PAQSerializationLayout *Layout) { +void LowerRaytracingPipelinePassImpl::copyHitAttributes(FunctionData &Data, Value *SystemDataPtr, Type *SystemDataPtrTy, + Value *LocalHitAttributes, bool GlobalToLocal, + const PAQSerializationLayout *Layout) { auto *InlineHitAttrsTy = GetTriangleHitAttributes->getReturnType(); uint64_t InlineHitAttrsBytes = getInlineHitAttrsBytes(*GpurtLibrary); uint64_t InlineRegSize = InlineHitAttrsBytes / RegisterBytes; @@ -1422,8 +1386,7 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( // depending on GlobalToLocal. Then, in the actual copy implementation, we // just access the alloca using loads and stores as for payload registers. 
auto InsertPoint = Builder.saveIP(); - Builder.SetInsertPoint( - Builder.GetInsertBlock()->getParent()->getEntryBlock().getFirstNonPHI()); + Builder.SetInsertPoint(Builder.GetInsertBlock()->getParent()->getEntryBlock().getFirstNonPHI()); auto *InlineHitAttrsAlloc = Builder.CreateAlloca(InlineHitAttrsTy); auto *RegTyPtr = RegTy->getPointerTo(InlineHitAttrsAlloc->getAddressSpace()); Builder.restoreIP(InsertPoint); @@ -1431,13 +1394,9 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( if (GlobalToLocal) { // Load inline hit attributes from system data - auto *SystemDataTy = - cast(getFuncArgPtrElementType(GetTriangleHitAttributes, 0)); - auto *SystemData = getDXILSystemData(Builder, SystemDataPtr, - SystemDataPtrTy, SystemDataTy); - auto *InlineHitAttrs = - CrossInliner.inlineCall(Builder, GetTriangleHitAttributes, SystemData) - .returnValue; + auto *SystemDataTy = cast(getFuncArgPtrElementType(GetTriangleHitAttributes, 0)); + auto *SystemData = getDXILSystemData(Builder, SystemDataPtr, SystemDataPtrTy, SystemDataTy); + auto *InlineHitAttrs = CrossInliner.inlineCall(Builder, GetTriangleHitAttributes, SystemData).returnValue; Builder.CreateStore(InlineHitAttrs, InlineHitAttrsAlloc); } @@ -1449,58 +1408,46 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( if (Layout) { if (Layout->HitAttributeStorageNode) { auto It = Layout->NodeStorageInfos.find(Layout->HitAttributeStorageNode); - assert(It != Layout->NodeStorageInfos.end() && - "Missing hit attributes in layout!"); + assert(It != Layout->NodeStorageInfos.end() && "Missing hit attributes in layout!"); const PAQIndexIntervals &IndexIntervals = It->second.IndexIntervals; - assert(IndexIntervals.size() == 1 && - "Hit attributes must be contiguous!"); + assert(IndexIntervals.size() == 1 && "Hit attributes must be contiguous!"); const PAQIndexInterval &IndexInterval = IndexIntervals[0]; // Obtain pointer to global payload serialization struct Value *PayloadSerialization = Builder.CreateBitCast( 
Data.PayloadStorage, - Layout->SerializationTy->getPointerTo( - Data.PayloadStorage->getType()->getPointerAddressSpace())); + Layout->SerializationTy->getPointerTo(Data.PayloadStorage->getType()->getPointerAddressSpace())); // Last zero yields pointer to the first element of the i32 array - PayloadHitAttrs = Builder.CreateInBoundsGEP( - Layout->SerializationTy, PayloadSerialization, - {Builder.getInt32(0), Builder.getInt32(0), - Builder.getInt32(IndexInterval.Begin)}); + PayloadHitAttrs = + Builder.CreateInBoundsGEP(Layout->SerializationTy, PayloadSerialization, + {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(IndexInterval.Begin)}); PayloadHitAttrBytes = RegisterBytes * IndexInterval.size(); } else { // Inline attributes suffice, nothing to do. } } else { - assert(Data.Kind == RayTracingShaderStage::Intersection && - "Unexpected shader kind"); + assert(Data.Kind == RayTracingShaderStage::Intersection && "Unexpected shader kind"); // We are in an intersection shader, which does not know the payload type. 
// Assume maximum possible size - PayloadHitAttrBytes = - MetadataState.getMaxHitAttributeByteCount() - InlineHitAttrsBytes; + PayloadHitAttrBytes = MetadataState.getMaxHitAttributeByteCount() - InlineHitAttrsBytes; // Use hit attribute storage at fixed index PayloadHitAttrs = - SimplifyingCreateConstGEP1_32(Builder, I32, Data.PayloadStorage, - FirstPayloadHitAttributeStorageRegister); + SimplifyingCreateConstGEP1_32(Builder, I32, Data.PayloadStorage, FirstPayloadHitAttributeStorageRegister); } - uint64_t HitAttrsBytes = - DL->getTypeStoreSize(Data.HitAttributes).getFixedValue(); + uint64_t HitAttrsBytes = DL->getTypeStoreSize(Data.HitAttributes).getFixedValue(); if (HitAttrsBytes > MetadataState.getMaxHitAttributeByteCount()) report_fatal_error("Hit attributes are too large!"); - assert(InlineHitAttrsBytes + PayloadHitAttrBytes >= HitAttrsBytes && - "Insufficient hit attribute storage!"); + assert(InlineHitAttrsBytes + PayloadHitAttrBytes >= HitAttrsBytes && "Insufficient hit attribute storage!"); LocalHitAttributes = Builder.CreateBitCast(LocalHitAttributes, RegTyPtr); auto *I8Ty = Builder.getInt8Ty(); for (unsigned I = 0; I < divideCeil(HitAttrsBytes, RegisterBytes); I++) { - auto *LocalPtr = SimplifyingCreateConstInBoundsGEP1_32( - Builder, RegTy, LocalHitAttributes, I); + auto *LocalPtr = SimplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, LocalHitAttributes, I); Value *GlobalPtr; if (I < InlineRegSize) - GlobalPtr = SimplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, - InlineHitAttrs, I); + GlobalPtr = SimplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, InlineHitAttrs, I); else - GlobalPtr = SimplifyingCreateConstInBoundsGEP1_32( - Builder, RegTy, PayloadHitAttrs, I - InlineRegSize); + GlobalPtr = SimplifyingCreateConstInBoundsGEP1_32(Builder, RegTy, PayloadHitAttrs, I - InlineRegSize); auto *LoadPtr = GlobalToLocal ? GlobalPtr : LocalPtr; auto *StorePtr = GlobalToLocal ? 
LocalPtr : GlobalPtr; @@ -1511,14 +1458,10 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( } else { // Load byte by byte into a vector and pad the rest with undef auto *ByteLoadPtr = Builder.CreateBitCast(LoadPtr, I8Ty->getPointerTo()); - auto *ByteStorePtr = - Builder.CreateBitCast(StorePtr, I8Ty->getPointerTo()); + auto *ByteStorePtr = Builder.CreateBitCast(StorePtr, I8Ty->getPointerTo()); for (unsigned J = 0; J < HitAttrsBytes % RegisterBytes; J++) { - auto *Val = - Builder.CreateLoad(I8Ty, SimplifyingCreateConstInBoundsGEP1_32( - Builder, I8Ty, ByteLoadPtr, J)); - Builder.CreateStore(Val, SimplifyingCreateConstInBoundsGEP1_32( - Builder, I8Ty, ByteStorePtr, J)); + auto *Val = Builder.CreateLoad(I8Ty, SimplifyingCreateConstInBoundsGEP1_32(Builder, I8Ty, ByteLoadPtr, J)); + Builder.CreateStore(Val, SimplifyingCreateConstInBoundsGEP1_32(Builder, I8Ty, ByteStorePtr, J)); } } } @@ -1526,120 +1469,87 @@ void LowerRaytracingPipelinePassImpl::copyHitAttributes( if (!GlobalToLocal) { // Store inline hit attributes to system data auto *Attrs = Builder.CreateLoad(InlineHitAttrsTy, InlineHitAttrsAlloc); - auto *SystemDataTy = - cast(getFuncArgPtrElementType(GetTriangleHitAttributes, 0)); - auto *SystemData = getDXILSystemData(Builder, SystemDataPtr, - SystemDataPtrTy, SystemDataTy); - assert(SetTriangleHitAttributes && - "Could not find SetTriangleHitAttributes function"); - CrossInliner.inlineCall(Builder, SetTriangleHitAttributes, - {SystemData, Attrs}); + auto *SystemDataTy = cast(getFuncArgPtrElementType(GetTriangleHitAttributes, 0)); + auto *SystemData = getDXILSystemData(Builder, SystemDataPtr, SystemDataPtrTy, SystemDataTy); + assert(SetTriangleHitAttributes && "Could not find SetTriangleHitAttributes function"); + CrossInliner.inlineCall(Builder, SetTriangleHitAttributes, {SystemData, Attrs}); } } -void LowerRaytracingPipelinePassImpl::createPayloadGlobal() { - I32 = Type::getInt32Ty(*Context); - - // Determine an upper bound on the maximum required 
size for the @PAYLOAD - // global. Its size doesn't have an important meaning, but it needs to be - // large enough for generated code in this pass. Later, the RegisterBufferPass - // will shrink the used global if necessary. - uint32_t MaxPayloadI32s = MetadataState.getMaxPayloadRegisterCount(); - for (const auto &[_, FuncData] : ToProcess) { - MaxPayloadI32s = std::max(MaxPayloadI32s, FuncData.MaxOutgoingPayloadI32s); - if (FuncData.IncomingPayloadSerializationInfo) - MaxPayloadI32s = - std::max(MaxPayloadI32s, - FuncData.IncomingPayloadSerializationInfo->MaxStorageI32s); - } - auto *PayloadTy = - ArrayType::get(I32, MetadataState.getMaxPayloadRegisterCount()); - - PayloadStorageGlobal = cast( - Mod->getOrInsertGlobal(ContHelper::GlobalPayloadName, PayloadTy, [&] { - return new GlobalVariable( - *Mod, PayloadTy, false, GlobalVariable::ExternalLinkage, nullptr, - ContHelper::GlobalPayloadName, nullptr, - GlobalVariable::NotThreadLocal, GlobalRegisterAddrspace); - })); -} - void LowerRaytracingPipelinePassImpl::setGpurtEntryRegisterCountMetadata() { // Even if PreservedPayloadRegisterCount is set, there may be // additional shaders in the current module whose usage is recorded // in MaxUsedPayloadRegisterCount, to take the max with it. 
uint32_t MaxRegisterCount = - std::max(MetadataState.tryGetPreservedPayloadRegisterCount().value_or( - MetadataState.getMaxPayloadRegisterCount()), + std::max(MetadataState.tryGetPreservedPayloadRegisterCount().value_or(MetadataState.getMaxPayloadRegisterCount()), MetadataState.getMaxUsedPayloadRegisterCount()); - for (const auto &Name : - {"continuation.continue", "continuation.waitContinue"}) { - auto *Func = Mod->getFunction(Name); - if (!Func) - continue; - for (auto *User : Func->users()) { - CallInst *CI = dyn_cast(User); - if (!isa(User) || CI->getCalledFunction() != Func) - continue; + struct VisitorState { + ModuleMetadataState &Metadata; + uint32_t MaxRegisterCount; + }; - uint32_t InRegisterCount = 0; - uint32_t OutRegisterCount = 0; - auto *CallerFunc = CI->getFunction(); - auto ShaderStage = getLgcRtShaderStage(CallerFunc); - if (!ShaderStage) - continue; + static const auto Visitor = + llvm_dialects::VisitorBuilder() + .addSet([](VisitorState &State, Instruction &Op) { + uint32_t InRegisterCount = 0; + uint32_t OutRegisterCount = 0; + auto *CallerFunc = Op.getFunction(); + auto ShaderStage = getLgcRtShaderStage(CallerFunc); + if (!ShaderStage) + return; - switch (ShaderStage.value()) { - case RayTracingShaderStage::Traversal: - InRegisterCount = MaxRegisterCount; - OutRegisterCount = MaxRegisterCount; - break; - case RayTracingShaderStage::KernelEntry: - InRegisterCount = 0; - OutRegisterCount = 0; - break; - default: - continue; - } + switch (ShaderStage.value()) { + case RayTracingShaderStage::Traversal: + InRegisterCount = State.MaxRegisterCount; + OutRegisterCount = State.MaxRegisterCount; + break; + case RayTracingShaderStage::KernelEntry: + InRegisterCount = 0; + OutRegisterCount = 0; + break; + default: + return; + } - assert(!ContHelper::tryGetOutgoingRegisterCount(CI).has_value() && - "Unexpected register count metadata"); - ContHelper::setOutgoingRegisterCount(CI, OutRegisterCount); - 
MetadataState.updateMaxUsedPayloadRegisterCount(OutRegisterCount); + assert(!ContHelper::OutgoingRegisterCount::tryGetValue(&Op).has_value() && + "Unexpected register count metadata"); + ContHelper::OutgoingRegisterCount::setValue(&Op, OutRegisterCount); + State.Metadata.updateMaxUsedPayloadRegisterCount(OutRegisterCount); - assert(ContHelper::tryGetIncomingRegisterCount(CallerFunc) - .value_or(InRegisterCount) == InRegisterCount && - "Unexpected incoming register count on Traversal"); - ContHelper::setIncomingRegisterCount(CallerFunc, InRegisterCount); - MetadataState.updateMaxUsedPayloadRegisterCount(InRegisterCount); - } - } + assert(ContHelper::IncomingRegisterCount::tryGetValue(CallerFunc).value_or(InRegisterCount) == + InRegisterCount && + "Unexpected incoming register count on Traversal"); + ContHelper::IncomingRegisterCount::setValue(CallerFunc, InRegisterCount); + State.Metadata.updateMaxUsedPayloadRegisterCount(InRegisterCount); + }) + .build(); + + VisitorState State{MetadataState, MaxRegisterCount}; + Visitor.visit(State, *Mod); } void LowerRaytracingPipelinePassImpl::processContinuations() { TokenTy = StructType::create(*Context, "continuation.token")->getPointerTo(); + I32 = Type::getInt32Ty(*Context); for (auto &FuncData : ToProcess) { processFunction(FuncData.first, FuncData.second); } } -void LowerRaytracingPipelinePassImpl::processFunctionEntry( - FunctionData &Data, Argument *SystemDataArgument) { +void LowerRaytracingPipelinePassImpl::processFunctionEntry(FunctionData &Data, Argument *SystemDataArgument) { // See also the system data documentation at the top of Continuations.h. 
Data.SystemData = Builder.CreateAlloca(Data.SystemDataTy); Data.SystemData->setName("system.data.alloca"); // Allocate payload spilling space if (Data.PayloadSpillSize > 0) - Data.SpilledPayload = Builder.CreateAlloca( - ArrayType::get(I32, divideCeil(Data.PayloadSpillSize, RegisterBytes)), - nullptr, "payload.spill.alloca"); + Data.SpilledPayload = Builder.CreateAlloca(ArrayType::get(I32, divideCeil(Data.PayloadSpillSize, RegisterBytes)), + nullptr, "payload.spill.alloca"); // Initialize system data by copying the argument - Data.SystemDataFirstStore = - Builder.CreateStore(SystemDataArgument, Data.SystemData); + Data.SystemDataFirstStore = Builder.CreateStore(SystemDataArgument, Data.SystemData); // Shader preamble // NOTE: Skip Traversal, as it can call its own shader start function in @@ -1659,8 +1569,7 @@ void LowerRaytracingPipelinePassImpl::processFunctionEntry( } } -void LowerRaytracingPipelinePassImpl::processFunctionEnd( - FunctionData &Data, FunctionEndData &EData) { +void LowerRaytracingPipelinePassImpl::processFunctionEnd(FunctionData &Data, FunctionEndData &EData) { AnyHitExitKind AHExitKind = AnyHitExitKind::None; bool IsAnyHit = Data.Kind == RayTracingShaderStage::AnyHit; @@ -1682,8 +1591,7 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( Builder.SetInsertPoint(EData.Terminator); auto *PayloadTy = Data.IncomingPayload; - if (Data.Kind != RayTracingShaderStage::RayGeneration && - Data.Kind != RayTracingShaderStage::Intersection && + if (Data.Kind != RayTracingShaderStage::RayGeneration && Data.Kind != RayTracingShaderStage::Intersection && Data.Kind != RayTracingShaderStage::Traversal) { assert(PayloadTy && "Missing payload type!"); @@ -1691,47 +1599,38 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( if (AHExitKind == AnyHitExitKind::AcceptHit) { // Add a call to AcceptHit assert(AcceptHit && "Could not find AcceptHit function"); - auto *SystemDataTy = - cast(getFuncArgPtrElementType(AcceptHit, 0)); - auto *SystemData = 
getDXILSystemData(Builder, Data.SystemData, - Data.SystemDataTy, SystemDataTy); + auto *SystemDataTy = cast(getFuncArgPtrElementType(AcceptHit, 0)); + auto *SystemData = getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, SystemDataTy); CrossInliner.inlineCall(Builder, AcceptHit, SystemData); } - EData.OutgoingSerializationLayout = - &PAQManager.getOrCreateShaderExitSerializationLayout( - *Data.IncomingPayloadSerializationInfo, Data.Kind, - Data.HitAttributes, AHExitKind); + EData.OutgoingSerializationLayout = &PAQManager.getOrCreateShaderExitSerializationLayout( + *Data.IncomingPayloadSerializationInfo, Data.Kind, Data.HitAttributes, AHExitKind); } assert(EData.OutgoingSerializationLayout && "Missing layout"); // Restore saved registers. This needs to be done *before* copying // back the payload, which depends on the restored memory pointer! - restorePayloadRegistersAfterRecursion(Data.PayloadStorage, - EData.SavedRegisterValues); + restorePayloadRegistersAfterRecursion(Data.PayloadStorage, EData.SavedRegisterValues); // Copy local payload into global payload at end of shader if (EData.OutgoingSerializationLayout->NumStorageI32s) { - copyPayload(*PayloadTy, EData.NewPayload, Data.PayloadStorage, - EData.ShaderStage, PAQAccessKind::Write, + copyPayload(*PayloadTy, EData.NewPayload, Data.PayloadStorage, EData.ShaderStage, PAQAccessKind::Write, *EData.OutgoingSerializationLayout); } if (IsAnyHit) { // Copy hit attributes into payload for closest hit - if (AHExitKind == AnyHitExitKind::AcceptHit || - AHExitKind == AnyHitExitKind::AcceptHitAndEndSearch) { + if (AHExitKind == AnyHitExitKind::AcceptHit || AHExitKind == AnyHitExitKind::AcceptHitAndEndSearch) { // TODO Only if there is a ClosestHit shader in any hit group // where this AnyHit is used. If there is no ClosestHit, the // attributes can never be read, so we don't need to store them. 
- copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, - EData.HitAttrsAlloca, false, + copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, EData.HitAttrsAlloca, false, EData.OutgoingSerializationLayout); } else { assert(AHExitKind == AnyHitExitKind::IgnoreHit); // Copy original hit attributes - copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, - EData.OrigHitAttrsAlloca, false, + copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, EData.OrigHitAttrsAlloca, false, EData.OutgoingSerializationLayout); } } @@ -1739,101 +1638,143 @@ void LowerRaytracingPipelinePassImpl::processFunctionEnd( Value *RetValue = nullptr; if (!Data.ReturnTy->isVoidTy()) { - auto *SystemData = - getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, - cast(Data.ReturnTy)); + auto *SystemData = getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, cast(Data.ReturnTy)); RetValue = Builder.CreateLoad(Data.ReturnTy, SystemData); } - Instruction *Ret = nullptr; - unsigned OutgoingRegisterCount = - EData.OutgoingSerializationLayout - ? std::min(EData.OutgoingSerializationLayout->NumStorageI32s, - MetadataState.getMaxPayloadRegisterCount()) - : MetadataState.getMaxPayloadRegisterCount(); - if (Data.Kind == RayTracingShaderStage::RayGeneration) { assert(!RetValue && "RayGen cannot return anything"); if (ExitRayGen) handleExitRayGen(Data); Builder.CreateRetVoid(); - } else { - Function *Parent = EData.Terminator->getFunction(); + EData.Terminator->eraseFromParent(); - if (MetadataState.isInLgcCpsMode()) { - uint32_t CpsRetLevel = getPotentialCpsReturnLevels(Data.Kind); - // Jump to resume point of caller, pass Poison Rcr and ShaderIndex as they - // are not meaningful for the case. - SmallVector TailArgs = {PoisonValue::get(I32), - PoisonValue::get(I32)}; - if (RetValue) - TailArgs.push_back(RetValue); - - // Add padding so that payload starts at a fixed dword. 
- ContHelper::addPaddingValue(*DL, *Context, TailArgs, - Data.FirstPayloadArgumentDword); - - TailArgs.push_back(Builder.CreateLoad( - ArrayType::get(I32, OutgoingRegisterCount), Data.PayloadStorage)); - - Ret = Builder.create( - Parent->getArg(CpsArgIdxReturnAddr), CpsRetLevel, - PoisonValue::get(StructType::get(Builder.getContext())), TailArgs); - Builder.CreateUnreachable(); - } else { - SmallVector TailArgs; + return; + } - if (RetValue) - TailArgs.push_back(RetValue); + const bool IsTraversal = Data.Kind == RayTracingShaderStage::Traversal; + SmallVector PaddingArgs; + if (MetadataState.isInLgcCpsMode()) { + // Jump to resume point of caller, pass Poison Rcr and ShaderIndex as they + // are not meaningful for the case. + PaddingArgs.append({PoisonValue::get(I32), PoisonValue::get(I32)}); + } + + Function *Parent = EData.Terminator->getFunction(); + + SmallVector TailArgList; + unsigned OutgoingRegisterCount = 0; + // For Traversal and Intersection, only pass through the payload registers + // after reading them back from the serialization alloca. + if (Data.NumPassedThroughPayloadDwords.has_value()) { + OutgoingRegisterCount = Data.NumPassedThroughPayloadDwords.value(); + } else { + assert(EData.OutgoingSerializationLayout && "LowerRaytracingPipelinePassImpl::processFunctionEnd: No outgoing " + "serialization layout found!"); + OutgoingRegisterCount = + std::min(EData.OutgoingSerializationLayout->NumStorageI32s, MetadataState.getMaxPayloadRegisterCount()); + } - Ret = Builder.create(Parent->getArg(0), TailArgs); - Builder.CreateUnreachable(); + Instruction *Ret = nullptr; + if (MetadataState.isInLgcCpsMode()) { + if (RetValue) + PaddingArgs.push_back(RetValue); + + // Construct the tail argument list and append the padding and payload + // values. 
+ TailArgList.append(PaddingArgs); + PayloadHelper.appendPaddingAndPayloadValues(PaddingArgs, TailArgList, OutgoingRegisterCount, + Data.FirstPayloadArgumentDword, Data.PayloadStorage); + + Ret = Builder.create(Parent->getArg(CpsArgIdxReturnAddr), getPotentialCpsReturnLevels(Data.Kind), + PoisonValue::get(StructType::get(Builder.getContext())), TailArgList); + Builder.CreateUnreachable(); + EData.Terminator->eraseFromParent(); + } else if (IsTraversal) { + // TODO: For Traversal, we already have continue calls from the + // IntrinsicPrepare pass. So, we only want to include padding and payload + // for these existing calls. + auto [ContinueCall, ItRange] = PayloadHelper.getContinueCallFromTerminator(EData.Terminator); + + PaddingArgs.append(ItRange.begin(), ItRange.end()); + TailArgList.append(PaddingArgs); + + PayloadHelper.appendPaddingAndPayloadValues(PaddingArgs, TailArgList, OutgoingRegisterCount, + Data.FirstPayloadArgumentDword, Data.PayloadStorage); + + Builder.SetInsertPoint(EData.Terminator); + + // Create a lgc.cps.jump call with all arguments including the padding and the + // payload. 
+ Value *ReturnAddr = nullptr; + Value *WaitMask = nullptr; + if (auto *WaitContinue = dyn_cast(ContinueCall)) { + WaitMask = WaitContinue->getWaitMask(); + ReturnAddr = WaitContinue->getReturnAddr(); + } else if (auto *Continue = dyn_cast(ContinueCall)) { + ReturnAddr = Continue->getReturnAddr(); } + + assert(ReturnAddr); + + TailArgList.insert(TailArgList.begin(), ReturnAddr); + CallInst *NewCall = Builder.create( + ContinueCall->getArgOperand(0), -1, PoisonValue::get(StructType::get(ContinueCall->getContext())), TailArgList); + + NewCall->copyMetadata(*ContinueCall); + + if (WaitMask) + ContHelper::setWaitMask(*NewCall, cast(WaitMask)->getZExtValue()); + + ContinueCall->eraseFromParent(); + } else { + if (RetValue) + PaddingArgs.push_back(RetValue); + + PayloadHelper.appendPaddingAndPayloadValues(PaddingArgs, TailArgList, OutgoingRegisterCount, + Data.FirstPayloadArgumentDword, Data.PayloadStorage); + + // Include the return value (it was already included in the PaddingArgs + // set itself). + if (RetValue) + TailArgList.insert(TailArgList.begin(), RetValue); + Ret = Builder.create(Parent->getArg(0), TailArgList); + Builder.CreateUnreachable(); + + EData.Terminator->eraseFromParent(); } + // Annotate the terminator with number of outgoing payload registers. + // This annotation will be passed along the following transformations, + // ending up at the final continuation call. if (Ret) { - // Annotate the terminator with number of outgoing payload registers. - // This annotation will be passed along the following transformations, - // ending up at the final continuation call. 
- ContHelper::setOutgoingRegisterCount(Ret, OutgoingRegisterCount); + ContHelper::OutgoingRegisterCount::setValue(Ret, OutgoingRegisterCount); if (EData.OutgoingSerializationLayout) MetadataState.updateMaxUsedPayloadRegisterCount(OutgoingRegisterCount); } - - EData.Terminator->eraseFromParent(); } -void LowerRaytracingPipelinePassImpl::handleExitRayGen( - const FunctionData &Data) { +void LowerRaytracingPipelinePassImpl::handleExitRayGen(const FunctionData &Data) { assert(ExitRayGen && "Could not find ExitRayGen function"); // Create a call to _cont_ExitRayGen - auto *SystemDataTy = - cast(getFuncArgPtrElementType(ExitRayGen, 0)); - auto *SystemData = getDXILSystemData(Builder, Data.SystemData, - Data.SystemDataTy, SystemDataTy); + auto *SystemDataTy = cast(getFuncArgPtrElementType(ExitRayGen, 0)); + auto *SystemData = getDXILSystemData(Builder, Data.SystemData, Data.SystemDataTy, SystemDataTy); CrossInliner.inlineCall(Builder, ExitRayGen, SystemData); } -unsigned -LowerRaytracingPipelinePassImpl::getUpperBoundOnTraceRayPayloadRegisters() - const { +unsigned LowerRaytracingPipelinePassImpl::getUpperBoundOnTraceRayPayloadRegisters() const { unsigned MaxHitAttributeBytes = MetadataState.getMaxHitAttributeByteCount(); unsigned AttributeBytes = - MaxHitAttributeBytes - - std::min(MaxHitAttributeBytes, - unsigned(getInlineHitAttrsBytes(*GpurtLibrary))); - unsigned PayloadBytes = getMaxPayloadSize(Mod).value_or( - MetadataState.getMaxPayloadRegisterCount() * RegisterBytes); - - unsigned IncomingStorageBytes = alignTo(AttributeBytes, RegisterBytes) + - alignTo(PayloadBytes, RegisterBytes); + MaxHitAttributeBytes - std::min(MaxHitAttributeBytes, unsigned(getInlineHitAttrsBytes(*GpurtLibrary))); + unsigned PayloadBytes = getMaxPayloadSize(Mod).value_or(MetadataState.getMaxPayloadRegisterCount() * RegisterBytes); + + unsigned IncomingStorageBytes = alignTo(AttributeBytes, RegisterBytes) + alignTo(PayloadBytes, RegisterBytes); return 
std::min(unsigned(divideCeil(IncomingStorageBytes, RegisterBytes)), MetadataState.getMaxPayloadRegisterCount()); } -void LowerRaytracingPipelinePassImpl::processFunction(Function *F, - FunctionData &Data) { +void LowerRaytracingPipelinePassImpl::processFunction(Function *F, FunctionData &Data) { Builder.SetInsertPointPastAllocas(F); // Change the return type and arguments for shaders that are not RayGen @@ -1841,7 +1782,8 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, Type *NewRetTy; Type *SystemDataTy = nullptr; - unsigned IncomingStorageI32s = 0; + uint32_t SystemDataArgumentIndex = 0; + if (MetadataState.isInLgcCpsMode()) { // Create the CPS function header. @@ -1860,48 +1802,27 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, AllArgTypes.push_back(Builder.getInt32Ty()); AllArgTypes.push_back(Builder.getInt32Ty()); - // Determine payload storage type - Data.PayloadStorageTy = PayloadHelper.getPayloadStorageTy( - MetadataState.getMaxPayloadRegisterCount(), Data); - - Data.FirstPayloadArgumentDword = PayloadHelper.getPayloadStartDword( - Data, MetadataState.getMaxHitAttributeByteCount(), TraversalDataTy); - - if (Data.Kind != RayTracingShaderStage::RayGeneration && - Data.Kind != RayTracingShaderStage::Intersection && - Data.Kind != RayTracingShaderStage::Traversal) { - const PAQSerializationLayout &IncomingSerializationLayout = - PAQManager.getOrCreateShaderStartSerializationLayout( - *Data.IncomingPayloadSerializationInfo, Data.Kind, - Data.HitAttributes); - IncomingStorageI32s = - std::min(IncomingSerializationLayout.NumStorageI32s, - MetadataState.getMaxPayloadRegisterCount()); - } else if (Data.Kind == RayTracingShaderStage::Traversal) { - // We should have set up preserved register count for Traversal, if not, - // fall back to max count. 
- // Even if PreservedPayloadRegisterCount is set, there may be additional - // shaders in the current module whose usage is recorded in - // MaxUsedPayloadRegisterCount, to take the max with it. - IncomingStorageI32s = - std::max(MetadataState.tryGetPreservedPayloadRegisterCount().value_or( - MetadataState.getMaxPayloadRegisterCount()), - MetadataState.getMaxUsedPayloadRegisterCount()); - } else if (Data.Kind == RayTracingShaderStage::Intersection) { - IncomingStorageI32s = getUpperBoundOnTraceRayPayloadRegisters(); - } + SystemDataArgumentIndex = 3; } else { - // Pass in the return address. - AllArgTypes.push_back(Builder.getInt64Ty()); - } + // For non-lgc.cps mode, we always have a return address argument, which + // must not be included in the padding computation. The overall layout is: + // | returnAddr | systemData | (hitAttrs, remaining args) | padding | + // payload + // If we don't pass payload, then for stability reasons, we still pass in a + // zero- padding and payload-array that remains unused. - const auto SystemDataArgumentIndex = AllArgTypes.size(); + SystemDataArgumentIndex = 1; + } + // If the value is not computed in the switch case, it will be re-computed + // based on the incoming serialization layout info. 
+ std::optional NumIncomingPayloadDwords; switch (Data.Kind) { case RayTracingShaderStage::RayGeneration: { SystemDataTy = DispatchSystemDataTy; AllArgTypes.push_back(SystemDataTy); NewRetTy = Builder.getVoidTy(); + NumIncomingPayloadDwords = 0; break; } case RayTracingShaderStage::Intersection: { @@ -1909,6 +1830,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, SystemDataTy = TraversalDataTy; AllArgTypes.push_back(SystemDataTy); NewRetTy = SystemDataTy; + Data.NumPassedThroughPayloadDwords = MetadataState.getMaxPayloadRegisterCount(); break; } case RayTracingShaderStage::AnyHit: { @@ -1941,101 +1863,115 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, AllArgTypes.push_back(SystemDataTy); NewRetTy = SystemDataTy; + + // We should have set up preserved register count for Traversal, if not, + // fall back to max count. + Data.NumPassedThroughPayloadDwords = + MetadataState.tryGetPreservedPayloadRegisterCount().value_or(MetadataState.getMaxPayloadRegisterCount()); break; } default: llvm_unreachable("Unhandled ShaderKind"); } - if (MetadataState.isInLgcCpsMode() && - Data.Kind != RayTracingShaderStage::RayGeneration) { - auto *DummyArgTy = StructType::get(*Context, {}); + if (!NumIncomingPayloadDwords.has_value()) { + if (Data.NumPassedThroughPayloadDwords.has_value()) { + NumIncomingPayloadDwords = Data.NumPassedThroughPayloadDwords.value(); + } else { + const PAQSerializationLayout &IncomingSerializationLayout = PAQManager.getOrCreateShaderStartSerializationLayout( + *Data.IncomingPayloadSerializationInfo, Data.Kind, Data.HitAttributes); + NumIncomingPayloadDwords = + std::min(IncomingSerializationLayout.NumStorageI32s, MetadataState.getMaxPayloadRegisterCount()); + } + } + + assert(NumIncomingPayloadDwords.has_value()); + + Data.PayloadStorageTy = PayloadHelper.getPayloadStorageTy(MetadataState.getMaxPayloadRegisterCount(), Data); + Data.FirstPayloadArgumentDword = + PayloadHelper.getPayloadStartDword(Data, 
MetadataState.getMaxHitAttributeByteCount(), TraversalDataTy); - if (Data.Kind != RayTracingShaderStage::AnyHit) { + const bool HasPayloadArgument = Data.Kind != RayTracingShaderStage::RayGeneration; + if (HasPayloadArgument) { + if (MetadataState.isInLgcCpsMode() && Data.Kind != RayTracingShaderStage::AnyHit) { // Add a dummy argument for CpsArgIdxHitAttributes so that the arg index // of payload matches CpsArgIdxPayload - AllArgTypes.push_back(DummyArgTy); + AllArgTypes.push_back(StructType::get(*Context, {})); } - PayloadHelper.computePaddingAndPayloadArgTys( - AllArgTypes, IncomingStorageI32s, Data.FirstPayloadArgumentDword); + PayloadHelper.computePaddingAndPayloadArgTys(AllArgTypes, NumIncomingPayloadDwords.value(), + Data.FirstPayloadArgumentDword); } - Data.PayloadSpillSize = computeNeededStackSizeForRegisterBuffer( - Data.MaxOutgoingPayloadI32s, MetadataState.getMaxPayloadRegisterCount()); - assert(Data.PayloadSpillSize == 0 || - Data.Kind != RayTracingShaderStage::Intersection); + // Pass in the return address argument + if (!MetadataState.isInLgcCpsMode()) + AllArgTypes.insert(AllArgTypes.begin(), Builder.getInt64Ty()); - if (!MetadataState.isInLgcCpsMode()) { - Data.PayloadStorage = PayloadStorageGlobal; - Data.PayloadStorageTy = PayloadStorageGlobal->getValueType(); - } - - Type *FunctionTypeRetTy = nullptr; - if (MetadataState.isInLgcCpsMode()) - FunctionTypeRetTy = Builder.getVoidTy(); - else - FunctionTypeRetTy = NewRetTy; + Data.PayloadSpillSize = + computePayloadSpillSize(Data.MaxOutgoingPayloadI32s, MetadataState.getMaxPayloadRegisterCount()); + assert(Data.PayloadSpillSize == 0 || Data.Kind != RayTracingShaderStage::Intersection); + auto *FunctionTypeRetTy = MetadataState.isInLgcCpsMode() ? 
Builder.getVoidTy() : NewRetTy; // Create new function to change signature auto *NewFuncTy = FunctionType::get(FunctionTypeRetTy, AllArgTypes, false); - Function *NewFunc = CompilerUtils::cloneFunctionHeader( - *F, NewFuncTy, ArrayRef{}); + Function *NewFunc = CompilerUtils::cloneFunctionHeader(*F, NewFuncTy, ArrayRef{}); NewFunc->takeName(F); - // FIXME: Remove !types metadata to workaround an llvm bug. If struct types + // FIXME: Remove !pointeetypes metadata to workaround an llvm bug. If struct types // are referenced only from metadata, LLVM omits the type declaration when // printing IR and fails to read it back in because of an unknown type. - NewFunc->setMetadata("types", nullptr); + NewFunc->setMetadata("pointeetys", nullptr); llvm::moveFunctionBody(*F, *NewFunc); Data.SystemDataTy = cast(SystemDataTy); processFunctionEntry(Data, NewFunc->getArg(SystemDataArgumentIndex)); - Value *NewSystemData = nullptr; uint64_t RetAddrArgIdx = 0; if (MetadataState.isInLgcCpsMode()) { NewFunc->getArg(CpsArgIdxContState)->setName("cont.state"); RetAddrArgIdx = CpsArgIdxReturnAddr; NewFunc->getArg(CpsArgIdxShaderIndex)->setName("shader.index"); - if (Data.Kind != RayTracingShaderStage::RayGeneration) { - NewFunc->getArg(CpsArgIdxSystemData)->setName("system.data"); - NewFunc->getArg(CpsArgIdxHitAttributes)->setName("hit.attrs"); - NewFunc->getArg(CpsArgIdxPadding)->setName("padding"); - NewFunc->getArg(CpsArgIdxPayload)->setName("payload"); - } // Mark as CPS function with the corresponding level. CpsLevel Level = getCpsLevelForShaderStage(Data.Kind); setCpsFunctionLevel(*NewFunc, Level); + } - if (Data.Kind == RayTracingShaderStage::Traversal) { - // Compute here means Traversal. - assert(F->arg_size() == 1); - // System data - // NOTE: Pointer address space may not match based on data layout, mutate - // the address space here to keep later GEP valid. 
- Data.SystemData->mutateType(getWithSamePointeeType( - Data.SystemData->getType(), - F->getArg(0)->getType()->getPointerAddressSpace())); - NewSystemData = Data.SystemData; - } else { - // Create local payload storage for non-Traversal shader. - PayloadHelper.initializePayloadSerializationStorage(NewFunc, Data); + if (Data.Kind != RayTracingShaderStage::RayGeneration) { + if (MetadataState.isInLgcCpsMode()) { + NewFunc->getArg(CpsArgIdxSystemData)->setName("system.data"); + NewFunc->getArg(CpsArgIdxHitAttributes)->setName("hit.attrs"); } - } else if (Data.Kind == RayTracingShaderStage::Traversal) { - // Replace old system data argument with cloned functions' argument - NewSystemData = NewFunc->getArg(1); + + NewFunc->getArg(NewFunc->arg_size() - 2)->setName("padding"); + NewFunc->getArg(NewFunc->arg_size() - 1)->setName("payload"); } - if (auto *ContPayloadRegistersGetI32 = - Mod->getFunction("_AmdContPayloadRegistersGetI32")) - handleContPayloadRegistersGetI32(*ContPayloadRegistersGetI32); + Value *NewSystemData = nullptr; + const bool IsTraversal = Data.Kind == RayTracingShaderStage::Traversal; + if (IsTraversal && MetadataState.isInLgcCpsMode()) { + assert(F->arg_size() == 1); + // System data + // NOTE: Pointer address space may not match based on data layout, mutate + // the address space here to keep later GEP valid. 
+ Data.SystemData->mutateType( + getWithSamePointeeType(Data.SystemData->getType(), F->getArg(0)->getType()->getPointerAddressSpace())); + NewSystemData = Data.SystemData; + } else { + PayloadHelper.initializePayloadSerializationStorage(NewFunc, Data); + + if (auto *ContPayloadRegistersGetI32 = Mod->getFunction("_AmdContPayloadRegistersGetI32")) + handleContPayloadRegistersGetI32(*ContPayloadRegistersGetI32, *NewFunc, Data); + + if (auto *ContPayloadRegistersSetI32 = Mod->getFunction("_AmdContPayloadRegistersSetI32")) + handleContPayloadRegistersSetI32(*ContPayloadRegistersSetI32, *NewFunc, Data); - if (auto *ContPayloadRegistersSetI32 = - Mod->getFunction("_AmdContPayloadRegistersSetI32")) - handleContPayloadRegistersSetI32(*ContPayloadRegistersSetI32); + if (IsTraversal) { + // Replace old system data argument with cloned functions' argument + NewSystemData = NewFunc->getArg(1); + } + } if (NewSystemData) F->getArg(0)->replaceAllUsesWith(NewSystemData); @@ -2048,7 +1984,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, NewFunc->setMetadata(ContHelper::MDEntryName, MDTuple::get(*Context, {})); // Entry functions have no incoming payload or continuation state - ContHelper::setIncomingRegisterCount(NewFunc, 0); + ContHelper::IncomingRegisterCount::setValue(NewFunc, 0); } } else { // Ignore payload for intersection shaders, they don't touch payload @@ -2059,18 +1995,14 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, Value *HitAttrsAlloca = nullptr; Type *PayloadTy = Data.IncomingPayload; - std::optional ShaderStage = - rtShaderStageToPAQShaderStage(Data.Kind); - PAQSerializationInfoBase *SerializationInfo = - Data.IncomingPayloadSerializationInfo; + std::optional ShaderStage = rtShaderStageToPAQShaderStage(Data.Kind); + PAQSerializationInfoBase *SerializationInfo = Data.IncomingPayloadSerializationInfo; // Check that our assumptions about the number of required payload registers // are correct. 
We exclude callable shaders because the max payload size // doesn't apply to them. - assert((Data.Kind == RayTracingShaderStage::Callable || - SerializationInfo == nullptr || - std::min(MetadataState.getMaxPayloadRegisterCount(), - SerializationInfo->MaxStorageI32s) <= + assert((Data.Kind == RayTracingShaderStage::Callable || SerializationInfo == nullptr || + std::min(MetadataState.getMaxPayloadRegisterCount(), SerializationInfo->MaxStorageI32s) <= getUpperBoundOnTraceRayPayloadRegisters()) && "Payload serialization layout uses too many registers!"); @@ -2082,22 +2014,18 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // recursive TraceRay or CallShader) SmallVector SavedRegisterValues{}; - if (Data.Kind != RayTracingShaderStage::Intersection && - Data.Kind != RayTracingShaderStage::Traversal) { + if (Data.Kind != RayTracingShaderStage::Intersection && Data.Kind != RayTracingShaderStage::Traversal) { assert(PayloadTy && "Missing payload type!"); // For AnyHit, the layout depends on whether we accept or ignore, which // we do not know yet. In that case, the layout is determined later. 
if (Data.Kind != RayTracingShaderStage::AnyHit) { - OutgoingSerializationLayout = - &PAQManager.getOrCreateShaderExitSerializationLayout( - *SerializationInfo, Data.Kind, Data.HitAttributes, - AnyHitExitKind::None); + OutgoingSerializationLayout = &PAQManager.getOrCreateShaderExitSerializationLayout( + *SerializationInfo, Data.Kind, Data.HitAttributes, AnyHitExitKind::None); } const PAQSerializationLayout &IncomingSerializationLayout = - PAQManager.getOrCreateShaderStartSerializationLayout( - *SerializationInfo, Data.Kind, Data.HitAttributes); + PAQManager.getOrCreateShaderStartSerializationLayout(*SerializationInfo, Data.Kind, Data.HitAttributes); // Handle reading global payload auto *FPayload = F->getArg(0); @@ -2110,26 +2038,23 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, } auto IncomingRegisterCount = - std::min(IncomingSerializationLayout.NumStorageI32s, - MetadataState.getMaxPayloadRegisterCount()); + std::min(IncomingSerializationLayout.NumStorageI32s, MetadataState.getMaxPayloadRegisterCount()); MetadataState.updateMaxUsedPayloadRegisterCount(IncomingRegisterCount); if (!MetadataState.isInLgcCpsMode()) { // Annotate function with the number of registers for incoming payload - ContHelper::setIncomingRegisterCount(NewFunc, IncomingRegisterCount); + ContHelper::IncomingRegisterCount::setValue(NewFunc, IncomingRegisterCount); } // Copy global payload into local payload at start of shader if (IncomingSerializationLayout.NumStorageI32s) { - copyPayload(*PayloadTy, NewPayload, Data.PayloadStorage, ShaderStage, - PAQAccessKind::Read, IncomingSerializationLayout); + copyPayload(*PayloadTy, NewPayload, Data.PayloadStorage, ShaderStage, PAQAccessKind::Read, + IncomingSerializationLayout); } if (!Data.CallShaderCalls.empty() || !Data.TraceRayCalls.empty()) { - assert(OutgoingSerializationLayout && - "Missing outgoing serialization layout!"); - savePayloadRegistersBeforeRecursion( - Data.PayloadStorage, Data.Kind, IncomingSerializationLayout, - 
*OutgoingSerializationLayout, SavedRegisterValues); + assert(OutgoingSerializationLayout && "Missing outgoing serialization layout!"); + savePayloadRegistersBeforeRecursion(Data.PayloadStorage, Data.Kind, IncomingSerializationLayout, + *OutgoingSerializationLayout, SavedRegisterValues); } // Handle hit attributes @@ -2141,8 +2066,8 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // Preserve current insert point IRBuilder<>::InsertPointGuard Guard(Builder); Builder.SetInsertPointPastAllocas(NewFunc); - OrigHitAttrsAlloca = Builder.CreateAlloca(ArrayType::get( - I32, divideCeil(GlobalMaxHitAttributeBytes, RegisterBytes))); + OrigHitAttrsAlloca = + Builder.CreateAlloca(ArrayType::get(I32, divideCeil(GlobalMaxHitAttributeBytes, RegisterBytes))); OrigHitAttrsAlloca->setName("OrigHitAttrs"); HitAttrsAlloca = Builder.CreateAlloca(Data.HitAttributes); @@ -2150,14 +2075,13 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, } // Copy old hit attributes from payload - copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, - OrigHitAttrsAlloca, true, + copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, OrigHitAttrsAlloca, true, &IncomingSerializationLayout); - // Copy new hit attributes from argument - unsigned HitAttributesIdx = MetadataState.isInLgcCpsMode() - ? CpsArgIdxHitAttributes - : NewFunc->arg_size() - 1; + // Copy new hit attributes from argument: + // Since the argument list of NewFunc ends with padding and payload, + // subtract 3 to get the hit attributes. + unsigned HitAttributesIdx = MetadataState.isInLgcCpsMode() ? 
CpsArgIdxHitAttributes : NewFunc->arg_size() - 3; Builder.CreateStore(NewFunc->getArg(HitAttributesIdx), HitAttrsAlloca); HitAttrs->replaceAllUsesWith(HitAttrsAlloca); } else if (Data.Kind == RayTracingShaderStage::ClosestHit) { @@ -2176,8 +2100,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // Copy hit attributes from system data and payload into the local // variable OrigHitAttrs->replaceAllUsesWith(NewHitAttrs); - copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, NewHitAttrs, - true, &IncomingSerializationLayout); + copyHitAttributes(Data, Data.SystemData, Data.SystemDataTy, NewHitAttrs, true, &IncomingSerializationLayout); } } else { if (!MetadataState.isInLgcCpsMode()) { @@ -2189,8 +2112,7 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // use that instead. For a library compile, we can't know the // max payload size of shaders in pipelines this shader is used // in. - ContHelper::setIncomingRegisterCount( - NewFunc, MetadataState.getMaxPayloadRegisterCount()); + ContHelper::IncomingRegisterCount::setValue(NewFunc, MetadataState.getMaxPayloadRegisterCount()); // Intentionally do NOT update MaxUsedPayloadRegisterCount } else { assert(Data.Kind == RayTracingShaderStage::Traversal); @@ -2212,18 +2134,15 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // Modify function ends // While iterating over function ends, basic blocks are inserted by inlining // functions, so we copy them beforehand. 
- if (MetadataState.isInLgcCpsMode() && - Data.Kind == RayTracingShaderStage::Traversal) { - PayloadHelper.patchJumpCalls(NewFunc, Data.JumpCalls, - Data.FirstPayloadArgumentDword); + if (MetadataState.isInLgcCpsMode() && Data.Kind == RayTracingShaderStage::Traversal) { + PayloadHelper.patchJumpCalls(NewFunc, Data.JumpCalls, Data.FirstPayloadArgumentDword); } else { SmallVector BBs(make_pointer_range(*NewFunc)); for (auto *BB : BBs) { auto *I = BB->getTerminator(); assert(I && "BB must have terminator"); // Replace the end of the BB if it terminates the function - bool IsFunctionEnd = (I->getOpcode() == Instruction::Ret || - I->getOpcode() == Instruction::Unreachable); + bool IsFunctionEnd = (I->getOpcode() == Instruction::Ret || I->getOpcode() == Instruction::Unreachable); if (IsFunctionEnd) { EData.Terminator = I; processFunctionEnd(Data, EData); @@ -2271,57 +2190,57 @@ void LowerRaytracingPipelinePassImpl::processFunction(Function *F, // Replace non-rematerializable intrinsic calls for (auto *Call : Data.IntrinsicCalls) - replaceIntrinsicCall(Builder, Data.SystemDataTy, Data.SystemData, Data.Kind, - Call, GpurtLibrary, CrossInliner); + replaceIntrinsicCall(Builder, Data.SystemDataTy, Data.SystemData, Data.Kind, Call, GpurtLibrary, CrossInliner); #ifndef NDEBUG - if (!MetadataState.isInLgcCpsMode() && - Data.Kind != RayTracingShaderStage::RayGeneration) { + if (!MetadataState.isInLgcCpsMode() && Data.Kind != RayTracingShaderStage::RayGeneration) { // Check that all returns have registercount metadata for (const auto &BB : *F) { auto *Terminator = BB.getTerminator(); - if (Terminator->getOpcode() == Instruction::Ret && - !ContHelper::tryGetOutgoingRegisterCount(Terminator)) + if (Terminator->getOpcode() == Instruction::Ret && !ContHelper::OutgoingRegisterCount::tryGetValue(Terminator)) report_fatal_error("Missing registercount metadata!"); } } #endif } -void LowerRaytracingPipelinePassImpl::handleContPayloadRegisterI32Count( - Function &F) { +void 
LowerRaytracingPipelinePassImpl::handleContPayloadRegisterI32Count(Function &F) { assert(F.arg_empty() // register count && F.getFunctionType()->getReturnType()->isIntegerTy(32)); - uint32_t RegCount = - ContHelper::tryGetMaxUsedPayloadRegisterCount(*Mod).value_or(0); - auto *RegCountAsConstant = - ConstantInt::get(IntegerType::get(F.getContext(), 32), RegCount); + uint32_t RegCount = ContHelper::MaxUsedPayloadRegisterCount::tryGetValue(Mod).value_or(0); + auto *RegCountAsConstant = ConstantInt::get(IntegerType::get(F.getContext(), 32), RegCount); llvm::replaceCallsToFunction(F, *RegCountAsConstant); } -void LowerRaytracingPipelinePassImpl::handleContPayloadRegistersGetI32( - Function &F) { +void LowerRaytracingPipelinePassImpl::handleContPayloadRegistersGetI32(Function &F, Function &Parent, + FunctionData &Data) { assert(F.getReturnType()->isIntegerTy(32) && F.arg_size() == 1 // index && F.getFunctionType()->getParamType(0)->isIntegerTy(32)); llvm::forEachCall(F, [&](CallInst &CInst) { - Builder.SetInsertPoint(&CInst); - auto *Addr = Builder.CreateGEP( - PayloadStorageGlobal->getValueType(), PayloadStorageGlobal, - {Builder.getInt32(0), CInst.getArgOperand(0)}); - auto *Load = Builder.CreateLoad(Builder.getInt32Ty(), Addr); - CInst.replaceAllUsesWith(Load); + if (CInst.getFunction() != &Parent) + return; + + if (Data.FirstPayloadArgumentDword.has_value()) { + Builder.SetInsertPoint(&CInst); + auto *Addr = + Builder.CreateGEP(Data.PayloadStorageTy, Data.PayloadStorage, {Builder.getInt32(0), CInst.getArgOperand(0)}); + auto *Load = Builder.CreateLoad(Builder.getInt32Ty(), Addr); + CInst.replaceAllUsesWith(Load); + } else { + CInst.replaceAllUsesWith(PoisonValue::get(Builder.getInt32Ty())); + } CInst.eraseFromParent(); }); } -void LowerRaytracingPipelinePassImpl::handleContPayloadRegistersSetI32( - Function &F) { +void LowerRaytracingPipelinePassImpl::handleContPayloadRegistersSetI32(Function &F, Function &Parent, + FunctionData &Data) { 
assert(F.getReturnType()->isVoidTy() && F.arg_size() == 2 // index @@ -2330,11 +2249,15 @@ void LowerRaytracingPipelinePassImpl::handleContPayloadRegistersSetI32( && F.getFunctionType()->getParamType(1)->isIntegerTy(32)); llvm::forEachCall(F, [&](CallInst &CInst) { - Builder.SetInsertPoint(&CInst); - auto *Addr = Builder.CreateGEP( - PayloadStorageGlobal->getValueType(), PayloadStorageGlobal, - {Builder.getInt32(0), CInst.getArgOperand(0)}); - Builder.CreateStore(CInst.getOperand(1), Addr); + if (CInst.getFunction() != &Parent) + return; + + if (Data.FirstPayloadArgumentDword.has_value()) { + Builder.SetInsertPoint(&CInst); + auto *Addr = + Builder.CreateGEP(Data.PayloadStorageTy, Data.PayloadStorage, {Builder.getInt32(0), CInst.getArgOperand(0)}); + Builder.CreateStore(CInst.getOperand(1), Addr); + } CInst.eraseFromParent(); }); } @@ -2361,29 +2284,21 @@ void LowerRaytracingPipelinePassImpl::collectProcessableFunctions() { FunctionData Data; Data.Kind = Kind; - if (Kind != RayTracingShaderStage::Intersection && - Kind != RayTracingShaderStage::RayGeneration && + if (Kind != RayTracingShaderStage::Intersection && Kind != RayTracingShaderStage::RayGeneration && Kind != RayTracingShaderStage::Traversal) { assert(!Func.arg_empty() && "Shader must have at least one argument"); Data.IncomingPayload = getFuncArgPtrElementType(&Func, 0); - PAQPayloadConfig PAQConfig = { - Data.IncomingPayload, MetadataState.getMaxHitAttributeByteCount()}; - Data.IncomingPayloadSerializationInfo = - &PAQManager.getOrCreateSerializationInfo(PAQConfig, Kind); - assert(Data.IncomingPayloadSerializationInfo != nullptr && - "Missing serialization info!"); + PAQPayloadConfig PAQConfig = {Data.IncomingPayload, MetadataState.getMaxHitAttributeByteCount()}; + Data.IncomingPayloadSerializationInfo = &PAQManager.getOrCreateSerializationInfo(PAQConfig, Kind); + assert(Data.IncomingPayloadSerializationInfo != nullptr && "Missing serialization info!"); } - if (Kind == RayTracingShaderStage::AnyHit || - 
Kind == RayTracingShaderStage::ClosestHit) { - assert(Func.arg_size() >= 2 && - "Shader must have at least two arguments"); - Data.HitAttributes = - getFuncArgPtrElementType(&Func, Func.arg_size() - 1); + if (Kind == RayTracingShaderStage::AnyHit || Kind == RayTracingShaderStage::ClosestHit) { + assert(Func.arg_size() >= 2 && "Shader must have at least two arguments"); + Data.HitAttributes = getFuncArgPtrElementType(&Func, Func.arg_size() - 1); } if (Kind == RayTracingShaderStage::Intersection) { - Data.MaxOutgoingPayloadI32s = - MetadataState.getMaxPayloadRegisterCount(); + Data.MaxOutgoingPayloadI32s = MetadataState.getMaxPayloadRegisterCount(); } ToProcess[&Func] = Data; @@ -2407,7 +2322,7 @@ void LowerRaytracingPipelinePassImpl::handleAmdInternalFunc(Function &Func) { handleRestoreSystemData(&CInst); }); } else if (FuncName.starts_with("_AmdGetFuncAddr")) { - handleGetFuncAddr(Func); + ContHelper::handleGetFuncAddr(Func, Builder); } else if (FuncName.starts_with("_AmdGetShaderKind")) { handleGetShaderKind(Func); } else if (FuncName.starts_with("_AmdGetCurrentFuncAddr")) { @@ -2442,8 +2357,7 @@ void LowerRaytracingPipelinePassImpl::handleUnrematerializableCandidates() { continue; static const llvm_dialects::OpSet NonRematerializableDialectOps = - llvm_dialects::OpSet::get(); if (!NonRematerializableDialectOps.contains(Func)) { llvm::forEachCall(Func, [&](llvm::CallInst &CInst) { @@ -2461,56 +2375,41 @@ void LowerRaytracingPipelinePassImpl::handleUnrematerializableCandidates() { void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { IsEndSearch = GpurtLibrary->getFunction(ContDriverFunc::IsEndSearchName); if (IsEndSearch) - assert(IsEndSearch->getReturnType() == Type::getInt1Ty(*Context) && + assert(IsEndSearch->getReturnType()->isIntegerTy(1) && IsEndSearch->arg_size() == 1 // Traversal data && IsEndSearch->getFunctionType()->getParamType(0)->isPointerTy()); - GetTriangleHitAttributes = - 
GpurtLibrary->getFunction(ContDriverFunc::GetTriangleHitAttributesName); + GetTriangleHitAttributes = GpurtLibrary->getFunction(ContDriverFunc::GetTriangleHitAttributesName); if (GetTriangleHitAttributes) - assert(GetTriangleHitAttributes->getReturnType() - ->isStructTy() // BuiltinTriangleIntersectionAttributes + assert(GetTriangleHitAttributes->getReturnType()->isStructTy() // BuiltinTriangleIntersectionAttributes && GetTriangleHitAttributes->arg_size() == 1 // System data - && GetTriangleHitAttributes->getFunctionType() - ->getParamType(0) - ->isPointerTy()); + && GetTriangleHitAttributes->getFunctionType()->getParamType(0)->isPointerTy()); - SetTriangleHitAttributes = - GpurtLibrary->getFunction(ContDriverFunc::SetTriangleHitAttributesName); + SetTriangleHitAttributes = GpurtLibrary->getFunction(ContDriverFunc::SetTriangleHitAttributesName); if (SetTriangleHitAttributes) assert(SetTriangleHitAttributes->getReturnType()->isVoidTy() && SetTriangleHitAttributes->arg_size() == 2 // System data - && SetTriangleHitAttributes->getFunctionType() - ->getParamType(0) - ->isPointerTy() + && SetTriangleHitAttributes->getFunctionType()->getParamType(0)->isPointerTy() // BuiltinTriangleIntersectionAttributes - && (SetTriangleHitAttributes->getFunctionType() - ->getParamType(1) - ->isStructTy() || - SetTriangleHitAttributes->getFunctionType() - ->getParamType(1) - ->isPointerTy())); + && (SetTriangleHitAttributes->getFunctionType()->getParamType(1)->isStructTy() || + SetTriangleHitAttributes->getFunctionType()->getParamType(1)->isPointerTy())); - GetLocalRootIndex = - GpurtLibrary->getFunction(ContDriverFunc::GetLocalRootIndexName); + GetLocalRootIndex = GpurtLibrary->getFunction(ContDriverFunc::GetLocalRootIndexName); assert(GetLocalRootIndex && "Could not find GetLocalRootIndex function"); - assert(GetLocalRootIndex->getReturnType() == - Type::getInt32Ty(Mod->getContext()) && + assert(GetLocalRootIndex->getReturnType()->isIntegerTy(32) && GetLocalRootIndex->arg_size() == 1 
// Dispatch data - && - GetLocalRootIndex->getFunctionType()->getParamType(0)->isPointerTy()); + && GetLocalRootIndex->getFunctionType()->getParamType(0)->isPointerTy()); SetLocalRootIndex = getSetLocalRootIndex(*Mod); ExitRayGen = GpurtLibrary->getFunction(ContDriverFunc::ExitRayGenName); if (ExitRayGen) - assert(ExitRayGen->getReturnType()->isVoidTy() && - ExitRayGen->arg_size() == 1 && + assert(ExitRayGen->getReturnType()->isVoidTy() && ExitRayGen->arg_size() == 1 && ExitRayGen->getFunctionType()->getParamType(0)->isPointerTy()); TraceRay = GpurtLibrary->getFunction(ContDriverFunc::TraceRayName); @@ -2527,8 +2426,7 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { // Dispatch data && CallShader->getFunctionType()->getParamType(0)->isPointerTy() // Shader id - && CallShader->getFunctionType()->getParamType(1) == - Type::getInt32Ty(*Context)); + && CallShader->getFunctionType()->getParamType(1)->isIntegerTy(32)); ReportHit = GpurtLibrary->getFunction(ContDriverFunc::ReportHitName); if (ReportHit) @@ -2546,13 +2444,11 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { GetSbtAddress = GpurtLibrary->getFunction(ContDriverFunc::GetSbtAddressName); if (GetSbtAddress) - assert(GetSbtAddress->getReturnType()->isIntegerTy(64) && - GetSbtAddress->arg_empty()); + assert(GetSbtAddress->getReturnType()->isIntegerTy(64) && GetSbtAddress->arg_empty()); GetSbtStride = GpurtLibrary->getFunction(ContDriverFunc::GetSbtStrideName); if (GetSbtStride) - assert(GetSbtStride->getReturnType()->isIntegerTy(32) && - GetSbtStride->arg_empty()); + assert(GetSbtStride->getReturnType()->isIntegerTy(32) && GetSbtStride->arg_empty()); // _cont_ShaderStart has one overload for each system data type llvm::for_each(GpurtLibrary->functions(), [&](Function &F) { @@ -2566,13 +2462,12 @@ void LowerRaytracingPipelinePassImpl::collectGpuRtFunctions() { }); } -LowerRaytracingPipelinePassImpl::LowerRaytracingPipelinePassImpl( - llvm::Module &M, Module &GpurtLibrary) - : 
Mod{&M}, GpurtLibrary{&GpurtLibrary}, Context{&M.getContext()}, - DL{&M.getDataLayout()}, Builder{Mod->getContext()}, MetadataState{*Mod}, - PAQManager{Mod, &GpurtLibrary, - MetadataState.getMaxPayloadRegisterCount()}, - PayloadHelper{*Mod, *DL, Builder, MetadataState.isInLgcCpsMode()} {} +LowerRaytracingPipelinePassImpl::LowerRaytracingPipelinePassImpl(llvm::Module &M, Module &GpurtLibrary) + : Mod{&M}, GpurtLibrary{&GpurtLibrary}, Context{&M.getContext()}, DL{&M.getDataLayout()}, + Builder{Mod->getContext()}, MetadataState{*Mod}, PAQManager{Mod, &GpurtLibrary, + MetadataState.getMaxPayloadRegisterCount()}, + PayloadHelper{*Mod, *DL, Builder, MetadataState.isInLgcCpsMode()} { +} PreservedAnalyses LowerRaytracingPipelinePassImpl::run() { collectGpuRtFunctions(); @@ -2592,63 +2487,56 @@ PreservedAnalyses LowerRaytracingPipelinePassImpl::run() { static const auto Visitor = llvm_dialects::VisitorBuilder() .setStrategy(llvm_dialects::VisitorStrategy::ByInstruction) - .addSet([](VisitorState &State, - Instruction &Op) { - auto *CInst = cast(&Op); - auto Data = State.Processables.find(CInst->getFunction()); - if (Data == State.Processables.end()) - return; - - if (isa(Op)) { - Data->second.ShaderIndexCalls.push_back(CInst); - return; - } - - if (isa(Op)) { - Data->second.ShaderRecordBufferCalls.push_back(CInst); - return; - } - - if (auto *Jump = dyn_cast(CInst)) { - Data->second.JumpCalls.push_back(Jump); - return; - } - - Type *PayloadTy = ContHelper::getPayloadTypeFromMetadata(*CInst); - - if (!isa(Op)) { - PAQPayloadConfig PAQPayload = { - PayloadTy, State.Metadata.getMaxHitAttributeByteCount()}; - - uint32_t PayloadStorageI32s = 0; - if (isa(Op)) { - PayloadStorageI32s = - State.PAQManager.getMaxPayloadStorageI32sForTraceRayFunc( - PAQPayload); - - Data->second.TraceRayCalls.push_back(CInst); - } else if (isa(Op)) { - PayloadStorageI32s = - State.PAQManager.getMaxPayloadStorageI32sForCallShaderFunc( - PAQPayload); - - Data->second.CallShaderCalls.push_back(CInst); - 
} - - Data->second.MaxOutgoingPayloadI32s = std::max( - Data->second.MaxOutgoingPayloadI32s, PayloadStorageI32s); - } else { - // The converter uses payload type metadata also to indicate hit - // attribute types - assert((!Data->second.HitAttributes || - Data->second.HitAttributes == PayloadTy) && - "Multiple reportHit calls with different hit attributes"); - Data->second.HitAttributes = PayloadTy; - - Data->second.ReportHitCalls.push_back(CInst); - } - }) + .addSet( + [](VisitorState &State, Instruction &Op) { + auto *CInst = cast(&Op); + auto Data = State.Processables.find(CInst->getFunction()); + if (Data == State.Processables.end()) + return; + + if (isa(Op)) { + Data->second.ShaderIndexCalls.push_back(CInst); + return; + } + + if (isa(Op)) { + Data->second.ShaderRecordBufferCalls.push_back(CInst); + return; + } + + if (auto *Jump = dyn_cast(CInst)) { + Data->second.JumpCalls.push_back(Jump); + return; + } + + Type *PayloadTy = ContHelper::getPayloadTypeFromMetadata(*CInst); + + if (!isa(Op)) { + PAQPayloadConfig PAQPayload = {PayloadTy, State.Metadata.getMaxHitAttributeByteCount()}; + + uint32_t PayloadStorageI32s = 0; + if (isa(Op)) { + PayloadStorageI32s = State.PAQManager.getMaxPayloadStorageI32sForTraceRayFunc(PAQPayload); + + Data->second.TraceRayCalls.push_back(CInst); + } else if (isa(Op)) { + PayloadStorageI32s = State.PAQManager.getMaxPayloadStorageI32sForCallShaderFunc(PAQPayload); + + Data->second.CallShaderCalls.push_back(CInst); + } + + Data->second.MaxOutgoingPayloadI32s = + std::max(Data->second.MaxOutgoingPayloadI32s, PayloadStorageI32s); + } else { + // The converter uses payload type metadata also to indicate hit + // attribute types + assert((!Data->second.HitAttributes || Data->second.HitAttributes == PayloadTy) && + "Multiple reportHit calls with different hit attributes"); + Data->second.HitAttributes = PayloadTy; + + Data->second.ReportHitCalls.push_back(CInst); + } + }) .build(); VisitorState S{PAQManager, ToProcess, MetadataState}; @@ 
-2664,11 +2552,9 @@ PreservedAnalyses LowerRaytracingPipelinePassImpl::run() { HitMissDataTy = nullptr; if (auto *HitKind = GpurtLibrary->getFunction(ContDriverFunc::HitKindName)) { HitMissDataTy = getFuncArgPtrElementType(HitKind, 0); - LLVM_DEBUG(dbgs() << "HitMiss system data from _cont_HitKind: "; - HitMissDataTy->dump()); + LLVM_DEBUG(dbgs() << "HitMiss system data from _cont_HitKind: "; HitMissDataTy->dump()); } - createPayloadGlobal(); setGpurtEntryRegisterCountMetadata(); processContinuations(); @@ -2685,10 +2571,8 @@ PreservedAnalyses LowerRaytracingPipelinePassImpl::run() { // For tests, remove intrinsic implementations from the module for (auto &F : make_early_inc_range(*Mod)) { auto Name = F.getName(); - if (Name.starts_with(ContDriverFunc::TraceRayName) || - Name.starts_with(ContDriverFunc::CallShaderName) || - Name.starts_with(ContDriverFunc::ExitRayGenName) || - Name.starts_with(ContDriverFunc::ReportHitName)) { + if (Name.starts_with(ContDriverFunc::TraceRayName) || Name.starts_with(ContDriverFunc::CallShaderName) || + Name.starts_with(ContDriverFunc::ExitRayGenName) || Name.starts_with(ContDriverFunc::ReportHitName)) { F.eraseFromParent(); } } @@ -2702,15 +2586,13 @@ PreservedAnalyses LowerRaytracingPipelinePassImpl::run() { MetadataState.updateModuleMetadata(); - if (auto *ContPayloadRegistersI32Count = - Mod->getFunction("_AmdContPayloadRegistersI32Count")) + if (auto *ContPayloadRegistersI32Count = Mod->getFunction("_AmdContPayloadRegistersI32Count")) handleContPayloadRegisterI32Count(*ContPayloadRegistersI32Count); return PreservedAnalyses::none(); } -std::optional -llvm::rtShaderStageToPAQShaderStage(RayTracingShaderStage ShaderKind) { +std::optional llvm::rtShaderStageToPAQShaderStage(RayTracingShaderStage ShaderKind) { switch (ShaderKind) { case RayTracingShaderStage::RayGeneration: return PAQShaderStage::Caller; @@ -2731,14 +2613,12 @@ llvm::rtShaderStageToPAQShaderStage(RayTracingShaderStage ShaderKind) { } } // anonymous namespace 
-llvm::PreservedAnalyses -LowerRaytracingPipelinePass::run(llvm::Module &M, - llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses LowerRaytracingPipelinePass::run(llvm::Module &M, + llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run the pass lower-raytracing-pipeline\n"); AnalysisManager.getResult(M); auto &GpurtContext = lgc::GpurtContext::get(M.getContext()); - LowerRaytracingPipelinePassImpl Impl( - M, GpurtContext.theModule ? *GpurtContext.theModule : M); + LowerRaytracingPipelinePassImpl Impl(M, GpurtContext.theModule ? *GpurtContext.theModule : M); return Impl.run(); } diff --git a/llvmraytracing/lib/PassRegistry.inc b/llvmraytracing/lib/PassRegistry.inc index 36e61575c8..848a30181f 100644 --- a/llvmraytracing/lib/PassRegistry.inc +++ b/llvmraytracing/lib/PassRegistry.inc @@ -44,22 +44,21 @@ #define CONT_MODULE_ANALYSIS(NAME, CREATE_PASS) #endif -CONT_MODULE_ANALYSIS("dialect-context-analysis", - DialectContextAnalysis(NeedDialectContext)) +CONT_MODULE_ANALYSIS("dialect-context-analysis", DialectContextAnalysis(NeedDialectContext)) -CONT_MODULE_PASS("legacy-cleanup-continuations", - LegacyCleanupContinuationsPass()) +CONT_MODULE_PASS("legacy-cleanup-continuations", LegacyCleanupContinuationsPass()) CONT_MODULE_PASS("dxil-cleanup-continuations", DXILCleanupContinuationsPass()) CONT_MODULE_PASS("cleanup-continuations", CleanupContinuationsPass()) +CONT_MODULE_PASS("continuations-stats-report", ContinuationsStatsReportPass()) CONT_MODULE_PASS("lower-raytracing-pipeline", LowerRaytracingPipelinePass()) +CONT_MODULE_PASS("lgc-cps-jump-inliner", LgcCpsJumpInlinerPass()) CONT_MODULE_PASS("dxil-cont-intrinsic-prepare", DXILContIntrinsicPreparePass()) -CONT_MODULE_PASS("dxil-cont-lgc-rt-op-converter", - DXILContLgcRtOpConverterPass()) +CONT_MODULE_PASS("dxil-cont-lgc-rt-op-converter", DXILContLgcRtOpConverterPass()) CONT_MODULE_PASS("dxil-cont-post-process", DXILContPostProcessPass()) +CONT_MODULE_PASS("continuations-lint", 
ContinuationsLintPass()) CONT_MODULE_PASS("dxil-cont-post-hook", DXILContPostHookPass()) CONT_MODULE_PASS("dxil-cont-pre-hook", DXILContPreHookPass()) CONT_MODULE_PASS("lower-await", LowerAwaitPass()) -CONT_MODULE_PASS("register-buffer", RegisterBufferPass()) CONT_MODULE_PASS("remove-types-metadata", RemoveTypesMetadataPass()) CONT_CGSCC_PASS("dxil-coro-split", DXILCoroSplitPass()) diff --git a/llvmraytracing/lib/PayloadAccessQualifiers.cpp b/llvmraytracing/lib/PayloadAccessQualifiers.cpp index f43fb1bef3..03f79bb073 100644 --- a/llvmraytracing/lib/PayloadAccessQualifiers.cpp +++ b/llvmraytracing/lib/PayloadAccessQualifiers.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -47,8 +47,7 @@ using namespace llvm; // Maybe change to PAQ-specific value #define DEBUG_TYPE "lower-raytracing-pipeline" -llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, - PAQShaderStage ShaderStage) { +llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, PAQShaderStage ShaderStage) { StringRef String = [ShaderStage]() { switch (ShaderStage) { case PAQShaderStage::Caller: @@ -68,8 +67,7 @@ llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, return Stream; } -llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, - PAQAccessKind AccessKind) { +llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, PAQAccessKind AccessKind) { if (AccessKind == PAQAccessKind::Read) { Stream << "read"; } else { @@ -79,8 +77,7 @@ llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, return Stream; } -void PAQAccessMask::print(llvm::raw_ostream &Stream, - std::optional RestrAccessKind) const { +void PAQAccessMask::print(llvm::raw_ostream &Stream, std::optional RestrAccessKind) const { for (PAQAccessKind AccessKind : {PAQAccessKind::Write, PAQAccessKind::Read}) { if (RestrAccessKind && AccessKind != RestrAccessKind) continue; @@ -102,8 +99,7 @@ void PAQAccessMask::print(llvm::raw_ostream &Stream, } } -llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, - PAQLifetimeClass LifetimeClass) { +llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, PAQLifetimeClass LifetimeClass) { StringRef String = [LifetimeClass]() { switch (LifetimeClass) { case PAQLifetimeClass::Caller_To_Caller: @@ -137,16 +133,14 @@ llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, return Stream; } -static std::string -determineSerializationInfoPrefix(const PAQPayloadConfig &PAQConfig) { +static std::string determineSerializationInfoPrefix(const PAQPayloadConfig &PAQConfig) 
{ std::string Result; raw_string_ostream Str{Result}; if (cast(PAQConfig.PayloadTy)->hasName()) Str << PAQConfig.PayloadTy->getStructName(); if (PAQConfig.MaxHitAttributeByteCount != 0) { assert(PAQConfig.MaxHitAttributeByteCount % RegisterBytes == 0); - Str << ".attr_max_" << PAQConfig.MaxHitAttributeByteCount / RegisterBytes - << "_i32s"; + Str << ".attr_max_" << PAQConfig.MaxHitAttributeByteCount / RegisterBytes << "_i32s"; } return Result; } @@ -155,9 +149,8 @@ determineSerializationInfoPrefix(const PAQPayloadConfig &PAQConfig) { // OptNumHitAttrI32s is only set if we are generating a specialized layout for a // particular hit attribute size obtained from the actual hit attribute type // (not the max hit attribute size) -static std::string -determineLayoutSuffix(std::optional OptLayoutKind, - std::optional OptNumPayloadHitAttrI32s) { +static std::string determineLayoutSuffix(std::optional OptLayoutKind, + std::optional OptNumPayloadHitAttrI32s) { std::string Result; raw_string_ostream Str{Result}; Str << "layout_"; @@ -176,8 +169,7 @@ determineLayoutSuffix(std::optional OptLayoutKind, // Also used to determine the names of serialization structs, // hence no spaces are used. 
-llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, - PAQSerializationLayoutKind LayoutKind) { +llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, PAQSerializationLayoutKind LayoutKind) { StringRef Identifier = [LayoutKind]() { switch (LayoutKind) { case PAQSerializationLayoutKind::CallerOut: @@ -203,12 +195,9 @@ llvm::raw_ostream &llvm::operator<<(llvm::raw_ostream &Stream, return Stream; } -std::optional -llvm::tryDetermineLayoutKind(PAQShaderStage ShaderStage, - PAQAccessKind AccessKind) { - assert((AccessKind == PAQAccessKind::Read || - AccessKind == PAQAccessKind::Write) && - "Invalid access kind!"); +std::optional llvm::tryDetermineLayoutKind(PAQShaderStage ShaderStage, + PAQAccessKind AccessKind) { + assert((AccessKind == PAQAccessKind::Read || AccessKind == PAQAccessKind::Write) && "Invalid access kind!"); switch (ShaderStage) { case PAQShaderStage::Caller: { if (AccessKind == PAQAccessKind::Write) @@ -238,16 +227,14 @@ llvm::tryDetermineLayoutKind(PAQShaderStage ShaderStage, llvm_unreachable("invalid shader stage"); } -static void printPAQNodeImpl(llvm::raw_ostream &Stream, const PAQNode &Node, - int Depth) { +static void printPAQNodeImpl(llvm::raw_ostream &Stream, const PAQNode &Node, int Depth) { Stream.indent(2 * (Depth + 1)); // Print mask Stream << "Access: "; if (Node.AccessMask) { // Print partially manually to align access masks - for (PAQAccessKind AccessKind : - {PAQAccessKind::Write, PAQAccessKind::Read}) { + for (PAQAccessKind AccessKind : {PAQAccessKind::Write, PAQAccessKind::Read}) { uint64_t Begin = Stream.tell(); Node.AccessMask->print(Stream, AccessKind); uint64_t CharsWritten = Stream.tell() - Begin; @@ -308,13 +295,10 @@ void PAQSerializationLayout::print(raw_ostream &O, bool SingleLine) const { } // Sort by interval for output - SmallVector, 32> - SortedNodeStorageInfosVector(NodeStorageInfos.begin(), - NodeStorageInfos.end()); + SmallVector, 32> SortedNodeStorageInfosVector(NodeStorageInfos.begin(), + 
NodeStorageInfos.end()); llvm::sort(SortedNodeStorageInfosVector, - [](const auto &LHS, const auto &RHS) { - return LHS.second.IndexIntervals < RHS.second.IndexIntervals; - }); + [](const auto &LHS, const auto &RHS) { return LHS.second.IndexIntervals < RHS.second.IndexIntervals; }); if (SingleLine) { O << *SerializationTy << " ; { "; @@ -332,9 +316,7 @@ void PAQSerializationLayout::print(raw_ostream &O, bool SingleLine) const { assert(PayloadRootNode); auto *Indent = " "; - auto TypeName = cast(PayloadRootNode->Ty)->hasName() - ? PayloadRootNode->Ty->getStructName() - : "unnamed"; + auto TypeName = cast(PayloadRootNode->Ty)->hasName() ? PayloadRootNode->Ty->getStructName() : "unnamed"; O << "Serialization layout for type " << TypeName << "\n"; // Print type with body @@ -363,12 +345,13 @@ void PAQSerializationLayout::print(raw_ostream &O, bool SingleLine) const { O << Indent << "Node storage intervals:\n"; for (const auto &NodeWithInfo : SortedNodeStorageInfosVector) { - O << Indent << Indent << *NodeWithInfo.first->Ty << " at " - << NodeWithInfo.second << "\n"; + O << Indent << Indent << *NodeWithInfo.first->Ty << " at " << NodeWithInfo.second << "\n"; } } -void PAQSerializationLayout::dump() const { print(dbgs()); } +void PAQSerializationLayout::dump() const { + print(dbgs()); +} // In DXIL Metadata, read/write qualifiers are encoded in a bitmask with a // single bit per combination of read or write and shader stage. @@ -382,9 +365,7 @@ void PAQSerializationLayout::dump() const { print(dbgs()); } // Miss 8-11 // Anyhit 12-15 static PAQAccessMask importPAQAccessMaskFromDXILBitMask(uint32_t DXILBitMask) { - auto GetAccessOffset = [](PAQAccessKind AccessKind) { - return AccessKind == PAQAccessKind::Read ? 0 : 1; - }; + auto GetAccessOffset = [](PAQAccessKind AccessKind) { return AccessKind == PAQAccessKind::Read ? 
0 : 1; }; auto GetStageOffset = [](PAQShaderStage ShaderStage) { switch (ShaderStage) { case PAQShaderStage::Caller: @@ -403,8 +384,7 @@ static PAQAccessMask importPAQAccessMaskFromDXILBitMask(uint32_t DXILBitMask) { PAQAccessMask Result; for (PAQShaderStage Stage : PAQShaderStages) { - for (PAQAccessKind AccessKind : - {PAQAccessKind::Read, PAQAccessKind::Write}) { + for (PAQAccessKind AccessKind : {PAQAccessKind::Read, PAQAccessKind::Write}) { uint64_t Offset = GetAccessOffset(AccessKind) + GetStageOffset(Stage); Result.set(Stage, AccessKind, DXILBitMask & (1u << Offset)); @@ -414,8 +394,7 @@ static PAQAccessMask importPAQAccessMaskFromDXILBitMask(uint32_t DXILBitMask) { } // Constexpr so we can test with static_asserts -static constexpr PAQLifetimeClass -lifetimeClassFromAccessMask(PAQAccessMask AccessMask) { +static constexpr PAQLifetimeClass lifetimeClassFromAccessMask(PAQAccessMask AccessMask) { using Stage = PAQShaderStage; if (AccessMask.get(Stage::Caller, PAQAccessKind::Write)) { if (AccessMask.get(Stage::Caller, PAQAccessKind::Read)) @@ -424,8 +403,7 @@ lifetimeClassFromAccessMask(PAQAccessMask AccessMask) { return PAQLifetimeClass::Caller_To_ClosestHitAndMiss; if (AccessMask.get(Stage::ClosestHit, PAQAccessKind::Read)) return PAQLifetimeClass::Caller_To_ClosestHit; - assert(AccessMask.get(Stage::AnyHit, PAQAccessKind::Read) && - "Unexpected access mask!"); + assert(AccessMask.get(Stage::AnyHit, PAQAccessKind::Read) && "Unexpected access mask!"); return PAQLifetimeClass::Caller_To_AnyHit; } // write(caller) is unset @@ -436,27 +414,23 @@ lifetimeClassFromAccessMask(PAQAccessMask AccessMask) { return PAQLifetimeClass::AnyHit_To_ClosestHitAndMiss; if (AccessMask.get(Stage::ClosestHit, PAQAccessKind::Read)) return PAQLifetimeClass::AnyHit_To_ClosestHit; - assert(AccessMask.get(Stage::AnyHit, PAQAccessKind::Read) && - "Unexpected access mask!"); + assert(AccessMask.get(Stage::AnyHit, PAQAccessKind::Read) && "Unexpected access mask!"); return 
PAQLifetimeClass::AnyHit_To_AnyHit; } // write(caller, anyhit) are unset - assert(AccessMask.get(Stage::Caller, PAQAccessKind::Read) && - "Unexpected PAQ access mask!"); + assert(AccessMask.get(Stage::Caller, PAQAccessKind::Read) && "Unexpected PAQ access mask!"); if (AccessMask.get(Stage::ClosestHit, PAQAccessKind::Write)) { if (AccessMask.get(Stage::Miss, PAQAccessKind::Write)) return PAQLifetimeClass::ClosestHitAndMiss_To_Caller; return PAQLifetimeClass::ClosestHit_To_Caller; } - assert(AccessMask.get(Stage::Miss, PAQAccessKind::Write) && - "Unexpected PAQ access mask!"); + assert(AccessMask.get(Stage::Miss, PAQAccessKind::Write) && "Unexpected PAQ access mask!"); return PAQLifetimeClass::Miss_To_Caller; } // Helper namespace containing testing code for lifetimeClassFromAccessMask namespace lifetimeTest { -static constexpr PAQAccessMask makeMask(PAQShaderStage WriteStage, - PAQShaderStage ReadStage) { +static constexpr PAQAccessMask makeMask(PAQShaderStage WriteStage, PAQShaderStage ReadStage) { PAQAccessMask Result; Result.set(WriteStage, PAQAccessKind::Write); Result.set(ReadStage, PAQAccessKind::Read); @@ -465,49 +439,30 @@ static constexpr PAQAccessMask makeMask(PAQShaderStage WriteStage, using Stage = PAQShaderStage; using Lifetime = PAQLifetimeClass; -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, - Stage::Caller)) == - Lifetime::Caller_To_Caller, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, Stage::Caller)) == Lifetime::Caller_To_Caller, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, - Stage::ClosestHit)) == - Lifetime::Caller_To_ClosestHit, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, Stage::ClosestHit)) == Lifetime::Caller_To_ClosestHit, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, - Stage::Miss)) == +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, Stage::Miss)) == 
Lifetime::Caller_To_ClosestHitAndMiss, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, - Stage::AnyHit)) == - Lifetime::Caller_To_AnyHit, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Caller, Stage::AnyHit)) == Lifetime::Caller_To_AnyHit, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, - Stage::Caller)) == - Lifetime::AnyHit_To_Caller, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, Stage::Caller)) == Lifetime::AnyHit_To_Caller, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, - Stage::ClosestHit)) == - Lifetime::AnyHit_To_ClosestHit, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, Stage::ClosestHit)) == Lifetime::AnyHit_To_ClosestHit, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, - Stage::Miss)) == +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, Stage::Miss)) == Lifetime::AnyHit_To_ClosestHitAndMiss, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, - Stage::AnyHit)) == - Lifetime::AnyHit_To_AnyHit, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::AnyHit, Stage::AnyHit)) == Lifetime::AnyHit_To_AnyHit, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::ClosestHit, - Stage::Caller)) == - Lifetime::ClosestHit_To_Caller, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::ClosestHit, Stage::Caller)) == Lifetime::ClosestHit_To_Caller, "Invalid lifetime class!"); -static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Miss, - Stage::Caller)) == - Lifetime::Miss_To_Caller, +static_assert(lifetimeClassFromAccessMask(makeMask(Stage::Miss, Stage::Caller)) == Lifetime::Miss_To_Caller, "Invalid lifetime class!"); static_assert( - lifetimeClassFromAccessMask(makeMask(Stage::ClosestHit, Stage::Caller) - .set(Stage::Miss, 
PAQAccessKind::Write)) == + lifetimeClassFromAccessMask(makeMask(Stage::ClosestHit, Stage::Caller).set(Stage::Miss, PAQAccessKind::Write)) == Lifetime::ClosestHitAndMiss_To_Caller, "Invalid lifetime class!"); } // namespace lifetimeTest @@ -524,19 +479,16 @@ std::optional tryExtractSExtIntegerFromMDOp(const MDOperand &Op) { // the returned object corresponding to the fields in PayloadType have no // children yet. If TypeAnnotationMDTuple is nullptr, all fields have // write(all) + read(all) access masks. -static std::unique_ptr -createPayloadRootNode(Type &PayloadType, MDTuple *TypeAnnotationMDTuple) { +static std::unique_ptr createPayloadRootNode(Type &PayloadType, MDTuple *TypeAnnotationMDTuple) { StructType *PayloadStructType = dyn_cast(&PayloadType); if (!PayloadStructType) report_fatal_error("Unexpected non-struct annotated payload type"); uint32_t NumElements = PayloadStructType->getNumElements(); - if (TypeAnnotationMDTuple && - NumElements != TypeAnnotationMDTuple->getNumOperands()) + if (TypeAnnotationMDTuple && NumElements != TypeAnnotationMDTuple->getNumOperands()) report_fatal_error("Incorrect number of metadata entries"); - std::unique_ptr RootNode = - std::make_unique(PAQNode{&PayloadType}); + std::unique_ptr RootNode = std::make_unique(PAQNode{&PayloadType}); // If the payload type is PAQ-annotated, create child nodes // with their access masks. 
Otherwise, set a trivial access @@ -553,13 +505,10 @@ createPayloadRootNode(Type &PayloadType, MDTuple *TypeAnnotationMDTuple) { if (!FieldMDTuple || FieldMDTuple->getNumOperands() != 2) report_fatal_error("Unexpected metadata format"); - std::optional OptTag = - tryExtractSExtIntegerFromMDOp(FieldMDTuple->getOperand(0)); - std::optional BitMask = - tryExtractSExtIntegerFromMDOp(FieldMDTuple->getOperand(1)); + std::optional OptTag = tryExtractSExtIntegerFromMDOp(FieldMDTuple->getOperand(0)); + std::optional BitMask = tryExtractSExtIntegerFromMDOp(FieldMDTuple->getOperand(1)); constexpr int64_t KDxilPayloadFieldAnnotationAccessTag = 0; - if (OptTag != KDxilPayloadFieldAnnotationAccessTag || - !BitMask.has_value()) + if (OptTag != KDxilPayloadFieldAnnotationAccessTag || !BitMask.has_value()) report_fatal_error("Unexpected metadata format"); // Only import bitmask if the value is non-zero. @@ -577,8 +526,7 @@ createPayloadRootNode(Type &PayloadType, MDTuple *TypeAnnotationMDTuple) { // in both cases, and differentiate later on to assign the empty mask // for the first case. if (BitMask.value() != 0) { - ChildNode.AccessMask = - importPAQAccessMaskFromDXILBitMask(BitMask.value()); + ChildNode.AccessMask = importPAQAccessMaskFromDXILBitMask(BitMask.value()); } RootNode->Children.push_back(std::move(ChildNode)); @@ -604,9 +552,9 @@ createPayloadRootNode(Type &PayloadType, MDTuple *TypeAnnotationMDTuple) { // from children if uniform. // For leaves, the lifetime class is set from the access mask (if set). // For inner nodes, the lifetime class is propagated from children if uniform. -static void createNestedStructHierarchyRecursively( - PAQNode &Node, - const MapVector> *ModulePayloadRootNodes) { +static void +createNestedStructHierarchyRecursively(PAQNode &Node, + const MapVector> *ModulePayloadRootNodes) { // If Node.AccessMask is unset, there are two possible cases: // - Node is a nested payload field. 
In this case, the field was *not* @@ -641,8 +589,7 @@ static void createNestedStructHierarchyRecursively( IsNestedPayload = true; if (It->second.get() != &Node) { PayloadTypeRootNode = It->second.get(); - assert(PayloadTypeRootNode->Children.size() == - StructTy->getNumElements() && + assert(PayloadTypeRootNode->Children.size() == StructTy->getNumElements() && "Inconsistent number of elements in payload PAQ node!"); } } @@ -683,8 +630,7 @@ static void createNestedStructHierarchyRecursively( ChildAccessMask = PayloadTypeRootNode->Children[I].AccessMask; } if (ChildrenArePrepopulated) { - assert(!ChildAccessMask.has_value() || - Node.Children[I].AccessMask == ChildAccessMask); + assert(!ChildAccessMask.has_value() || Node.Children[I].AccessMask == ChildAccessMask); } else { Type *ChildTy = StructTy->getElementType(I); Node.Children.emplace_back(); @@ -715,16 +661,13 @@ static void createNestedStructHierarchyRecursively( assert(Node.AccessMask.value_or(CommonAccessMask) == CommonAccessMask); Node.AccessMask = CommonAccessMask; assert(!Node.LifetimeClass.has_value() || - *Node.LifetimeClass == - lifetimeClassFromAccessMask(Node.AccessMask.value())); + *Node.LifetimeClass == lifetimeClassFromAccessMask(Node.AccessMask.value())); } } -[[maybe_unused]] static void dumpPAQTree(StructType *PayloadType, - const PAQNode &Node) { +[[maybe_unused]] static void dumpPAQTree(StructType *PayloadType, const PAQNode &Node) { // print for testing - llvm::dbgs() << "PAQ qualifiers for payload struct " << PayloadType->getName() - << ":\n"; + llvm::dbgs() << "PAQ qualifiers for payload struct " << PayloadType->getName() << ":\n"; for (const auto &Child : Node.Children) llvm::dbgs() << Child; @@ -743,17 +686,14 @@ static void createNestedStructHierarchyRecursively( // may exist in Node. // Note that setting an access mask for a node applies the same mask to its // whole subtree. 
-static void createNestedStructHierarchy( - Type *PayloadType, PAQNode &Node, - const MapVector> *ModulePayloadRootNodes) { +static void createNestedStructHierarchy(Type *PayloadType, PAQNode &Node, + const MapVector> *ModulePayloadRootNodes) { createNestedStructHierarchyRecursively(Node, ModulePayloadRootNodes); LLVM_DEBUG(dumpPAQTree(cast(PayloadType), Node)); } -static std::unique_ptr -createTrivialHierarchicalPayloadRootNode(Type &PayloadType) { - std::unique_ptr RootNode = - createPayloadRootNode(PayloadType, nullptr); +static std::unique_ptr createTrivialHierarchicalPayloadRootNode(Type &PayloadType) { + std::unique_ptr RootNode = createPayloadRootNode(PayloadType, nullptr); assert(RootNode && "Failed to create PAQ tree for payload type"); createNestedStructHierarchy(&PayloadType, *RootNode, nullptr); return RootNode; @@ -803,14 +743,12 @@ createTrivialHierarchicalPayloadRootNode(Type &PayloadType) { // This function only imports qualifiers on direct members from DXIL metadata. // Recursive traversal of nested structs is done later, using the annotations on // the top-level payload structs collected in this first phase. 
-static MapVector> -importModulePAQRootNodes(const Module &M) { +static MapVector> importModulePAQRootNodes(const Module &M) { LLVM_DEBUG(dbgs() << "Importing DXIL PAQ metadata\n"); auto *MDName = "dx.dxrPayloadAnnotations"; auto *MD = M.getNamedMetadata(MDName); if (!MD) { - LLVM_DEBUG(dbgs() << "PAQ: metadata " << MDName - << " not found, skipping PAQ import\n"); + LLVM_DEBUG(dbgs() << "PAQ: metadata " << MDName << " not found, skipping PAQ import\n"); return {}; } @@ -822,8 +760,7 @@ importModulePAQRootNodes(const Module &M) { MDTuple *MDTup = dyn_cast(Annot); if (!MDTup || MDTup->getNumOperands() == 0) continue; - std::optional OptTag = - tryExtractSExtIntegerFromMDOp(MDTup->getOperand(0)); + std::optional OptTag = tryExtractSExtIntegerFromMDOp(MDTup->getOperand(0)); constexpr int64_t KDxilPayloadAnnotationStructTag = 0; if (OptTag != KDxilPayloadAnnotationStructTag) continue; @@ -856,10 +793,8 @@ importModulePAQRootNodes(const Module &M) { report_fatal_error("Unexpected metadata format."); Type *PayloadType = TypeConstMD->getType(); - std::unique_ptr RootNode = - createPayloadRootNode(*PayloadType, TypeAnnotationMDTuple); - bool Inserted = - PayloadRootNodes.insert({PayloadType, std::move(RootNode)}).second; + std::unique_ptr RootNode = createPayloadRootNode(*PayloadType, TypeAnnotationMDTuple); + bool Inserted = PayloadRootNodes.insert({PayloadType, std::move(RootNode)}).second; (void)Inserted; assert(Inserted && "Duplicate PayloadType in result map!"); } @@ -870,19 +805,16 @@ importModulePAQRootNodes(const Module &M) { // Computes PAQ trees for all payload types for which DXIL payload annotation // metadata is present. For payload types without annotations, trivial // PAQ trees are created later on demand. -static MapVector> -importModulePayloadPAQNodes(const Module &M) { +static MapVector> importModulePayloadPAQNodes(const Module &M) { // Import from metadata. 
This needs to happen for all structs // before we recursively traverse field members, because // payload fields can be of payload struct type, in which case // the qualifiers are obtained from its type. - MapVector> PayloadRootNodes = - importModulePAQRootNodes(M); + MapVector> PayloadRootNodes = importModulePAQRootNodes(M); // Recursively create the nested struct hierarchy for (auto &TypeWithInfo : PayloadRootNodes) { - createNestedStructHierarchy(TypeWithInfo.first, *TypeWithInfo.second, - &PayloadRootNodes); + createNestedStructHierarchy(TypeWithInfo.first, *TypeWithInfo.second, &PayloadRootNodes); } return PayloadRootNodes; @@ -924,9 +856,8 @@ void PAQNode::collectNodes(SmallVectorImpl &Result) const { // have layouts with overlapping storage for fields that cannot be // simultaneously live. For example, a "write(closesthit)" field may share // storage with a "write(miss)" field in a CallerIn layout. -static MapVector -checkSerializationLayout(const PAQSerializationLayout &Layout, - const DataLayout &DL) { +static MapVector checkSerializationLayout(const PAQSerializationLayout &Layout, + const DataLayout &DL) { StructType *SerializationTy = Layout.SerializationTy; if (!SerializationTy) return {}; @@ -981,21 +912,19 @@ checkSerializationLayout(const PAQSerializationLayout &Layout, // structs. If at some point we also use inner nodes in serialization // structs, we should also check consistency between a node and its // ancestors (i.e. parent structs). 
-[[maybe_unused]] static void checkTraceRaySerializationInfoImpl( - ArrayRef Layouts, - const SmallDenseMap &EquivalentNodes, - const DataLayout &DL) { +[[maybe_unused]] static void +checkTraceRaySerializationInfoImpl(ArrayRef Layouts, + const SmallDenseMap &EquivalentNodes, + const DataLayout &DL) { MapVector MergedNodeIntervals; for (const PAQSerializationLayout *Layout : Layouts) { StructType *SerializationTy = Layout->SerializationTy; if (!SerializationTy) { if (!Layout->NodeStorageInfos.empty()) - report_fatal_error( - "Empty serialization struct but non-empty contained fields!"); + report_fatal_error("Empty serialization struct but non-empty contained fields!"); continue; } - MapVector NodeIntervals = - checkSerializationLayout(*Layout, DL); + MapVector NodeIntervals = checkSerializationLayout(*Layout, DL); for (const auto &NodeWithIntervals : NodeIntervals) { const PAQNode *Node = NodeWithIntervals.first; @@ -1009,8 +938,7 @@ checkSerializationLayout(const PAQSerializationLayout &Layout, } // Try to insert. 
If already present, compare offsets auto InsertionResult = MergedNodeIntervals.insert({Node, Intervals}); - const PAQIndexIntervals &ExistingIntervals = - InsertionResult.first->second; + const PAQIndexIntervals &ExistingIntervals = InsertionResult.first->second; if (!IsEquivalent && Intervals != ExistingIntervals) { report_fatal_error("Inconsistent serialization offset!"); } @@ -1027,8 +955,7 @@ checkSerializationLayout(const PAQSerializationLayout &Layout, if (PrefixRange->empty() || ContainingRange->empty()) continue; if (ContainingRange->size() < PrefixRange->size() || - (ContainingRange->size() == PrefixRange->size() && - ContainingRange->back().End < PrefixRange->back().End)) { + (ContainingRange->size() == PrefixRange->size() && ContainingRange->back().End < PrefixRange->back().End)) { std::swap(PrefixRange, ContainingRange); } @@ -1056,14 +983,12 @@ checkSerializationLayout(const PAQSerializationLayout &Layout, // HitGroupLayouts in TraceRaySerializationInfo are not checked. // However, if HitGroupLayout is non-null, its consistency with the // other layouts will be checked as well. -[[maybe_unused]] static void checkTraceRaySerializationInfo( - const PAQTraceRaySerializationInfo &TraceRaySerializationInfo, - const DataLayout &DL, - const PAQHitGroupLayoutInfo *HitGroupLayout = nullptr) { +[[maybe_unused]] static void +checkTraceRaySerializationInfo(const PAQTraceRaySerializationInfo &TraceRaySerializationInfo, const DataLayout &DL, + const PAQHitGroupLayoutInfo *HitGroupLayout = nullptr) { - SmallVector(PAQSerializationLayoutKind::Count) + 2> - Layouts{make_pointer_range(TraceRaySerializationInfo.LayoutsByKind)}; + SmallVector(PAQSerializationLayoutKind::Count) + 2> Layouts{ + make_pointer_range(TraceRaySerializationInfo.LayoutsByKind)}; SmallDenseMap EquivalentNodes; if (HitGroupLayout) { @@ -1099,19 +1024,17 @@ PAQLifetimeClassPackingOrder llvm::determineLifetimeClassPackingOrder() { // lifetime class its index in the ordering. 
This allows to quickly determine // the relative order of two given lifetime classes in the PackingOrder. using PAQLifetimeClassOrderingIndices = - llvm::EnumeratedArray; + llvm::EnumeratedArray; -static PAQLifetimeClassOrderingIndices computeLifetimeClassOrderingIndices( - const PAQLifetimeClassPackingOrder &Ordering) { +static PAQLifetimeClassOrderingIndices +computeLifetimeClassOrderingIndices(const PAQLifetimeClassPackingOrder &Ordering) { PAQLifetimeClassOrderingIndices Result{}; assert(Result.size() == Ordering.size() && "Inconsistent array lengths!"); for (PAQLifetimeClass LifetimeClass : PAQLifetimeClasses) Result[LifetimeClass] = -1; for (unsigned I = 0; I < Ordering.size(); ++I) { PAQLifetimeClass LifetimeClass = Ordering[I]; - assert(Result[LifetimeClass] == static_cast(-1) && - "Duplicate ordering entry!"); + assert(Result[LifetimeClass] == static_cast(-1) && "Duplicate ordering entry!"); Result[LifetimeClass] = I; } return Result; @@ -1120,24 +1043,19 @@ static PAQLifetimeClassOrderingIndices computeLifetimeClassOrderingIndices( // Returns whether a lifetime class is functionally live in the given layout // kind. Even if not live, it might be contained as dummy in the layout to // guarantee stable offsets of other lifetime classes. 
-static bool isLiveInLayout(PAQLifetimeClass LifetimeClass, - PAQSerializationLayoutKind LayoutKind) { +static bool isLiveInLayout(PAQLifetimeClass LifetimeClass, PAQSerializationLayoutKind LayoutKind) { // Consistent criteria to query whether a lifetime class of type FromXXX // or ToXXX is live in LayoutKind const bool FromCaller = true; const bool FromAnyHit = (LayoutKind != PAQSerializationLayoutKind::CallerOut); - const bool FromClosestHit = - (LayoutKind == PAQSerializationLayoutKind::ClosestHitOut); + const bool FromClosestHit = (LayoutKind == PAQSerializationLayoutKind::ClosestHitOut); const bool FromMiss = (LayoutKind == PAQSerializationLayoutKind::MissOut); const bool ToCaller = true; - const bool ToAnyHit = - (LayoutKind <= PAQSerializationLayoutKind::AnyHitOutAcceptHit); + const bool ToAnyHit = (LayoutKind <= PAQSerializationLayoutKind::AnyHitOutAcceptHit); const bool ToClosestHit = - (LayoutKind != PAQSerializationLayoutKind::MissIn && - LayoutKind <= PAQSerializationLayoutKind::ClosestHitIn); + (LayoutKind != PAQSerializationLayoutKind::MissIn && LayoutKind <= PAQSerializationLayoutKind::ClosestHitIn); const bool ToClosestHitAndMiss = - (LayoutKind <= std::max(PAQSerializationLayoutKind::MissIn, - PAQSerializationLayoutKind::ClosestHitIn)); + (LayoutKind <= std::max(PAQSerializationLayoutKind::MissIn, PAQSerializationLayoutKind::ClosestHitIn)); switch (LifetimeClass) { case PAQLifetimeClass::Caller_To_Caller: @@ -1182,8 +1100,7 @@ static bool isLiveInLayout(PAQLifetimeClass LifetimeClass, // or PAQSerializationLayoutKind. 
using LayoutBitmask = uint8_t; static constexpr uint64_t MaxNumLayoutsInBitmask = 8 * sizeof(LayoutBitmask); -static_assert(static_cast(PAQSerializationLayoutKind::Count) <= - MaxNumLayoutsInBitmask, +static_assert(static_cast(PAQSerializationLayoutKind::Count) <= MaxNumLayoutsInBitmask, "Increase BitMask width"); // Used from LayoutComputer if the serialization does not fit into registers @@ -1195,8 +1112,7 @@ static std::unique_ptr createPayloadMemPointerNode(Module &M) { // Use a single I32 to store the pointer Type *I32 = Type::getInt32Ty(M.getContext()); - return std::make_unique( - PAQNode{I32, {}, AccessMask, lifetimeClassFromAccessMask(AccessMask)}); + return std::make_unique(PAQNode{I32, {}, AccessMask, lifetimeClassFromAccessMask(AccessMask)}); } namespace { @@ -1293,9 +1209,7 @@ class LayoutComputer { // Set of all node infos to be possibly included in one of the layouts. SmallVector NodeInfos = {}; // We generate one result layout per layout info - SmallVector(PAQSerializationLayoutKind::Count)> - LayoutInfos = {}; + SmallVector(PAQSerializationLayoutKind::Count)> LayoutInfos = {}; // Storage is allocated greedily, ordered by PAQLifetimeClass as in // PackingOrder PAQLifetimeClassPackingOrder PackingOrder = {}; @@ -1316,9 +1230,7 @@ class LayoutComputer { struct Result { // Computed layouts, in order. - SmallVector(PAQSerializationLayoutKind::Count)> - Layouts; + SmallVector(PAQSerializationLayoutKind::Count)> Layouts; // Non-null if a payload memory pointer is required std::unique_ptr PayloadMemPointerNode; @@ -1336,8 +1248,7 @@ class LayoutComputer { LayoutComputer(const CreateInfo &CInfo) : CInfo{CInfo} {} Result run() { - assert(CInfo.LayoutInfos.size() <= MaxNumLayoutsInBitmask && - "Too many layouts"); + assert(CInfo.LayoutInfos.size() <= MaxNumLayoutsInBitmask && "Too many layouts"); prepareSortedNodeInfos(); computeAllocation(); @@ -1370,10 +1281,8 @@ class LayoutComputer { // Tries to allocate the given fixed intervals. 
// Returns true on success. - bool tryAllocateFixedIntervals(const PAQIndexIntervals &FixedIndexIntervals, - LayoutBitmask LivenessBitmask, - unsigned MaxNumI32s, - unsigned &NumAllocatedI32s) { + bool tryAllocateFixedIntervals(const PAQIndexIntervals &FixedIndexIntervals, LayoutBitmask LivenessBitmask, + unsigned MaxNumI32s, unsigned &NumAllocatedI32s) { NumAllocatedI32s = 0; for (const PAQIndexInterval &Interval : FixedIndexIntervals) { assert(Interval.size() != 0 && "Trying to allocate empty interval!"); @@ -1382,8 +1291,7 @@ class LayoutComputer { return false; } ensureSize(Interval.End); - for (unsigned I32Index = Interval.Begin; I32Index < Interval.End; - ++I32Index) { + for (unsigned I32Index = Interval.Begin; I32Index < Interval.End; ++I32Index) { // Check for overlap if (UsageMatrix[I32Index] & LivenessBitmask) return false; @@ -1403,24 +1311,20 @@ class LayoutComputer { SortedNodeInfos.push_back(&NodeInfo); } - PAQLifetimeClassOrderingIndices OrderingIndices = - computeLifetimeClassOrderingIndices(CInfo.PackingOrder); + PAQLifetimeClassOrderingIndices OrderingIndices = computeLifetimeClassOrderingIndices(CInfo.PackingOrder); - auto GetSortKey = - [&](const NodeInfo &Info) -> std::tuple { + auto GetSortKey = [&](const NodeInfo &Info) -> std::tuple { // Nodes with fixed assignments come first unsigned Order = Info.FixedIndexIntervals.empty() ? 1 : 0; const auto &OptLifetimeClass = Info.Node->LifetimeClass; - unsigned LifetimeClassIndex = - OptLifetimeClass ? OrderingIndices[OptLifetimeClass.value()] : 0; + unsigned LifetimeClassIndex = OptLifetimeClass ? 
OrderingIndices[OptLifetimeClass.value()] : 0; return {Order, LifetimeClassIndex}; }; // stable_sort so original order is preserved if possible - std::stable_sort(SortedNodeInfos.begin(), SortedNodeInfos.end(), - [GetSortKey](const NodeInfo *LHS, const NodeInfo *RHS) { - return GetSortKey(*LHS) < GetSortKey(*RHS); - }); + std::stable_sort( + SortedNodeInfos.begin(), SortedNodeInfos.end(), + [GetSortKey](const NodeInfo *LHS, const NodeInfo *RHS) { return GetSortKey(*LHS) < GetSortKey(*RHS); }); } // Sets OptAllocation. @@ -1474,9 +1378,8 @@ class LayoutComputer { // Handle case that the node has pre-assigned indices first if (!NodeInfo->FixedIndexIntervals.empty()) { unsigned NumAllocatedI32s = 0; - if (!Allocation->tryAllocateFixedIntervals( - NodeInfo->FixedIndexIntervals, LivenessBitmask, MaxNumI32s, - NumAllocatedI32s)) { + if (!Allocation->tryAllocateFixedIntervals(NodeInfo->FixedIndexIntervals, LivenessBitmask, MaxNumI32s, + NumAllocatedI32s)) { // Failure. Reset allocation and return. 
OptAllocation.reset(); return; @@ -1525,31 +1428,23 @@ class LayoutComputer { } void addPayloadMemPointer() { - assert(PayloadMemPointerNode == nullptr && - "Payload mem pointer already initialized!"); + assert(PayloadMemPointerNode == nullptr && "Payload mem pointer already initialized!"); PayloadMemPointerNode = createPayloadMemPointerNode(CInfo.M); - PAQIndexInterval Interval = {FirstPayloadMemoryPointerRegister, - FirstPayloadMemoryPointerRegister + 1}; - PayloadMemPointerNodeInfo = - NodeInfo{PayloadMemPointerNode.get(), {Interval}, LayoutBitmask(-1)}; - SortedNodeInfos.insert(SortedNodeInfos.begin(), - &PayloadMemPointerNodeInfo.value()); + PAQIndexInterval Interval = {FirstPayloadMemoryPointerRegister, FirstPayloadMemoryPointerRegister + 1}; + PayloadMemPointerNodeInfo = NodeInfo{PayloadMemPointerNode.get(), {Interval}, LayoutBitmask(-1)}; + SortedNodeInfos.insert(SortedNodeInfos.begin(), &PayloadMemPointerNodeInfo.value()); } - PAQSerializationLayout - createSerializationLayout(const I32Allocation &Allocation, - unsigned LayoutIndex) const { + PAQSerializationLayout createSerializationLayout(const I32Allocation &Allocation, unsigned LayoutIndex) const { PAQSerializationLayout Layout = {}; - for (unsigned NodeIndex = 0; NodeIndex < SortedNodeInfos.size(); - ++NodeIndex) { + for (unsigned NodeIndex = 0; NodeIndex < SortedNodeInfos.size(); ++NodeIndex) { const NodeInfo *NInfo = SortedNodeInfos[NodeIndex]; assert(NInfo->Node && "Nullptr node in layout!"); // Check whether this node is included in the current layout if ((NInfo->LivenessBitmask & (1u << LayoutIndex)) == 0) continue; - Layout.NodeStorageInfos[NInfo->Node] = - PAQNodeStorageInfo{Allocation.NodeIndexIntervals[NodeIndex]}; + Layout.NodeStorageInfos[NInfo->Node] = PAQNodeStorageInfo{Allocation.NodeIndexIntervals[NodeIndex]}; for (const auto &Interval : Allocation.NodeIndexIntervals[NodeIndex]) { assert(Interval.size() != 0 && "Unexpected empty interval!"); @@ -1560,8 +1455,7 @@ class LayoutComputer { 
if (Layout.NumStorageI32s) { Type *I32 = Type::getInt32Ty(CInfo.M.getContext()); ArrayType *ArrType = ArrayType::get(I32, Layout.NumStorageI32s); - Layout.SerializationTy = - StructType::create({ArrType}, CInfo.LayoutInfos[LayoutIndex].Name); + Layout.SerializationTy = StructType::create({ArrType}, CInfo.LayoutInfos[LayoutIndex].Name); } Layout.PayloadMemPointerNode = PayloadMemPointerNode.get(); @@ -1575,10 +1469,8 @@ class LayoutComputer { Result Result{}; Result.MaxNumI32s = Allocation.numUsedI32s(); Result.Layouts.reserve(CInfo.LayoutInfos.size()); - for (unsigned LayoutIndex = 0; LayoutIndex < CInfo.LayoutInfos.size(); - ++LayoutIndex) { - Result.Layouts.push_back( - createSerializationLayout(Allocation, LayoutIndex)); + for (unsigned LayoutIndex = 0; LayoutIndex < CInfo.LayoutInfos.size(); ++LayoutIndex) { + Result.Layouts.push_back(createSerializationLayout(Allocation, LayoutIndex)); } Result.PayloadMemPointerNode = std::move(PayloadMemPointerNode); return Result; @@ -1595,8 +1487,7 @@ class LayoutComputer { } // namespace -static std::unique_ptr -createHitAttributeStorageNode(Module &M, uint64_t PayloadHitAttrI32s) { +static std::unique_ptr createHitAttributeStorageNode(Module &M, uint64_t PayloadHitAttrI32s) { assert(PayloadHitAttrI32s && "Attempting to create empty hit attribute node"); Type *I32 = Type::getInt32Ty(M.getContext()); Type *I32Arr = ArrayType::get(I32, PayloadHitAttrI32s); @@ -1604,8 +1495,7 @@ createHitAttributeStorageNode(Module &M, uint64_t PayloadHitAttrI32s) { AccessMask.set(PAQShaderStage::AnyHit, PAQAccessKind::Write); AccessMask.set(PAQShaderStage::AnyHit, PAQAccessKind::Read); AccessMask.set(PAQShaderStage::ClosestHit, PAQAccessKind::Read); - return std::make_unique( - PAQNode{I32Arr, {}, AccessMask, lifetimeClassFromAccessMask(AccessMask)}); + return std::make_unique(PAQNode{I32Arr, {}, AccessMask, lifetimeClassFromAccessMask(AccessMask)}); } // Table indexed by PAQLifetimeClass containing liveness bitmasks, @@ -1613,19 +1503,16 @@ 
createHitAttributeStorageNode(Module &M, uint64_t PayloadHitAttrI32s) { // In other words, the j-th bit in the i-th bitmask specifies whether // PAQLifetimeClass i is live in PAQSerializationLayoutKind j. using LivenessBitmaskTable = - llvm::EnumeratedArray; + llvm::EnumeratedArray; static const LivenessBitmaskTable &getLivenessBitmaskTable() { static const LivenessBitmaskTable LivenessTable = []() { LivenessBitmaskTable Initializer = {}; for (PAQLifetimeClass LifetimeClass : PAQLifetimeClasses) { - for (PAQSerializationLayoutKind LayoutKind : - PAQSerializationLayoutKinds) { + for (PAQSerializationLayoutKind LayoutKind : PAQSerializationLayoutKinds) { bool IsLive = isLiveInLayout(LifetimeClass, LayoutKind); if (IsLive) { - Initializer[LifetimeClass] |= - (1u << static_cast(LayoutKind)); + Initializer[LifetimeClass] |= (1u << static_cast(LayoutKind)); } } } @@ -1636,23 +1523,19 @@ static const LivenessBitmaskTable &getLivenessBitmaskTable() { } // LayoutComputer wrapper for TraceRay -static LayoutComputer::Result -computeTraceRayLayouts(Module &M, ArrayRef Nodes, - const PAQNode *HitAttributesNode, - const PAQNode *PayloadRootNode, - unsigned PayloadRegisterCount, StringRef NamePrefix) { +static LayoutComputer::Result computeTraceRayLayouts(Module &M, ArrayRef Nodes, + const PAQNode *HitAttributesNode, const PAQNode *PayloadRootNode, + unsigned PayloadRegisterCount, StringRef NamePrefix) { LayoutComputer::CreateInfo LayoutCreateInfo = {M}; LayoutCreateInfo.LayoutInfos.reserve(PAQSerializationLayoutKinds.size()); for (auto LayoutKind : PAQSerializationLayoutKinds) { std::string TypeName; raw_string_ostream TypeNameStream(TypeName); - TypeNameStream << NamePrefix << "." - << determineLayoutSuffix(LayoutKind, {}); + TypeNameStream << NamePrefix << "." 
<< determineLayoutSuffix(LayoutKind, {}); LayoutComputer::LayoutInfo LayoutInfo = {TypeNameStream.str()}; // We rely on using layout kinds as index into layout infos - assert(static_cast(LayoutKind) == - LayoutCreateInfo.LayoutInfos.size()); + assert(static_cast(LayoutKind) == LayoutCreateInfo.LayoutInfos.size()); LayoutCreateInfo.LayoutInfos.push_back({LayoutInfo}); } @@ -1661,23 +1544,19 @@ computeTraceRayLayouts(Module &M, ArrayRef Nodes, LayoutCreateInfo.PackingOrder = determineLifetimeClassPackingOrder(); LayoutCreateInfo.PayloadRegisterCount = PayloadRegisterCount; - const LivenessBitmaskTable &BitmaskByLifetimeClass = - getLivenessBitmaskTable(); + const LivenessBitmaskTable &BitmaskByLifetimeClass = getLivenessBitmaskTable(); for (const PAQNode *Node : Nodes) { assert(Node); - LayoutBitmask LivenessBitmask = - BitmaskByLifetimeClass[Node->LifetimeClass.value()]; + LayoutBitmask LivenessBitmask = BitmaskByLifetimeClass[Node->LifetimeClass.value()]; LayoutComputer::NodeInfo NodeInfo = {Node, {}, LivenessBitmask}; if (Node == HitAttributesNode) { // fix hit attribute registers - assert(Node->Ty->isArrayTy() && - Node->Ty->getArrayElementType()->isIntegerTy(32) && + assert(Node->Ty->isArrayTy() && Node->Ty->getArrayElementType()->isIntegerTy(32) && "Hit attribute storage must be i32 array!"); unsigned NumHitAttributeI32s = Node->Ty->getArrayNumElements(); NodeInfo.FixedIndexIntervals = { - {FirstPayloadHitAttributeStorageRegister, - FirstPayloadHitAttributeStorageRegister + NumHitAttributeI32s}}; + {FirstPayloadHitAttributeStorageRegister, FirstPayloadHitAttributeStorageRegister + NumHitAttributeI32s}}; } LayoutCreateInfo.NodeInfos.push_back(NodeInfo); } @@ -1685,14 +1564,12 @@ computeTraceRayLayouts(Module &M, ArrayRef Nodes, return LayoutComputer::create(LayoutCreateInfo); } -std::unique_ptr -PAQTraceRaySerializationInfo::create(Module &M, - const PAQPayloadConfig &PAQConfig, - const PAQNode &RootNode, - uint64_t PayloadRegisterCount) { +std::unique_ptr 
PAQTraceRaySerializationInfo::create(Module &M, + const PAQPayloadConfig &PAQConfig, + const PAQNode &RootNode, + uint64_t PayloadRegisterCount) { assert(PAQConfig.PayloadTy == RootNode.Ty); - std::unique_ptr Result = - std::make_unique(); + std::unique_ptr Result = std::make_unique(); Result->PayloadRootNode = &RootNode; Result->PAQConfig = PAQConfig; @@ -1707,16 +1584,14 @@ PAQTraceRaySerializationInfo::create(Module &M, // registers, e.g. in case no intersection shaders are present. assert(PAQConfig.MaxHitAttributeByteCount <= GlobalMaxHitAttributeBytes); const uint32_t MaxInlineHitAttrBytes = getInlineHitAttrsBytes(M); - const uint32_t InlineHitAttrBytes = - std::min(MaxInlineHitAttrBytes, PAQConfig.MaxHitAttributeByteCount); - const uint64_t PayloadHitAttrI32s = divideCeil( - PAQConfig.MaxHitAttributeByteCount - InlineHitAttrBytes, RegisterBytes); + const uint32_t InlineHitAttrBytes = std::min(MaxInlineHitAttrBytes, PAQConfig.MaxHitAttributeByteCount); + const uint64_t PayloadHitAttrI32s = + divideCeil(PAQConfig.MaxHitAttributeByteCount - InlineHitAttrBytes, RegisterBytes); if (PayloadHitAttrI32s != 0) { // Add node representing hit attribute storage Result->MaximumNumHitAttributesI32s = PayloadHitAttrI32s; - Result->WorstCaseHitAttributesNode = - createHitAttributeStorageNode(M, PayloadHitAttrI32s); + Result->WorstCaseHitAttributesNode = createHitAttributeStorageNode(M, PayloadHitAttrI32s); } // Compute set of individual layouts using LayoutComputer @@ -1724,16 +1599,14 @@ PAQTraceRaySerializationInfo::create(Module &M, Result->collectAllNodes(Nodes); std::string NamePrefix = determineSerializationInfoPrefix(PAQConfig); LayoutComputer::Result LayoutResult = computeTraceRayLayouts( - M, Nodes, Result->WorstCaseHitAttributesNode.get(), - Result->PayloadRootNode, PayloadRegisterCount, NamePrefix); + M, Nodes, Result->WorstCaseHitAttributesNode.get(), Result->PayloadRootNode, PayloadRegisterCount, NamePrefix); // Move layouts to Result, and do dumping and 
checking Result->MaxStorageI32s = LayoutResult.MaxNumI32s; // This may be nullptr if registers suffice Result->PayloadMemPointerNode = std::move(LayoutResult.PayloadMemPointerNode); for (PAQSerializationLayoutKind LayoutKind : PAQSerializationLayoutKinds) { - Result->LayoutsByKind[LayoutKind] = - std::move(LayoutResult.Layouts[static_cast(LayoutKind)]); + Result->LayoutsByKind[LayoutKind] = std::move(LayoutResult.Layouts[static_cast(LayoutKind)]); // For lit testing: Dump type information LLVM_DEBUG(Result->LayoutsByKind[LayoutKind].print(dbgs(), true)); } @@ -1744,33 +1617,28 @@ PAQTraceRaySerializationInfo::create(Module &M, return Result; } -PAQHitGroupLayoutInfo PAQTraceRaySerializationInfo::createHitGroupLayoutInfo( - Module &M, uint32_t PayloadHitAttrI32s) const { +PAQHitGroupLayoutInfo PAQTraceRaySerializationInfo::createHitGroupLayoutInfo(Module &M, + uint32_t PayloadHitAttrI32s) const { PAQHitGroupLayoutInfo HitGroupLayoutInfo{}; HitGroupLayoutInfo.NumHitAttributesI32s = PayloadHitAttrI32s; if (PayloadHitAttrI32s != 0) { // Add node representing hit attribute storage of reduced size - HitGroupLayoutInfo.HitAttributesNode = - createHitAttributeStorageNode(M, PayloadHitAttrI32s); + HitGroupLayoutInfo.HitAttributesNode = createHitAttributeStorageNode(M, PayloadHitAttrI32s); } for (PAQSerializationLayoutKind LayoutKind : - {PAQSerializationLayoutKind::AnyHitOutAcceptHit, - PAQSerializationLayoutKind::ClosestHitIn}) { + {PAQSerializationLayoutKind::AnyHitOutAcceptHit, PAQSerializationLayoutKind::ClosestHitIn}) { const PAQSerializationLayout &DefaultLayout = LayoutsByKind[LayoutKind]; // Look up storage interval of hit attributes in default layout - auto It = - DefaultLayout.NodeStorageInfos.find(WorstCaseHitAttributesNode.get()); + auto It = DefaultLayout.NodeStorageInfos.find(WorstCaseHitAttributesNode.get()); assert(It != DefaultLayout.NodeStorageInfos.end()); const PAQNodeStorageInfo &HitAtttrsSI = It->second; - assert(HitAtttrsSI.IndexIntervals.size() == 1 
&& - "Hit attributes must be contiguous!"); + assert(HitAtttrsSI.IndexIntervals.size() == 1 && "Hit attributes must be contiguous!"); PAQIndexInterval HitAttrInterval = HitAtttrsSI.IndexIntervals[0]; - PAQIndexInterval NewHitAttrInterval = { - HitAttrInterval.Begin, HitAttrInterval.Begin + PayloadHitAttrI32s}; + PAQIndexInterval NewHitAttrInterval = {HitAttrInterval.Begin, HitAttrInterval.Begin + PayloadHitAttrI32s}; // Start with copy, then specialize PAQSerializationLayout Layout = DefaultLayout; @@ -1778,10 +1646,8 @@ PAQHitGroupLayoutInfo PAQTraceRaySerializationInfo::createHitGroupLayoutInfo( // Update hit attribute index interval and hit attribute node Layout.NodeStorageInfos.erase(WorstCaseHitAttributesNode.get()); if (HitGroupLayoutInfo.HitAttributesNode) { - Layout.NodeStorageInfos[HitGroupLayoutInfo.HitAttributesNode.get()] = { - {NewHitAttrInterval}}; - Layout.HitAttributeStorageNode = - HitGroupLayoutInfo.HitAttributesNode.get(); + Layout.NodeStorageInfos[HitGroupLayoutInfo.HitAttributesNode.get()] = {{NewHitAttrInterval}}; + Layout.HitAttributeStorageNode = HitGroupLayoutInfo.HitAttributesNode.get(); } else { Layout.HitAttributeStorageNode = nullptr; } @@ -1803,12 +1669,10 @@ PAQHitGroupLayoutInfo PAQTraceRaySerializationInfo::createHitGroupLayoutInfo( std::string NewTypeName; raw_string_ostream NewTypeNameStream(NewTypeName); NewTypeNameStream << determineSerializationInfoPrefix(PAQConfig) << "." 
- << determineLayoutSuffix(LayoutKind, - PayloadHitAttrI32s); + << determineLayoutSuffix(LayoutKind, PayloadHitAttrI32s); Type *I32 = Type::getInt32Ty(M.getContext()); ArrayType *ArrType = ArrayType::get(I32, Layout.NumStorageI32s); - Layout.SerializationTy = - StructType::create({ArrType}, NewTypeNameStream.str()); + Layout.SerializationTy = StructType::create({ArrType}, NewTypeNameStream.str()); // For lit testing: Dump type information LLVM_DEBUG(Layout.print(dbgs(), true)); @@ -1829,17 +1693,15 @@ PAQHitGroupLayoutInfo PAQTraceRaySerializationInfo::createHitGroupLayoutInfo( return HitGroupLayoutInfo; } -[[maybe_unused]] static void -checkCallShaderSerializationInfo(const PAQCallShaderSerializationInfo &Info, - const DataLayout &DL) { +[[maybe_unused]] static void checkCallShaderSerializationInfo(const PAQCallShaderSerializationInfo &Info, + const DataLayout &DL) { checkSerializationLayout(Info.CallShaderSerializationLayout, DL); } // LayoutComputer wrapper for CallShader -static LayoutComputer::Result -computeCallShaderLayout(Module &M, ArrayRef Nodes, - const PAQNode *PayloadRootNode, - unsigned PayloadRegisterCount, StringRef NamePrefix) { +static LayoutComputer::Result computeCallShaderLayout(Module &M, ArrayRef Nodes, + const PAQNode *PayloadRootNode, unsigned PayloadRegisterCount, + StringRef NamePrefix) { std::string TypeName; raw_string_ostream TypeNameStream(TypeName); TypeNameStream << NamePrefix @@ -1861,20 +1723,17 @@ computeCallShaderLayout(Module &M, ArrayRef Nodes, } std::unique_ptr -PAQCallShaderSerializationInfo::create(Module &M, - const PAQPayloadConfig &PAQConfig, - const PAQNode &PAQRootNode, +PAQCallShaderSerializationInfo::create(Module &M, const PAQPayloadConfig &PAQConfig, const PAQNode &PAQRootNode, uint64_t PayloadRegisterCount) { assert(PAQConfig.PayloadTy == PAQRootNode.Ty); - std::unique_ptr Result = - std::make_unique(); + std::unique_ptr Result = std::make_unique(); Result->PayloadRootNode = &PAQRootNode; SmallVector Nodes; 
Result->collectAllNodes(Nodes); std::string NamePrefix = determineSerializationInfoPrefix(PAQConfig); - LayoutComputer::Result LayoutResult = computeCallShaderLayout( - M, Nodes, Result->PayloadRootNode, PayloadRegisterCount, NamePrefix); + LayoutComputer::Result LayoutResult = + computeCallShaderLayout(M, Nodes, Result->PayloadRootNode, PayloadRegisterCount, NamePrefix); // may be nullptr if registers suffice Result->PayloadMemPointerNode = std::move(LayoutResult.PayloadMemPointerNode); @@ -1894,17 +1753,14 @@ PAQCallShaderSerializationInfo::create(Module &M, return Result; } -PAQSerializationInfoManager::PAQSerializationInfoManager( - Module *M, Module *GpurtLibrary, uint32_t MaxPayloadRegCount) - : Mod{M}, GpurtLibrary{GpurtLibrary}, - MaxPayloadRegisterCount(MaxPayloadRegCount) { +PAQSerializationInfoManager::PAQSerializationInfoManager(Module *M, Module *GpurtLibrary, uint32_t MaxPayloadRegCount) + : Mod{M}, GpurtLibrary{GpurtLibrary}, MaxPayloadRegisterCount(MaxPayloadRegCount) { TraceRayCache.PAQRootNodes = importModulePayloadPAQNodes(*M); } PAQSerializationInfoBase & -PAQSerializationInfoManager::getOrCreateSerializationInfo( - const PAQPayloadConfig &PayloadConfig, - lgc::rt::RayTracingShaderStage ShaderKind) { +PAQSerializationInfoManager::getOrCreateSerializationInfo(const PAQPayloadConfig &PayloadConfig, + lgc::rt::RayTracingShaderStage ShaderKind) { switch (ShaderKind) { case lgc::rt::RayTracingShaderStage::RayGeneration: llvm_unreachable("RayGen does not have an incoming payload"); @@ -1921,26 +1777,22 @@ PAQSerializationInfoManager::getOrCreateSerializationInfo( } PAQTraceRaySerializationInfo & -PAQSerializationInfoManager::getOrCreateTraceRaySerializationInfo( - const PAQPayloadConfig &PAQConfig) { - return TraceRayCache.getOrCreateSerializationInfo( - *GpurtLibrary, MaxPayloadRegisterCount, PAQConfig); +PAQSerializationInfoManager::getOrCreateTraceRaySerializationInfo(const PAQPayloadConfig &PAQConfig) { + return 
TraceRayCache.getOrCreateSerializationInfo(*GpurtLibrary, MaxPayloadRegisterCount, PAQConfig); } PAQCallShaderSerializationInfo & -PAQSerializationInfoManager::getOrCreateCallShaderSerializationInfo( - const PAQPayloadConfig &PAQConfig) { +PAQSerializationInfoManager::getOrCreateCallShaderSerializationInfo(const PAQPayloadConfig &PAQConfig) { // Ensure caching doesn't depend on irrelevant fields PAQPayloadConfig PAQConfigWithRelevantData = PAQConfig; PAQConfigWithRelevantData.MaxHitAttributeByteCount = 0; - return CallShaderCache.getOrCreateSerializationInfo( - *GpurtLibrary, MaxPayloadRegisterCount, PAQConfigWithRelevantData); + return CallShaderCache.getOrCreateSerializationInfo(*GpurtLibrary, MaxPayloadRegisterCount, + PAQConfigWithRelevantData); } template -SerializationInfoT &PAQSerializationInfoManager::PAQCache:: - getOrCreateSerializationInfo(Module &M, uint32_t MaxPayloadRegisterCount, - const PAQPayloadConfig &PAQConfig) { +SerializationInfoT &PAQSerializationInfoManager::PAQCache::getOrCreateSerializationInfo( + Module &M, uint32_t MaxPayloadRegisterCount, const PAQPayloadConfig &PAQConfig) { auto It = SerializationInfos.find(PAQConfig); if (It != SerializationInfos.end()) return *It->second; @@ -1950,52 +1802,43 @@ SerializationInfoT &PAQSerializationInfoManager::PAQCache:: if (PAQNodeIt != PAQRootNodes.end()) { PAQRootNode = PAQNodeIt->second.get(); } else { - auto PAQRootNodeUniquePtr = - createTrivialHierarchicalPayloadRootNode(*PAQConfig.PayloadTy); + auto PAQRootNodeUniquePtr = createTrivialHierarchicalPayloadRootNode(*PAQConfig.PayloadTy); PAQRootNode = PAQRootNodeUniquePtr.get(); PAQRootNodes.insert({PAQConfig.PayloadTy, std::move(PAQRootNodeUniquePtr)}); } // Compute info - std::unique_ptr Info = SerializationInfoT::create( - M, PAQConfig, *PAQRootNode, MaxPayloadRegisterCount); - auto InsertionResult = - SerializationInfos.insert({PAQConfig, std::move(Info)}); + std::unique_ptr Info = + SerializationInfoT::create(M, PAQConfig, *PAQRootNode, 
MaxPayloadRegisterCount); + auto InsertionResult = SerializationInfos.insert({PAQConfig, std::move(Info)}); assert(InsertionResult.second && "Unexpected map duplicate!"); return *InsertionResult.first->second; } -uint32_t PAQSerializationInfoManager::getMaxPayloadStorageI32s( - const PAQPayloadConfig &PAQConfig, - MaxPayloadStorageConsideration Consideration) { +uint32_t PAQSerializationInfoManager::getMaxPayloadStorageI32s(const PAQPayloadConfig &PAQConfig, + MaxPayloadStorageConsideration Consideration) { if (!PAQConfig.PayloadTy) return 0; uint32_t Result = 0; if (Consideration == MaxPayloadStorageConsideration::ConsiderOnlyTraceRay || - Consideration == - MaxPayloadStorageConsideration::ConsiderTraceRayAndCallShader) { - Result = std::max( - Result, getOrCreateTraceRaySerializationInfo(PAQConfig).MaxStorageI32s); + Consideration == MaxPayloadStorageConsideration::ConsiderTraceRayAndCallShader) { + Result = std::max(Result, getOrCreateTraceRaySerializationInfo(PAQConfig).MaxStorageI32s); } if (Consideration == MaxPayloadStorageConsideration::ConsiderOnlyCallShader || - Consideration == - MaxPayloadStorageConsideration::ConsiderTraceRayAndCallShader) { - Result = std::max( - Result, - getOrCreateCallShaderSerializationInfo(PAQConfig).MaxStorageI32s); + Consideration == MaxPayloadStorageConsideration::ConsiderTraceRayAndCallShader) { + Result = std::max(Result, getOrCreateCallShaderSerializationInfo(PAQConfig).MaxStorageI32s); } return Result; } const PAQSerializationLayout & -PAQSerializationInfoManager::getOrCreateTraceRayLayout( - PAQTraceRaySerializationInfo &TraceRayInfo, - PAQSerializationLayoutKind LayoutKind, Type *HitAttributesTy) { +PAQSerializationInfoManager::getOrCreateTraceRayLayout(PAQTraceRaySerializationInfo &TraceRayInfo, + PAQSerializationLayoutKind LayoutKind, Type *HitAttributesTy) { if (LayoutKind != PAQSerializationLayoutKind::AnyHitOutAcceptHit && LayoutKind != PAQSerializationLayoutKind::ClosestHitIn) @@ -2006,18 +1849,15 @@ 
PAQSerializationInfoManager::getOrCreateTraceRayLayout( // create a specialized layout with reduced hit attribute storage size. assert(HitAttributesTy && "Hit attributes type required!"); - uint64_t AttrsBytes = - Mod->getDataLayout().getTypeStoreSize(HitAttributesTy).getFixedValue(); + uint64_t AttrsBytes = Mod->getDataLayout().getTypeStoreSize(HitAttributesTy).getFixedValue(); if (AttrsBytes > TraceRayInfo.PAQConfig.MaxHitAttributeByteCount) report_fatal_error("Hit attributes are too large!"); uint64_t InlineHitAttrsBytes = getInlineHitAttrsBytes(*GpurtLibrary); - uint64_t AttrsInPayloadBytes = - AttrsBytes > InlineHitAttrsBytes ? AttrsBytes - InlineHitAttrsBytes : 0; + uint64_t AttrsInPayloadBytes = AttrsBytes > InlineHitAttrsBytes ? AttrsBytes - InlineHitAttrsBytes : 0; // Number of I32s required in the payload storage uint64_t PayloadHitAttrI32s = divideCeil(AttrsInPayloadBytes, RegisterBytes); - assert(PayloadHitAttrI32s <= TraceRayInfo.MaximumNumHitAttributesI32s && - "Hit attributes are too large!"); + assert(PayloadHitAttrI32s <= TraceRayInfo.MaximumNumHitAttributesI32s && "Hit attributes are too large!"); if (PayloadHitAttrI32s == TraceRayInfo.MaximumNumHitAttributesI32s) { // Hit attributes have maximum size, no need to use specialized layout return TraceRayInfo.LayoutsByKind[LayoutKind]; @@ -2028,60 +1868,43 @@ PAQSerializationInfoManager::getOrCreateTraceRayLayout( auto It = HitGroupLayouts.find(PayloadHitAttrI32s); if (It == HitGroupLayouts.end()) { // Create new specialized hit group layout - PAQHitGroupLayoutInfo HitGroupLayout = - TraceRayInfo.createHitGroupLayoutInfo(*Mod, PayloadHitAttrI32s); - It = HitGroupLayouts.insert({PayloadHitAttrI32s, std::move(HitGroupLayout)}) - .first; + PAQHitGroupLayoutInfo HitGroupLayout = TraceRayInfo.createHitGroupLayoutInfo(*Mod, PayloadHitAttrI32s); + It = HitGroupLayouts.insert({PayloadHitAttrI32s, std::move(HitGroupLayout)}).first; } const PAQHitGroupLayoutInfo &HitGroupLayoutInfo = It->second; if (LayoutKind 
== PAQSerializationLayoutKind::AnyHitOutAcceptHit) return HitGroupLayoutInfo.AnyHitOutAcceptHitLayout; - assert(LayoutKind == PAQSerializationLayoutKind::ClosestHitIn && - "Unexpected layout kind!"); + assert(LayoutKind == PAQSerializationLayoutKind::ClosestHitIn && "Unexpected layout kind!"); return HitGroupLayoutInfo.ClosestHitInLayout; } -const PAQSerializationLayout & -PAQSerializationInfoManager::getOrCreateShaderStartSerializationLayout( - PAQSerializationInfoBase &SerializationInfo, - lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy) { +const PAQSerializationLayout &PAQSerializationInfoManager::getOrCreateShaderStartSerializationLayout( + PAQSerializationInfoBase &SerializationInfo, lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy) { assert(ShaderKind != lgc::rt::RayTracingShaderStage::RayGeneration && - ShaderKind != lgc::rt::RayTracingShaderStage::Intersection && - "Invalid shader kind!"); + ShaderKind != lgc::rt::RayTracingShaderStage::Intersection && "Invalid shader kind!"); if (ShaderKind == lgc::rt::RayTracingShaderStage::Callable) - return cast(SerializationInfo) - .CallShaderSerializationLayout; + return cast(SerializationInfo).CallShaderSerializationLayout; // Always set for non-intersection - PAQShaderStage ShaderStage = - rtShaderStageToPAQShaderStage(ShaderKind).value(); + PAQShaderStage ShaderStage = rtShaderStageToPAQShaderStage(ShaderKind).value(); // Always set for non-caller, non-intersection read access - PAQSerializationLayoutKind LayoutKind = - tryDetermineLayoutKind(ShaderStage, PAQAccessKind::Read).value(); - return getOrCreateTraceRayLayout( - cast(SerializationInfo), LayoutKind, - HitAttributesTy); + PAQSerializationLayoutKind LayoutKind = tryDetermineLayoutKind(ShaderStage, PAQAccessKind::Read).value(); + return getOrCreateTraceRayLayout(cast(SerializationInfo), LayoutKind, HitAttributesTy); } -const PAQSerializationLayout & -PAQSerializationInfoManager::getOrCreateShaderExitSerializationLayout( - 
PAQSerializationInfoBase &SerializationInfo, - lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy, +const PAQSerializationLayout &PAQSerializationInfoManager::getOrCreateShaderExitSerializationLayout( + PAQSerializationInfoBase &SerializationInfo, lgc::rt::RayTracingShaderStage ShaderKind, Type *HitAttributesTy, AnyHitExitKind AHExitKind) { assert(ShaderKind != lgc::rt::RayTracingShaderStage::RayGeneration && - ShaderKind != lgc::rt::RayTracingShaderStage::Intersection && - "Invalid shader kind!"); + ShaderKind != lgc::rt::RayTracingShaderStage::Intersection && "Invalid shader kind!"); if (ShaderKind == lgc::rt::RayTracingShaderStage::Callable) - return cast(SerializationInfo) - .CallShaderSerializationLayout; + return cast(SerializationInfo).CallShaderSerializationLayout; - PAQShaderStage ShaderStage = - rtShaderStageToPAQShaderStage(ShaderKind).value(); - std::optional OptLayoutKind = - tryDetermineLayoutKind(ShaderStage, PAQAccessKind::Write); + PAQShaderStage ShaderStage = rtShaderStageToPAQShaderStage(ShaderKind).value(); + std::optional OptLayoutKind = tryDetermineLayoutKind(ShaderStage, PAQAccessKind::Write); if (!OptLayoutKind) { // Only for anyhit there are multiple outgoing layout alternatives assert(ShaderStage == PAQShaderStage::AnyHit && "Unexpected shader stage!"); @@ -2089,14 +1912,12 @@ PAQSerializationInfoManager::getOrCreateShaderExitSerializationLayout( if (AHExitKind == AnyHitExitKind::IgnoreHit) { OptLayoutKind = PAQSerializationLayoutKind::AnyHitOutIgnoreHit; } else if (AHExitKind == AnyHitExitKind::AcceptHitAndEndSearch) { - OptLayoutKind = - PAQSerializationLayoutKind::AnyHitOutAcceptHitAndEndSearch; + OptLayoutKind = PAQSerializationLayoutKind::AnyHitOutAcceptHitAndEndSearch; } else { assert(AHExitKind == AnyHitExitKind::AcceptHit); OptLayoutKind = PAQSerializationLayoutKind::AnyHitOutAcceptHit; } } - return getOrCreateTraceRayLayout( - cast(SerializationInfo), - OptLayoutKind.value(), HitAttributesTy); + return 
getOrCreateTraceRayLayout(cast(SerializationInfo), OptLayoutKind.value(), + HitAttributesTy); } diff --git a/llvmraytracing/lib/RegisterBuffer.cpp b/llvmraytracing/lib/RegisterBuffer.cpp deleted file mode 100644 index c69c5d53eb..0000000000 --- a/llvmraytracing/lib/RegisterBuffer.cpp +++ /dev/null @@ -1,775 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ - -//===- RegisterBuffer.cpp - Split an array into register and memory -------===// -// -// A pass that converts a global to be partially buffered in registers and spill -// to memory. 
-// -// This pass handles all globals marked with registerbuffer metadata: -// @GLOBAL = external global [20 x i32], !registerbuffer !1 -// !1 = !{ i32 15 } -// -// The global has to be an array. The registerbuffer metadata contains a -// single i32 that specifies the number of buffered array elements that are -// in registers. -// In the previous example, 15 elements are put into registers. The user of this -// pass is responsible for saving the pointer to the memory region that stores -// the rest of the elements, which will contain element 15 to 20 in this -// example. -// -// The result will be a smaller global, representing the register part: -// @GLOBAL = external addrspace(20) global [15 x i32] -// -// After the buffer is lowered, the memory pointer is accessed -// through the intrinsics -// i32 addrspace(32)* @registerbuffer.getpointer.a20i32([20 x i32] -// addrspace(20)*) -// A later pass needs to find these and change them to the actual memory -// pointer. -// -// For changing a global access to accessing the memory pointer, all GEPs and -// casts are copied to use (getpointer() - sizeof(GLOBAL)) as the base address. -// This ensures that the correct offset will be reached, no matter how it is -// computed. -// -//===----------------------------------------------------------------------===// - -#include "llvmraytracing/Continuations.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include -#include - -using namespace llvm; - -#define DEBUG_TYPE "register-buffer" - -/// Try to find a static offset of the address relative to the global. 
-static std::optional findOffset(const DataLayout &DL, - GlobalValue *Global, Value *Address) { - // Strip casts - while (true) { - if (auto *C = dyn_cast(Address)) { - Address = C->getOperand(0); - } else if (auto *C = dyn_cast(Address)) { - Address = C->getOperand(0); - } else if (auto *C = dyn_cast(Address)) { - if (C->getOpcode() == Instruction::BitCast) - Address = C->getOperand(0); - else if (C->getOpcode() == Instruction::AddrSpaceCast) - Address = C->getOperand(0); - else - break; - } else { - break; - } - } - - if (Address == Global) - return 0; - - if (auto *GEP = dyn_cast(Address)) { - APInt Offset(DL.getIndexSizeInBits(GEP->getPointerAddressSpace()), 0); - if (GEP->accumulateConstantOffset(DL, Offset)) { - if (auto O = findOffset(DL, Global, GEP->getPointerOperand())) - return Offset.getZExtValue() + *O; - } - } - return {}; -} - -static Function *getRegisterBufferGetPointer(Module &M, - Type *RegisterBufferType, - unsigned Addrspace) { - SmallVector StrBuf; // 32 chars for a double digit array - auto *ElemTy = RegisterBufferType->getArrayElementType(); - uint64_t RegisterBufferSize = RegisterBufferType->getArrayNumElements(); - uint64_t IntSize = ElemTy->getPrimitiveSizeInBits(); - auto Name = (Twine("registerbuffer.getpointer.a") + - Twine(RegisterBufferSize) + "i" + Twine(IntSize)) - .toStringRef(StrBuf); - if (auto *F = M.getFunction(Name)) - return F; - auto &C = M.getContext(); - AttributeList AL = AttributeList::get( - C, AttributeList::FunctionIndex, - {Attribute::NoFree, Attribute::NoRecurse, Attribute::NoSync, - Attribute::NoUnwind, Attribute::WillReturn}); - auto *Func = cast( - M.getOrInsertFunction( - Name, AL, ElemTy->getPointerTo(Addrspace), - RegisterBufferType->getPointerTo(GlobalRegisterAddrspace)) - .getCallee()); - Func->setOnlyReadsMemory(); - return Func; -} - -/// Return a pointer to the memory region by getting the memory address from the -/// intrinsic and subtracting the size of the global. 
-static Value *getMemoryPtr(IRBuilder<> &Builder, GlobalValue *Global, - uint64_t RegisterCount, unsigned Addrspace) { - auto *BufferTy = Global->getValueType(); - auto *Ty = BufferTy->getArrayElementType(); - auto *GetPtr = - getRegisterBufferGetPointer(*Global->getParent(), BufferTy, Addrspace); - Value *MemPtr = Builder.CreateCall(GetPtr, {Global}); - MemPtr = Builder.CreateGEP(Ty, MemPtr, Builder.getInt32(-RegisterCount)); - return Builder.CreateBitCast( - MemPtr, Global->getValueType()->getPointerTo( - MemPtr->getType()->getPointerAddressSpace())); -} - -static Instruction *createLoadStore(IRBuilder<> &Builder, Type *Ty, - Value *StoreVal, Value *Address, - Align Alignment, AAMDNodes AATags, - bool IsLoad) { - Instruction *LoadStore; - if (IsLoad) - LoadStore = Builder.CreateAlignedLoad(Ty, Address, Alignment); - else - LoadStore = Builder.CreateAlignedStore(StoreVal, Address, Alignment); - if (AATags) - LoadStore->setAAMetadata(AATags); - return LoadStore; -} - -Value *RegisterBufferPass::computeMemAddr(IRBuilder<> &Builder, - Value *Address) { - if (Address == Global) - return getMemoryPtr(Builder, Global, Data.RegisterCount, Data.Addrspace); - - if (MemAccessors.count(Address)) - return MemAccessors[Address]; - - IRBuilder<>::InsertPointGuard Guard(Builder); - - // Do not cache constant expressions, we don't know where in the code they - // need to be duplicated - bool DoCache = false; - if (auto *Inst = dyn_cast(Address)) { - DoCache = true; - Builder.SetInsertPoint(Inst); - } - - Value *New; - if (auto *Inst = dyn_cast(Address)) { - auto *Src = Inst->getPointerOperand(); - Value *MemSrc = computeMemAddr(Builder, Src); - // Clone instruction without inbounds (may be out-of-bounds in memory for - // the register part) - SmallVector Indices(Inst->indices()); - New = Builder.CreateGEP(Inst->getSourceElementType(), MemSrc, Indices); - } else if (auto *Inst = dyn_cast(Address)) { - auto *Src = Inst->getOperand(0); - Value *MemSrc = computeMemAddr(Builder, Src); 
- New = Builder.CreateCast( - Inst->getOpcode(), MemSrc, - getWithSamePointeeType(cast(Inst->getDestTy()), - Data.Addrspace)); - } else if (auto *Inst = dyn_cast(Address)) { - if (Inst->isCast()) { - auto *Src = Inst->getOperand(0); - Value *MemSrc = computeMemAddr(Builder, Src); - New = Builder.CreateCast( - static_cast(Inst->getOpcode()), MemSrc, - getWithSamePointeeType(cast(Inst->getType()), - Data.Addrspace)); - } else { - LLVM_DEBUG(Address->dump()); - llvm_unreachable( - "Unhandled constant when rebasing pointer path to memory"); - } - } else { - LLVM_DEBUG(Address->dump()); - llvm_unreachable( - "Unhandled instruction when rebasing pointer path to memory"); - } - - if (DoCache) - MemAccessors[Address] = New; - return New; -} - -Value *RegisterBufferPass::handleSingleLoadStore( - IRBuilder<> &Builder, Type *Ty, Value *StoreVal, Value *Address, - Align Alignment, AAMDNodes AATags, bool IsLoad) { - LLVM_DEBUG(dbgs() << "register buffer: Check address " << *Address << "\n"); - assert(IsLoad != (!!StoreVal) && "Expected either IsLoad or StoreVal"); - - const DataLayout &DL = Global->getParent()->getDataLayout(); - std::optional Offset = findOffset(DL, Global, Address); - -#ifndef NDEBUG - // Check if the offset is out-of-bounds - uint32_t ElementSize = DL.getTypeStoreSize(ElementType); - if (Offset && (*Offset / ElementSize) >= TotalElementCount) { - dbgs() << "Out-of-bounds access at index " << *Offset << " into global " - << *Global << " with total size " << TotalElementCount << "\n"; - llvm_unreachable("Out-of-bounds register buffer access"); - } -#endif - - // Change load/store to use addrspace(20) - auto *AddressType = cast(Address->getType()); - Address = Builder.CreateAddrSpaceCast( - Address, getWithSamePointeeType(AddressType, GlobalRegisterAddrspace)); - - // If only registers are accessed, emit a simple load/store - if (TotalElementCount <= Data.RegisterCount) - return createLoadStore(Builder, Ty, StoreVal, Address, Alignment, AATags, - IsLoad); - - 
// If the offset is known, emit a load/store statically - if (Offset) { - LLVM_DEBUG(dbgs() << "register buffer: Found constant offset: " - << Offset.value() << "\n"); - uint64_t ElementSize = ElementType->getPrimitiveSizeInBits() / 8; - const uint32_t Index = Offset.value() / ElementSize; - if (Index < Data.RegisterCount) { - LLVM_DEBUG(dbgs() << "register buffer: " << Index << " < " - << Data.RegisterCount << " => register\n"); - // Access goes into the register part - return createLoadStore(Builder, Ty, StoreVal, Address, Alignment, AATags, - IsLoad); - } - LLVM_DEBUG(dbgs() << "register buffer: " << Index - << " >= " << Data.RegisterCount << " => memory\n"); - - // Get memory address - auto *Addr = computeMemAddr(Builder, Address); - - // Convert to load from memory - return createLoadStore(Builder, Ty, StoreVal, Addr, Alignment, AATags, - IsLoad); - } - LLVM_DEBUG(dbgs() << "register buffer: Found dynamic offset\n"); - - // Add a dynamic switch based on the address - uint64_t RegistersByteCount = - DL.getTypeStoreSize(Global->getValueType()).getFixedValue(); - - if (RegistersByteCount > 0) { - LLVM_DEBUG(dbgs() << "register buffer: Add dynamic switch\n"); - auto *GlobalInt = Builder.CreatePtrToInt(Global, Builder.getInt32Ty()); - auto *AddressInt = Builder.CreatePtrToInt(Address, Builder.getInt32Ty()); - auto *Difference = Builder.CreateSub(AddressInt, GlobalInt); - Instruction *InsertI = &*Builder.GetInsertPoint(); - auto ResetInsertPoint = make_scope_exit( - [InsertI, &Builder]() { Builder.SetInsertPoint(InsertI); }); - - Instruction *Then; - Instruction *Else; - auto *Cond = - Builder.CreateICmpULT(Difference, Builder.getInt32(RegistersByteCount)); - SplitBlockAndInsertIfThenElse(Cond, InsertI, &Then, &Else); - BasicBlock *TailBB = InsertI->getParent(); - BasicBlock *ThenBB = Then->getParent(); - - // Access goes into the register part - Builder.SetInsertPoint(Then); - Instruction *ThenLoadStore = createLoadStore(Builder, Ty, StoreVal, Address, - Alignment, 
AATags, IsLoad); - - // Not in the register range - auto *Addr = computeMemAddr(Builder, Address); - Builder.SetInsertPoint(Else); - - Instruction *ElseLoadStore = - createLoadStore(Builder, Ty, StoreVal, Addr, Alignment, AATags, IsLoad); - if (IsLoad) { - Builder.SetInsertPoint(&*TailBB->getFirstInsertionPt()); - auto *PHI = Builder.CreatePHI(Ty, 2); - PHI->addIncoming(ThenLoadStore, ThenBB); - PHI->addIncoming(ElseLoadStore, ElseLoadStore->getParent()); - return PHI; - } - return ElseLoadStore; - } - LLVM_DEBUG( - dbgs() << "register buffer: register-part is empty, always use memory\n"); - // RegistersByteCount is zero, so we know that the access goes into the memory - // part - auto *Addr = computeMemAddr(Builder, Address); - return createLoadStore(Builder, Ty, StoreVal, Addr, Alignment, AATags, - IsLoad); -} - -namespace { - -/// Generic recursive split emission class. -/// The OpSplitter originates from the SROA pass and is extended to split -/// integers into smaller sizes. -template class OpSplitter { -protected: - /// The builder used to form new instructions. - IRBuilder<> IRB; - - /// The indices which to be used with insert- or extractvalue to select the - /// appropriate value within the aggregate. - SmallVector Indices; - - /// The indices to a GEP instruction which will move Ptr to the correct slot - /// within the aggregate. - SmallVector GEPIndices; - - /// The base pointer of the original op, used as a base for GEPing the - /// split operations. - Value *Ptr; - - /// The base pointee type being GEPed into. - Type *BaseTy; - - /// Known alignment of the base pointer. - Align BaseAlign; - - /// To calculate offset of each component so we can correctly deduce - /// alignments. - const DataLayout &DL; - - /// Initialize the splitter with an insertion point, Ptr and start with a - /// single zero GEP index. 
- OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, - Align BaseAlign, const DataLayout &DL) - : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), - BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {} - -public: - /// Parts of a load that is split in multiple int loads. - struct LoadStorePart { - // In Bytes - uint64_t Offset; - // In Bytes - uint64_t Size; - }; - - /// Generic recursive split emission routine. - /// - /// This method recursively splits an aggregate op (load or store) into - /// scalar or vector ops. It splits recursively until it hits a single value - /// and emits that single value operation via the template argument. - /// - /// The logic of this routine relies on GEPs and insertvalue and - /// extractvalue all operating with the same fundamental index list, merely - /// formatted differently (GEPs need actual values). - /// - /// \param Ty The type being split recursively into smaller ops. - /// \param Agg The aggregate value being built up or stored, depending on - /// whether this is splitting a load or a store respectively. - void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) { - if (Ty->isSingleValueType()) { - unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices); - Align Alignment = commonAlignment(BaseAlign, Offset); - - // Split too large and unaligned values - // Load the single value and insert it using the indices. 
- uint64_t Size = DL.getTypeStoreSize(Ty).getFixedValue(); - // Split types bigger than a register - uint64_t SingleSize = - std::min(Size, static_cast(RegisterBytes)); - // Split unaligned types into byte - if (Alignment.value() < SingleSize) - SingleSize = 1; - - if (SingleSize < Size) { - // Use a packed struct to describe and load all the parts - SmallVector Elements; - // Split load - uint64_t Offset = 0; - while (Offset < Size) { - uint64_t ThisPartSize = std::min(SingleSize, Size - Offset); - Elements.push_back(IRB.getIntNTy(ThisPartSize * 8)); - - Offset += ThisPartSize; - } - auto *StructTy = StructType::get(IRB.getContext(), Elements, true); - return static_cast(this)->emitFunc(Ty, Agg, StructTy, - Alignment, Name); - } - - return static_cast(this)->emitFunc(Ty, Agg, nullptr, Alignment, - Name); - } - - if (ArrayType *ATy = dyn_cast(Ty)) { - unsigned OldSize = Indices.size(); - (void)OldSize; - for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size; ++Idx) { - assert(Indices.size() == OldSize && "Did not return to the old size"); - Indices.push_back(Idx); - GEPIndices.push_back(IRB.getInt32(Idx)); - emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx)); - GEPIndices.pop_back(); - Indices.pop_back(); - } - return; - } - - if (StructType *STy = dyn_cast(Ty)) { - unsigned OldSize = Indices.size(); - (void)OldSize; - for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size; ++Idx) { - assert(Indices.size() == OldSize && "Did not return to the old size"); - Indices.push_back(Idx); - GEPIndices.push_back(IRB.getInt32(Idx)); - emitSplitOps(STy->getElementType(Idx), Agg, Name + "." 
+ Twine(Idx)); - GEPIndices.pop_back(); - Indices.pop_back(); - } - return; - } - - llvm_unreachable("Only arrays and structs are aggregate loadable types"); - } -}; - -struct LoadOpSplitter : public OpSplitter { - RegisterBufferPass *Pass; - AAMDNodes AATags; - - LoadOpSplitter(RegisterBufferPass *Pass, Instruction *InsertionPoint, - Value *Ptr, Type *BaseTy, AAMDNodes AATags, Align BaseAlign, - const DataLayout &DL) - : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL), - Pass(Pass), AATags(AATags) {} - - /// Emit a leaf load of a single value. This is called at the leaves of the - /// recursive emission to actually load values. - void emitFunc(Type *Ty, Value *&Agg, StructType *Parts, Align Alignment, - const Twine &Name) { - assert(Ty->isSingleValueType()); - // Load the single value and insert it using the indices. - Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - - unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices); - Value *Load = nullptr; - if (Parts) { - Load = PoisonValue::get(Parts); - Value *ElemTyPtr = - IRB.CreateBitCast(GEP, Parts->getPointerTo(), Name + ".ptr"); - - // A struct cannot be cast into an integer, so we store it in an alloca - // and cast the pointer instead. The packed struct may have padding and a - // greater store size, ignore that. - assert(DL.getTypeStoreSize(Ty).getFixedValue() <= - DL.getTypeStoreSize(Parts) && - "Type sizes do not match"); - // Load parts - for (unsigned PartI = 0; PartI < Parts->getStructNumElements(); PartI++) { - auto *Part = Parts->getStructElementType(PartI); - Value *PtrI = IRB.CreateConstInBoundsGEP2_32( - Parts, ElemTyPtr, 0, PartI, Name + ".gep." 
+ Twine(PartI)); - - APInt FieldOffsetInt( - DL.getIndexSizeInBits(PtrI->getType()->getPointerAddressSpace()), - 0); - bool FieldOffsetSuccess = GEPOperator::accumulateConstantOffset( - Parts, {IRB.getInt64(0), IRB.getInt64(PartI)}, DL, FieldOffsetInt); - assert(FieldOffsetSuccess && - "Failed to compute field offset of packed struct"); - (void)FieldOffsetSuccess; - uint64_t FieldOffset = FieldOffsetInt.getZExtValue(); - - Value *LoadStorePart = Pass->handleSingleLoadStore( - IRB, Part, nullptr, PtrI, commonAlignment(Alignment, FieldOffset), - AATags ? AATags.shift(Offset + FieldOffset) : AATags, true); - - // Insert into struct - Load = IRB.CreateInsertValue(Load, LoadStorePart, {PartI}, - Name + ".insert." + Twine(PartI)); - } - - auto *InsertPoint = &*IRB.GetInsertPoint(); - IRB.SetInsertPointPastAllocas(InsertPoint->getFunction()); - auto *Alloca = IRB.CreateAlloca(Parts, nullptr, Name + ".alloca"); - IRB.SetInsertPoint(InsertPoint); - - IRB.CreateStore(Load, Alloca); - auto *CastTyPtr = - IRB.CreateBitCast(Alloca, Ty->getPointerTo(Alloca->getAddressSpace()), - Name + ".alloca.cast"); - Load = IRB.CreateAlignedLoad(Ty, CastTyPtr, Alloca->getAlign(), - Name + ".alloca.load"); - } else { - Load = Pass->handleSingleLoadStore(IRB, Ty, nullptr, GEP, Alignment, - AATags ? AATags.shift(Offset) : AATags, - true); - } - - if (Agg->getType()->isAggregateType()) - Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); - else - Agg = Load; - LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); - } -}; - -struct StoreOpSplitter : public OpSplitter { - RegisterBufferPass *Pass; - AAMDNodes AATags; - - StoreOpSplitter(RegisterBufferPass *Pass, Instruction *InsertionPoint, - Value *Ptr, Type *BaseTy, AAMDNodes AATags, Align BaseAlign, - const DataLayout &DL) - : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL), - Pass(Pass), AATags(AATags) {} - - /// Emit a leaf store of a single value. 
This is called at the leaves of the - /// recursive emission to actually produce stores. - void emitFunc(Type *Ty, Value *&Agg, StructType *Parts, Align Alignment, - const Twine &Name) { - assert(Ty->isSingleValueType()); - // Extract the single value and store it using the indices. - // - // The gep and extractvalue values are factored out of the CreateStore - // call to make the output independent of the argument evaluation order. - Value *Val; - if (Agg->getType()->isAggregateType()) - Val = IRB.CreateExtractValue(Agg, Indices, Name + ".extract"); - else - Val = Agg; - - Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - - unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices); - if (Parts) { - Value *ElemTyPtr = - IRB.CreateBitCast(GEP, Parts->getPointerTo(), Name + ".ptr"); - - // A struct cannot be cast from an integer, so we store it in an alloca - // and cast the pointer instead. The packed struct may have padding a a - // greater store size, ignore that. - assert(DL.getTypeStoreSize(Ty).getFixedValue() <= - DL.getTypeStoreSize(Parts) && - "Type sizes do not match"); - auto *InsertPoint = &*IRB.GetInsertPoint(); - IRB.SetInsertPointPastAllocas(InsertPoint->getFunction()); - auto *Alloca = IRB.CreateAlloca(Parts, nullptr, Name + ".alloca"); - IRB.SetInsertPoint(InsertPoint); - - auto *CastTyPtr = - IRB.CreateBitCast(Alloca, Ty->getPointerTo(Alloca->getAddressSpace()), - Name + ".alloca.cast"); - IRB.CreateAlignedStore(Val, CastTyPtr, Alloca->getAlign()); - Value *CastVal = IRB.CreateLoad(Parts, Alloca, Name + ".alloca.load"); - - // Store parts - for (unsigned PartI = 0; PartI < Parts->getStructNumElements(); PartI++) { - auto *Part = Parts->getStructElementType(PartI); - Value *PtrI = IRB.CreateConstInBoundsGEP2_32( - Parts, ElemTyPtr, 0, PartI, Name + ".gep." + Twine(PartI)); - - Value *ThisVal = IRB.CreateExtractValue( - CastVal, {PartI}, Name + ".extract." 
+ Twine(PartI)); - - APInt FieldOffsetInt( - DL.getIndexSizeInBits(PtrI->getType()->getPointerAddressSpace()), - 0); - bool FieldOffsetSuccess = GEPOperator::accumulateConstantOffset( - Parts, {IRB.getInt64(0), IRB.getInt64(PartI)}, DL, FieldOffsetInt); - assert(FieldOffsetSuccess && - "Failed to compute field offset of packed struct"); - (void)FieldOffsetSuccess; - uint64_t FieldOffset = FieldOffsetInt.getZExtValue(); - - Pass->handleSingleLoadStore( - IRB, Part, ThisVal, PtrI, commonAlignment(Alignment, FieldOffset), - AATags ? AATags.shift(Offset + FieldOffset) : AATags, false); - } - } else { - Pass->handleSingleLoadStore(IRB, Ty, Val, GEP, Alignment, - AATags ? AATags.shift(Offset) : AATags, - false); - } - - LLVM_DEBUG(dbgs() << " to: Store\n"); - } -}; - -} // namespace - -RegisterBufferPass::RegisterBufferPass() {} - -/// Compute the adjusted alignment for a load or store from an offset. -static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) { - return commonAlignment(getLoadStoreAlignment(I), Offset); -} - -/// Either stores StoreVal or returns the result from a load. -void RegisterBufferPass::handleLoadStore(IRBuilder<> &Builder, Instruction *I, - Value *Address, bool IsLoad) { - LLVM_DEBUG(dbgs() << "register buffer: Check address " << *Address << "\n"); - - // Split usages of structs/arrays, unaligned loads/stores, and loads/stores - // bigger than the register size. 
- const auto &DL = I->getModule()->getDataLayout(); - if (IsLoad) { - LoadOpSplitter Splitter(this, I, cast(I)->getPointerOperand(), - I->getType(), I->getAAMetadata(), - getAdjustedAlignment(I, 0), DL); - Value *V = PoisonValue::get(I->getType()); - Splitter.emitSplitOps(I->getType(), V, I->getName() + ".fca"); - I->replaceAllUsesWith(V); - I->eraseFromParent(); - } else { - auto *SI = cast(I); - Value *V = SI->getValueOperand(); - StoreOpSplitter Splitter(this, I, SI->getPointerOperand(), V->getType(), - I->getAAMetadata(), getAdjustedAlignment(I, 0), - DL); - Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); - I->eraseFromParent(); - } -} - -llvm::PreservedAnalyses -RegisterBufferPass::run(llvm::Module &M, - llvm::ModuleAnalysisManager &AnalysisManager) { - LLVM_DEBUG(dbgs() << "Run the pass register-buffer\n"); - - MemAccessors.clear(); - IRBuilder<> Builder(M.getContext()); - - bool Changed = false; - - for (auto &OldGlobal : make_early_inc_range(M.globals())) { - const auto *MD = OldGlobal.getMetadata("registerbuffer"); - if (!MD) - continue; - Changed = true; - Data = getRegisterBufferMetadata(MD); - - // Check that the global is an [_ x i32] with a size greater than the size - // specified in metadata. 
- auto *ATy = dyn_cast(OldGlobal.getValueType()); - TotalElementCount = ATy->getArrayNumElements(); - assert(ATy && "register buffer global must be an array"); - ElementType = dyn_cast(ATy->getElementType()); - assert(ElementType && ElementType->getIntegerBitWidth() == 32 && - "register buffer global must be an array of i32"); - - // Create a new global with the right size and addrspace - auto *NewATy = ArrayType::get( - ElementType, std::min(Data.RegisterCount, TotalElementCount)); - Global = cast(M.getOrInsertGlobal("", NewATy, [&] { - return new GlobalVariable( - M, NewATy, false, GlobalVariable::ExternalLinkage, nullptr, - OldGlobal.getName(), nullptr, GlobalVariable::NotThreadLocal, - GlobalRegisterAddrspace); - })); - Global->takeName(&OldGlobal); - Global->setUnnamedAddr(OldGlobal.getUnnamedAddr()); - Global->setVisibility(OldGlobal.getVisibility()); - Global->setThreadLocalMode(OldGlobal.getThreadLocalMode()); - Global->setDLLStorageClass(OldGlobal.getDLLStorageClass()); - Global->setPartition(OldGlobal.getPartition()); - Global->setLinkage(OldGlobal.getLinkage()); - - // Replace with a bitcast to the previous addrspace - // and gather uses. - auto *CastNewGlobal = ConstantExpr::getPointerBitCastOrAddrSpaceCast( - Global, OldGlobal.getType()); - OldGlobal.replaceAllUsesWith(CastNewGlobal); - OldGlobal.eraseFromParent(); - - // RAUW may fold casts, so we need to search uses of NewGlobal, not of - // CastNewGlobal - DenseSet UseList(Global->user_begin(), Global->user_end()); - SmallVector UseWorklist(Global->user_begin(), Global->user_end()); - // Collect first to prevent constant expressions from being removed while we - // iterate over them. 
- SmallVector Uses; - while (!UseWorklist.empty()) { - auto *Use = UseWorklist.pop_back_val(); - - bool IsConstExprCast = false; - if (auto *Const = dyn_cast(Use)) { - auto OpCode = Const->getOpcode(); - IsConstExprCast = OpCode == Instruction::GetElementPtr || - OpCode == Instruction::BitCast || - OpCode == Instruction::AddrSpaceCast; - } - - if (isa(Use) || IsConstExprCast) { - for (auto *U : Use->users()) { - if (!UseList.count(U)) { - UseList.insert(U); - UseWorklist.push_back(U); - } else { - LLVM_DEBUG(dbgs() << "Already there " << *U << "\n"); - } - } - } else if (isa(Use)) { - Uses.push_back(Use); - } else { - LLVM_DEBUG(dbgs() << "Failed to handle use of global: " << *Use - << "\n"); - llvm_unreachable("Failed to handle global use"); - } - } - - // Go through all uses and handle loads, stores and intrinsic calls - for (auto *Use : Uses) { - LLVM_DEBUG(dbgs() << "Handle use " << *Use << "\n"); - - if (auto *I = dyn_cast(Use)) { - handleLoadStore(Builder, I, I->getPointerOperand(), true); - } else if (auto *I = dyn_cast(Use)) { - handleLoadStore(Builder, I, I->getPointerOperand(), false); - } else if (auto *I = dyn_cast(Use)) { - if (auto *Intr = I->getCalledFunction()) { - // Handle intrinsics - auto Name = Intr->getName(); - // Ignore registerbuffer.setpointerbarrier barriers but leave them in - // the code - if (Name.starts_with("registerbuffer.setpointerbarrier")) - continue; - - if (Name.starts_with("llvm.lifetime.")) { - // Remove lifetime intrinsics, these are an optimization only - } else { - LLVM_DEBUG(dbgs() << "Failed to handle call taking global address: " - << *Use << "\n"); - llvm_unreachable("Failed to handle call taking global address"); - } - I->eraseFromParent(); - } else { - LLVM_DEBUG(dbgs() << "Failed to handle call taking global address: " - << *Use << "\n"); - llvm_unreachable("Failed to handle call taking global address"); - } - } - } - } - - if (Changed) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} diff 
--git a/llvmraytracing/lib/RemoveTypesMetadata.cpp b/llvmraytracing/lib/RemoveTypesMetadata.cpp index 031e3f84da..c0f78a8f75 100644 --- a/llvmraytracing/lib/RemoveTypesMetadata.cpp +++ b/llvmraytracing/lib/RemoveTypesMetadata.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -23,9 +23,9 @@ * **********************************************************************************************************************/ -//===- RemoveTypesMetadata.cpp - Erase !types metadata --------------------===// +//===- RemoveTypesMetadata.cpp - Erase !pointeetys metadata ---------------===// // -// A pass that removes !types metadata from functions. +// A pass that removes !pointeetys metadata from functions. 
// //===----------------------------------------------------------------------===// @@ -37,15 +37,13 @@ using namespace llvm; #define DEBUG_TYPE "remove-types-metadata" -llvm::PreservedAnalyses -RemoveTypesMetadataPass::run(llvm::Module &M, - llvm::ModuleAnalysisManager &AnalysisManager) { +llvm::PreservedAnalyses RemoveTypesMetadataPass::run(llvm::Module &M, llvm::ModuleAnalysisManager &AnalysisManager) { LLVM_DEBUG(dbgs() << "Run remove-types-metadata pass\n"); bool Changed = false; for (Function &F : M) { - if (F.hasMetadata("types")) { - F.setMetadata("types", nullptr); + if (F.hasMetadata("pointeetys")) { + F.setMetadata("pointeetys", nullptr); Changed = true; } } diff --git a/llvmraytracing/lib/TypesMetadata.cpp b/llvmraytracing/lib/TypesMetadata.cpp deleted file mode 100644 index c06f116290..0000000000 --- a/llvmraytracing/lib/TypesMetadata.cpp +++ /dev/null @@ -1,277 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - **********************************************************************************************************************/ - -//===- TypesMetadata.cpp - Generators, decoders and wrappers for metadata --==// -// -// This file implements metadata functions for the DXIL continuations -// -//===----------------------------------------------------------------------===// - -#include "llvmraytracing/Continuations.h" - -namespace llvm { - -ContArgTy::ContArgTy(Type *Arg) { - assert(!Arg->isPointerTy() && - "pointers are not supported by this constructor"); - ArgTy = Arg; - ElemTy = nullptr; -} - -ContArgTy ContArgTy::get(const Function *F, const Argument *Arg) { - // only consult metadata for pointer types - Type *ArgTy = Arg->getType(); - if (!ArgTy->isPointerTy()) - return ContArgTy(ArgTy, nullptr); - - // types metadata of the form { !"function", , - // , ... 
} - auto *TypesMD = F->getMetadata(ContHelper::MDTypesName); - if (TypesMD) { - unsigned ArgNo = Arg->getArgNo() + 2; - assert(ArgNo < TypesMD->getNumOperands() && - "insufficient operands in types metadata"); - - ContArgTy Result = get(&*TypesMD->getOperand(ArgNo), F->getContext()); - - return Result; - } - - report_fatal_error("Missing metadata for pointer type!"); -} - -ContArgTy ContArgTy::get(const Function *F, const unsigned ArgNo) { - return get(F, F->getArg(ArgNo)); -} - -ContArgTy ContArgTy::get(const Metadata *MD, LLVMContext &Context) { - if (const auto *ConstantMD = dyn_cast(MD)) { - return ContArgTy(ConstantMD->getType(), nullptr); - } - if (const auto *StringMD = dyn_cast(MD)) { - assert(StringMD->getString() == ContHelper::MDTypesVoidName && - "unknown string in types metadata"); - return ContArgTy(Type::getVoidTy(Context)); - } - if (const auto *PointerMD = dyn_cast(MD)) { - assert(PointerMD && PointerMD->getNumOperands() == 2 && - "invalid pointer metadata"); - - auto *AddressSpaceMD = - dyn_cast(PointerMD->getOperand(0)); - assert(AddressSpaceMD && "invalid address space metadata"); - auto *AddressSpace = dyn_cast(AddressSpaceMD->getValue()); - assert(AddressSpace && "invalid address space metadata"); - - if (const auto *ValueMD = - dyn_cast(PointerMD->getOperand(1))) { - Type *ElemTy = ValueMD->getType(); - Type *PtrTy = - ElemTy->getPointerTo((unsigned)AddressSpace->getZExtValue()); - return ContArgTy(PtrTy, ElemTy); - } - } - - assert(false && "unknown node type in types metadata"); - return ContArgTy(Type::getVoidTy(Context)); -} - -Type *ContArgTy::asType(LLVMContext &Context) { return ArgTy; } - -Type *ContArgTy::getPointerElementType() const { - assert(ElemTy && "cannot get element type of non-pointer"); - return ElemTy; -} - -bool ContArgTy::isPointerTy() const { return !!ElemTy; } - -bool ContArgTy::isVoidTy() const { return (!ArgTy || ArgTy->isVoidTy()); } - -Metadata *ContArgTy::getTypeMetadata(LLVMContext &Context) { - if (isVoidTy()) 
- return MDString::get(Context, ContHelper::MDTypesVoidName); - - if (!ElemTy) { - assert(ArgTy && !ArgTy->isPointerTy()); - return ConstantAsMetadata::get(PoisonValue::get(ArgTy)); - } - - assert(!ElemTy->isFunctionTy() && "cannot encode function pointers"); - - // Return !{, } for pointer - SmallVector MD; - MD.push_back(ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Context), ArgTy->getPointerAddressSpace()))); - MD.push_back(ConstantAsMetadata::get(PoisonValue::get(ElemTy))); - return MDTuple::get(Context, MD); -} - -ContFuncTy ContFuncTy::get(const Function *F) { - auto *TypesMD = F->getMetadata(ContHelper::MDTypesName); - assert(TypesMD); - - return get(TypesMD, F->getContext()); -} - -ContFuncTy ContFuncTy::get(const Metadata *MD, LLVMContext &Context) { - // Decode types metadata of the form { !"function", , - // , ... } - const MDNode *TypesMD = dyn_cast(MD); - if (!TypesMD) - report_fatal_error("Invalid metadata type for function."); - - assert(TypesMD->getNumOperands() >= 2 && "invalid function metadata"); - assert(isa(TypesMD->getOperand(0)) && - dyn_cast(TypesMD->getOperand(0))->getString() == - ContHelper::MDTypesFunctionName && - "metadata is not a function type"); - - ContFuncTy FuncTy; - for (unsigned OpNo = 1; OpNo < TypesMD->getNumOperands(); ++OpNo) { - Metadata *Arg = TypesMD->getOperand(OpNo); - FuncTy.ArgTys.push_back(ContArgTy::get(Arg, Context)); - } - // FIXME: do something more efficient - assert(FuncTy.ArgTys.size() >= 1); - FuncTy.ReturnTy = FuncTy.ArgTys[0]; - FuncTy.ArgTys.erase(FuncTy.ArgTys.begin()); - return FuncTy; -} - -FunctionType *ContFuncTy::asFunctionType(LLVMContext &Context) { - SmallVector FuncArgTys; - for (auto Arg : ArgTys) - FuncArgTys.push_back(Arg.asType(Context)); - return FunctionType::get(ReturnTy.asType(Context), FuncArgTys, false); -} - -void ContFuncTy::writeMetadata(Function *F) { - // Don't generate metadata if there are no pointers - if (!ReturnTy.isPointerTy() && - llvm::none_of(ArgTys, - 
[](const ContArgTy &Arg) { return Arg.isPointerTy(); })) - return; - - LLVMContext &Context = F->getContext(); - SmallVector SignatureMD; - - // Encode types metadata of the form { !"function", , - // , ... } - SignatureMD.push_back( - MDString::get(Context, ContHelper::MDTypesFunctionName)); - SignatureMD.push_back(ReturnTy.getTypeMetadata(Context)); - for (auto ArgTy : ArgTys) - SignatureMD.push_back(ArgTy.getTypeMetadata(Context)); - - assert(SignatureMD.size() >= 2 && "return type must be specified"); - F->setMetadata(ContHelper::MDTypesName, MDTuple::get(Context, SignatureMD)); -} - -static Metadata *getTypeMetadataEntry(unsigned TypeID, LLVMContext &Context, - GetTypeByIDTy GetTypeByID, - GetContainedTypeIDTy GetContainedTypeID); - -// Recursively look into a (pointer) type and build metadata description. -// For primitive types it's a poison value of the type, for a pointer it's a -// metadata tuple with the addrspace and the referenced type. For a function, -// it's a tuple where the first element is the string "function", the second -// element is the return type or the string "void" and the following elements -// are the argument types. 
-static Metadata * -getTypeMetadataEntryImpl(Type *Ty, unsigned TypeID, LLVMContext &Context, - GetTypeByIDTy GetTypeByID, - GetContainedTypeIDTy GetContainedTypeID) { - if (auto *FTy = dyn_cast(Ty)) { - // Don't generate metadata if there are no pointers - if (!FTy->getReturnType()->isPointerTy() && - llvm::none_of(FTy->params(), [](const Type *ParamTy) { - return ParamTy->isPointerTy(); - })) - return nullptr; - // Save the function signature as metadata - SmallVector SignatureMD; - SignatureMD.push_back( - MDString::get(Context, ContHelper::MDTypesFunctionName)); - // Return type - if (FTy->getReturnType()->isVoidTy()) { - SignatureMD.push_back( - MDString::get(Context, ContHelper::MDTypesVoidName)); - } else { - SignatureMD.push_back(getTypeMetadataEntry(GetContainedTypeID(TypeID, 0), - Context, GetTypeByID, - GetContainedTypeID)); - } - - // Arguments - for (unsigned I = 0; I != FTy->getNumParams(); ++I) { - SignatureMD.push_back( - getTypeMetadataEntry(GetContainedTypeID(TypeID, I + 1), Context, - GetTypeByID, GetContainedTypeID)); - } - - return MDTuple::get(Context, SignatureMD); - } - - if (!Ty->isPointerTy()) - return ConstantAsMetadata::get(PoisonValue::get(Ty)); - - // Return !{, } for pointer - SmallVector MD; - MD.push_back(ConstantAsMetadata::get(ConstantInt::get( - Type::getInt32Ty(Context), Ty->getPointerAddressSpace()))); - MD.push_back(getTypeMetadataEntry(GetContainedTypeID(TypeID, 0), Context, - GetTypeByID, GetContainedTypeID)); - return MDTuple::get(Context, MD); -} - -static Metadata *getTypeMetadataEntry(unsigned TypeID, LLVMContext &Context, - GetTypeByIDTy GetTypeByID, - GetContainedTypeIDTy GetContainedTypeID) { - auto *Ty = GetTypeByID(TypeID); - Metadata *MD = getTypeMetadataEntryImpl(Ty, TypeID, Context, GetTypeByID, - GetContainedTypeID); - if (!MD) - return nullptr; - - assert(((Ty->isFunctionTy() && - ContFuncTy::get(MD, Context).asFunctionType(Context) == Ty) || - (!Ty->isFunctionTy() && - ContArgTy::get(MD, 
Context).asType(Context) == Ty)) && - "MD Type mismatch"); - return MD; -} - -void DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, - GetTypeByIDTy GetTypeByID, - GetContainedTypeIDTy GetContainedTypeID) { - if (auto *F = dyn_cast(V)) { - auto *MD = getTypeMetadataEntry(TypeID, F->getContext(), GetTypeByID, - GetContainedTypeID); - if (MD) - F->setMetadata(ContHelper::MDTypesName, llvm::cast(MD)); - } -} - -} // End namespace llvm diff --git a/llvmraytracing/plugin/Plugin.cpp b/llvmraytracing/plugin/Plugin.cpp index 12ad6a3b93..01f5d3c91b 100644 --- a/llvmraytracing/plugin/Plugin.cpp +++ b/llvmraytracing/plugin/Plugin.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -36,14 +36,11 @@ // New PM registration llvm::PassPluginLibraryInfo getRaytracingPluginPluginInfo() { return {LLVM_PLUGIN_API_VERSION, "Raytracing", LLVM_VERSION_STRING, - [](llvm::PassBuilder &PB) { - llvm::ContHelper::RegisterPasses(PB, true); - }}; + [](llvm::PassBuilder &PB) { llvm::ContHelper::RegisterPasses(PB, true); }}; } #ifndef LLVM_RAYTRACINGPLUGIN_LINK_INTO_TOOLS -extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo -llvmGetPassPluginInfo() { +extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() { return getRaytracingPluginPluginInfo(); } #endif diff --git a/llvmraytracing/test/dx/cleanup-continuations-malloc.ll b/llvmraytracing/test/dx/cleanup-continuations-malloc.ll index cef999368a..87993ba422 100644 --- a/llvmraytracing/test/dx/cleanup-continuations-malloc.ll +++ b/llvmraytracing/test/dx/cleanup-continuations-malloc.ll @@ -18,7 +18,7 @@ define <4 x i32> @simple_await(i64 %dummyRet, <4 x i32> %arg) !continuation.regi ; CHECK-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CHECK-NEXT: store i64 [[DUMMYRET]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await.resume.0) -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] ; CHECK-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -36,7 +36,7 @@ define void @simple_await_entry(i64 %dummyRet, <4 x i32> %arg, <4 x i32> addrspa ; CHECK-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CHECK-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await_entry.resume.0) -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CHECK-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] ; CHECK-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 diff --git a/llvmraytracing/test/dx/cleanup-continuations.ll b/llvmraytracing/test/dx/cleanup-continuations.ll index 6f61f71de3..838de9210f 100644 --- a/llvmraytracing/test/dx/cleanup-continuations.ll +++ b/llvmraytracing/test/dx/cleanup-continuations.ll @@ -7,6 +7,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %await_with_ret_value.Frame = type { i64 } %simple_await.Frame = type { i64 } %simple_await_entry.Frame = type { } +%phi_of_cont_state.Frame = type { i32, i32 } declare %continuation.token* @async_fun() declare i32 @lgc.ilcps.getReturnValue__i32() #0 @@ -21,7 +22,7 @@ define { i8*, %continuation.token* } @simple_await(i64 
%dummyRet, i8* %0) !conti ; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await.resume.0) -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] +; CHECK-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] ; CHECK-NEXT: unreachable ; AllocaSpillBB: @@ -43,7 +44,7 @@ define internal { i8*, %continuation.token* } @simple_await.resume.0(i8* noalias ; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 ; CHECK-NEXT: call void @lgc.cps.free(i32 8) -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]], i64 poison, i64 undef), !continuation.registercount [[META2]] +; CHECK-NEXT: call void (...) @lgc.ilcps.continue(i64 [[DOTRELOAD]], i32 poison, i64 poison, i64 undef), !continuation.registercount [[META2]] ; CHECK-NEXT: unreachable ; entryresume.0: @@ -62,7 +63,7 @@ define { i8*, %continuation.token* } @simple_await_entry(i64 %dummyRet, i8* %0) ; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await_entry.resume.0) -; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] +; CHECK-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] ; CHECK-NEXT: unreachable ; AllocaSpillBB: @@ -101,7 +102,7 @@ define { i8*, %continuation.token* } @await_with_ret_value(i64 %dummyRet, i8* %0 ; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @await_with_ret_value.resume.0) -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP1]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] +; CHECK-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP1]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] ; CHECK-NEXT: unreachable ; %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* @@ -121,7 +122,7 @@ define internal { i8*, %continuation.token* } @await_with_ret_value.resume.0(i8* ; CHECK-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i64, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 ; CHECK-NEXT: call void @lgc.cps.free(i32 8) -; CHECK-NEXT: call void (i64, ...) @continuation.continue(i64 [[DOTRELOAD]], i64 poison, i32 [[RES1]], i64 undef), !continuation.registercount [[META2]] +; CHECK-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[DOTRELOAD]], i32 poison, i64 poison, i32 [[RES1]], i64 undef), !continuation.registercount [[META2]] ; CHECK-NEXT: unreachable ; %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* @@ -133,6 +134,86 @@ define internal { i8*, %continuation.token* } @await_with_ret_value.resume.0(i8* unreachable } +; unreachables in their own block added by switch case statements should be ignored +define { i8*, %continuation.token* } @switch_case_unreachable(i64 %dummyRet, i8* %0) !continuation !6 !continuation.registercount !4 { +; CHECK-LABEL: define void @switch_case_unreachable( +; CHECK-SAME: i64 [[DUMMYRET:%.*]]) !continuation [[META7:![0-9]+]] !continuation.registercount [[META2]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[FRAMEPTR:%.*]] = bitcast ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]] to ptr addrspace(32) +; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[FRAMEPTR]], i32 0, i32 0 +; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 +; CHECK-NEXT: [[VAL:%.*]] = urem i64 [[DUMMYRET]], 2 +; CHECK-NEXT: switch i64 [[VAL]], label [[UNREACHABLE:%.*]] [ +; CHECK-NEXT: i64 0, label [[A:%.*]] +; CHECK-NEXT: i64 1, label [[B:%.*]] +; CHECK-NEXT: ] +; CHECK: unreachable: +; CHECK-NEXT: unreachable +; CHECK: b: +; CHECK-NEXT: br label [[A]] +; CHECK: a: +; CHECK-NEXT: call void @lgc.cps.free(i32 8) +; CHECK-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[DUMMYRET]], i32 poison, i64 poison, i32 5, i64 undef), !continuation.registercount [[META2]] +; CHECK-NEXT: unreachable +; + %FramePtr = bitcast i8* %0 to %await_with_ret_value.Frame* + %.spill.addr = getelementptr inbounds %await_with_ret_value.Frame, %await_with_ret_value.Frame* %FramePtr, i32 0, i32 0 + store i64 -1, i64* %.spill.addr, align 4 + %val = urem i64 %dummyRet, 2 + switch i64 %val, label %unreachable [ + i64 0, label %a + i64 1, label %b + ] + +unreachable: + unreachable + +b: + br label %a + +a: + call void (i64, ...) @lgc.ilcps.return(i64 %dummyRet, i32 5, i64 undef), !continuation.registercount !4 + unreachable +} + +; Check that phis on the continuation state compile +define { i8*, %continuation.token* } @phi_of_cont_state(i64 %dummyRet, ptr %FramePtr) !continuation !7 !continuation.registercount !4 { +; CHECK-LABEL: define void @phi_of_cont_state( +; CHECK-SAME: i64 [[DUMMYRET:%.*]]) !continuation [[META8:![0-9]+]] !continuation.registercount [[META2]] !continuation.stacksize [[META3]] !continuation.state [[META3]] { +; CHECK-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CHECK-NEXT: [[COND:%.*]] = trunc i64 [[DUMMYRET]] to i1 +; CHECK-NEXT: br i1 [[COND]], label [[LA:%.*]], label [[LB:%.*]] +; CHECK: la: +; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[PHI_OF_CONT_STATE_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: lb: +; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [[PHI_OF_CONT_STATE_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[C:%.*]] = phi ptr addrspace(32) [ [[A]], [[LA]] ], [ [[B]], [[LB]] ] +; CHECK-NEXT: store i64 -1, ptr addrspace(32) [[C]], align 4 +; CHECK-NEXT: call void @lgc.cps.free(i32 8) +; CHECK-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[DUMMYRET]], i32 poison, i64 poison, i32 5, i64 undef), !continuation.registercount [[META2]] +; CHECK-NEXT: unreachable +; + %cond = trunc i64 %dummyRet to i1 + br i1 %cond, label %la, label %lb + +la: + %a = getelementptr inbounds %phi_of_cont_state.Frame, ptr %FramePtr, i32 0, i32 0 + br label %end + +lb: + %b = getelementptr inbounds %phi_of_cont_state.Frame, ptr %FramePtr, i32 0, i32 1 + br label %end + +end: + %c = phi ptr [ %a, %la ], [ %b, %lb ] + store i64 -1, ptr %c, align 4 + call void (i64, ...) @lgc.ilcps.return(i64 %dummyRet, i32 5, i64 undef), !continuation.registercount !4 + unreachable +} + attributes #0 = { nounwind } !continuation.stackAddrspace = !{!5} @@ -143,12 +224,15 @@ attributes #0 = { nounwind } !3 = !{{ i8*, %continuation.token* } (i8*)* @simple_await_entry} !4 = !{i32 0} !5 = !{i32 21} +!6 = !{{ i8*, %continuation.token* } (i8*)* @switch_case_unreachable} +!7 = !{{ i8*, %continuation.token* } (i8*)* @phi_of_cont_state} ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: readwrite) } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind willreturn } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: readwrite) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { noreturn } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind willreturn memory(inaccessiblemem: read) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { noreturn nounwind } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 21} ; CHECK: [[META1]] = !{ptr @simple_await} @@ -157,4 +241,6 @@ attributes #0 = { nounwind } ; CHECK: [[META4]] = !{ptr @simple_await_entry} ; CHECK: [[META5]] = !{} ; CHECK: [[META6]] = !{ptr @await_with_ret_value} +; CHECK: [[META7]] = !{ptr @switch_case_unreachable} +; CHECK: [[META8]] = !{ptr @phi_of_cont_state} ;. 
diff --git a/llvmraytracing/test/dx/closest-hit-procedural.ll b/llvmraytracing/test/dx/closest-hit-procedural.ll index efbe700e7b..d719f40f40 100644 --- a/llvmraytracing/test/dx/closest-hit-procedural.ll +++ b/llvmraytracing/test/dx/closest-hit-procedural.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s ; Check a procedural closest hit shader with hit attributes that does not fit into system data alone @@ -26,23 +26,21 @@ declare i64 @_cont_GetTraversalAddr() #0 declare i32 @_cont_GetContinuationStackAddr() #0 -declare !types !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 
+declare !pointeetys !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !17 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !17 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !18 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !18 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 declare %struct.DispatchSystemData @_cont_Traversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -declare !types !20 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 +declare !pointeetys !20 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 -declare !types !22 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !22 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !23 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !23 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -54,7 +52,7 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !25 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float 
%11, float %12, float %13) #0 !pointeetys !25 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -63,7 +61,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !26 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !26 { %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 @@ -71,144 +69,165 @@ define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hi } ; Function Attrs: nounwind memory(none) -declare !types !27 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !27 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !27 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !27 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !28 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !28 
float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !29 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(read) -declare !types !30 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !30 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !23 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !23 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !32 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !32 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !32 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !32 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !32 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !32 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !33 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 float 
@_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !33 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !34 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !34 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !35 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !35 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.HitAttributes* nocapture readonly %attr) #3 !types !36 { +define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.HitAttributes* nocapture readonly %attr) #3 !pointeetys !36 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] { ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_HITATTRIBUTES:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 
1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[HITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr addrspace(20) @PAYLOAD, i32 0, i32 0, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[TMP23]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspace(20) @PAYLOAD, i32 0, i32 0, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr [[TMP41]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP38]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP39]]), !continuation.registercount [[META16]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP39]], [19 x i32] poison, [10 x i32] [[TMP40]]), !continuation.registercount [[META16]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] !continuation.state [[META14:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] 
[[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP6]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_02_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_02_0_VEC_EXTRACT]] to i32 
-; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_02_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_02_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S:%.*]], ptr addrspace(20) @REGISTERS, i32 0, i32 0, i32 1), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_3_CLOSESTHIT_IN_PAYLOAD_ATTR_2_I32S]], ptr addrspace(20) @REGISTERS, i32 0, i32 0, i32 2), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP2]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_09_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_09_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_09_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_09_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP11]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP5]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP12]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT7:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] 
[[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP6]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT7]], [19 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ret void } ; Function Attrs: nounwind memory(read) -declare !types !39 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 +declare !pointeetys !39 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -216,10 +235,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !40 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !40 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !40 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !40 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind memory(none) } @@ -250,30 +269,30 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !12 = !{void (%struct.RayPayload*, %struct.HitAttributes*)* @ClosestHit, !"ClosestHit", null, null, !13} !13 = !{i32 8, i32 10, i32 5, !14} !14 = !{i32 0} -!15 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !16} +!15 = !{%struct.SystemData poison} !16 = !{i32 0, %struct.SystemData poison} -!17 = !{!"function", !"void", !16, %struct.BuiltInTriangleIntersectionAttributes poison} -!18 = !{!"function", i1 poison, !19} +!17 = !{%struct.SystemData poison} +!18 = !{%struct.TraversalData poison} !19 = !{i32 0, %struct.TraversalData poison} -!20 = !{!"function", %struct.HitData poison, !21} +!20 = !{%struct.AnyHitTraversalData poison} !21 = !{i32 0, %struct.AnyHitTraversalData poison} -!22 = !{!"function", %struct.HitData poison, !16} -!23 = !{!"function", i32 poison, !24} +!22 = !{%struct.SystemData poison} +!23 = !{%struct.DispatchSystemData poison} !24 = !{i32 0, %struct.DispatchSystemData poison} -!25 = !{!"function", !"void", !24, i64 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!26 = !{!"function", i1 poison, !21, float poison, i32 poison} -!27 = !{!"function", i32 poison, !24, i32 poison} -!28 = !{!"function", float poison, !24, i32 poison} -!29 = !{!"function", float poison, !24} -!30 = !{!"function", float poison, !24, !31} +!25 = !{%struct.DispatchSystemData poison} +!26 = !{%struct.AnyHitTraversalData poison} +!27 = !{%struct.DispatchSystemData poison} +!28 = !{%struct.DispatchSystemData poison} +!29 = !{%struct.DispatchSystemData poison} +!30 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !31 = !{i32 0, %struct.HitData poison} -!32 = !{!"function", i32 poison, !24, !31} -!33 = !{!"function", float poison, !24, !31, i32 poison} -!34 = !{!"function", float poison, !24, !31, i32 poison, i32 poison} -!35 = !{!"function", i32 poison, !16, !31} -!36 = !{!"function", !"void", !37, !38} +!32 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!33 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!34 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!35 = !{null, %struct.SystemData poison, %struct.HitData poison} +!36 = !{null, %struct.RayPayload poison, %struct.HitAttributes poison} !37 = !{i32 0, %struct.RayPayload poison} !38 = !{i32 0, %struct.HitAttributes poison} -!39 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !37} -!40 = !{!"function", !"void", i64 poison, !41} +!39 = !{%struct.RayPayload poison} +!40 = !{i8 poison} !41 = !{i32 0, i8 poison} diff --git a/llvmraytracing/test/dx/closest-hit-traceray.ll b/llvmraytracing/test/dx/closest-hit-traceray.ll index 0b161b949d..b31ab7befd 100644 --- 
a/llvmraytracing/test/dx/closest-hit-traceray.ll +++ b/llvmraytracing/test/dx/closest-hit-traceray.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -23,23 +23,21 @@ declare i64 @_cont_GetTraversalAddr() #0 declare i32 @_cont_GetContinuationStackAddr() #0 -declare !types !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 
+declare !pointeetys !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !17 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !17 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !18 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !18 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 declare %struct.DispatchSystemData @amd.dx.Traversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -declare !types !20 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 +declare !pointeetys !20 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 -declare !types !22 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !22 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !23 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !23 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -51,7 +49,7 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !25 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float 
%11, float %12, float %13) #0 !pointeetys !25 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -60,7 +58,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !26 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !26 { %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 @@ -68,71 +66,76 @@ define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hi } ; Function Attrs: nounwind memory(none) -declare !types !27 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !27 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !27 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !27 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !28 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !28 float 
@_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !29 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(read) -declare !types !30 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !30 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !23 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !23 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !32 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !32 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !32 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !32 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !32 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !32 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !33 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 float 
@_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !33 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !34 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !34 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !35 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !35 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !36 { +define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !36 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR5:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] 
!continuation.registercount [[META16:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP2]], align 4 @@ -143,85 +146,101 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. 
; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = alloca [[STRUCT_RAYPAYLOAD]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = bitcast ptr [[TMP25]] to ptr -; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP26]]) #[[ATTR10:[0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP25]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP23]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP28]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP29]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = alloca [[STRUCT_RAYPAYLOAD]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = bitcast ptr [[TMP31]] to ptr +; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP32]]) #[[ATTR10:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP31]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> zeroinitializer, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP29]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP35]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP37]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[NEWDATA_I:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AMD_DX_TRAVERSAL:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store 
[[STRUCT_DISPATCHSYSTEMDATA]] [[NEWDATA_I]], ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[NEWDATA_I]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP34]], ptr [[TMP47]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP47]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP47]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr [[TMP38]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP44]]), !continuation.registercount [[META16]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP44]], [19 x i32] poison, [10 x i32] [[TMP50]]), !continuation.registercount [[META16]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] !continuation.registercount [[META16:![0-9]+]] !continuation.state [[META14:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2:[0-9]+]] !continuation [[META18:![0-9]+]] !lgc.rt.shaderstage [[META19:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; 
DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP6]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] 
[[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP2]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_08_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_08_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_08_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_08_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP11]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP12]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP14]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP5]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP10]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DIS_DATA_I_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DIS_DATA_I_FCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AMD_DX_TRAVERSAL:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]]) ; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[NEWDATA_I]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I_FCA_0_GEP:%.*]] = getelementptr 
inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP14]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[NEWDATA_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP10]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[NEWDATA_I_FCA_0_EXTRACT]], ptr [[NEWDATA_I_FCA_0_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP15]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP11]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT6:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP12]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT6]], [19 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -283,10 +302,10 @@ declare float @dx.op.worldToObject.f32(i32, i32, i8) #1 declare float @dx.op.objectToWorld.f32(i32, i32, i8) #1 ; Function Attrs: nounwind -declare !types !39 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #4 +declare !pointeetys !39 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #4 ; Function Attrs: nounwind memory(read) -declare !types !40 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 +declare !pointeetys !40 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -294,10 +313,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !41 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !41 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !41 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !41 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind memory(none) } @@ -329,31 +348,31 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !12 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !13} !13 = !{i32 8, i32 10, i32 5, !14} !14 = !{i32 0} -!15 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !16} +!15 = !{%struct.SystemData poison} !16 = !{i32 0, %struct.SystemData poison} -!17 = !{!"function", !"void", !16, %struct.BuiltInTriangleIntersectionAttributes poison} -!18 = !{!"function", i1 poison, !19} +!17 = !{%struct.SystemData poison} +!18 = !{%struct.TraversalData poison} !19 = !{i32 0, %struct.TraversalData poison} -!20 = !{!"function", %struct.HitData poison, !21} +!20 = !{%struct.AnyHitTraversalData poison} !21 = !{i32 0, %struct.AnyHitTraversalData poison} -!22 = !{!"function", %struct.HitData poison, !16} -!23 = !{!"function", i32 poison, !24} +!22 = !{%struct.SystemData poison} +!23 = !{%struct.DispatchSystemData poison} !24 = !{i32 0, %struct.DispatchSystemData poison} -!25 = !{!"function", !"void", !24, 
i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!26 = !{!"function", i1 poison, !21, float poison, i32 poison} -!27 = !{!"function", i32 poison, !24, i32 poison} -!28 = !{!"function", float poison, !24, i32 poison} -!29 = !{!"function", float poison, !24} -!30 = !{!"function", float poison, !24, !31} +!25 = !{%struct.DispatchSystemData poison} +!26 = !{%struct.AnyHitTraversalData poison} +!27 = !{%struct.DispatchSystemData poison} +!28 = !{%struct.DispatchSystemData poison} +!29 = !{%struct.DispatchSystemData poison} +!30 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !31 = !{i32 0, %struct.HitData poison} -!32 = !{!"function", i32 poison, !24, !31} -!33 = !{!"function", float poison, !24, !31, i32 poison} -!34 = !{!"function", float poison, !24, !31, i32 poison, i32 poison} -!35 = !{!"function", i32 poison, !16, !31} -!36 = !{!"function", !"void", !37, !38} +!32 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!33 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!34 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!35 = !{null, %struct.SystemData poison, %struct.HitData poison} +!36 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !37 = !{i32 0, %struct.RayPayload poison} !38 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!39 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !38} -!40 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !37} -!41 = !{!"function", !"void", i64 poison, !42} +!39 = !{%struct.BuiltInTriangleIntersectionAttributes poison} +!40 = !{%struct.RayPayload poison} +!41 = 
!{i8 poison} !42 = !{i32 0, i8 poison} diff --git a/llvmraytracing/test/dx/closest-hit.ll b/llvmraytracing/test/dx/closest-hit.ll index 1b22be001c..fe1e19f8f4 100644 --- a/llvmraytracing/test/dx/closest-hit.ll +++ b/llvmraytracing/test/dx/closest-hit.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -15,23 +15,21 @@ declare i64 @_cont_GetTraversalAddr() #0 declare i32 @_cont_GetContinuationStackAddr() #0 -declare !types !9 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !9 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !11 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !11 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !12 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !12 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 declare %struct.DispatchSystemData @_cont_Traversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData 
@_cont_SetupRayGen() #0 - declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -declare !types !14 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 +declare !pointeetys !14 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 -declare !types !16 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !16 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !17 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !17 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -39,7 +37,7 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !19 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !19 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -48,7 +46,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !20 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !20 { %trav_data = 
load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 @@ -56,65 +54,68 @@ define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hi } ; Function Attrs: nounwind memory(none) -declare !types !21 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !21 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !21 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !21 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !22 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !22 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !22 float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !22 float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !23 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !23 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(read) -declare !types !24 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !24 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !17 i32 
@_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !17 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !26 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !26 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !26 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !26 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !26 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !26 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !27 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !27 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !27 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !27 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !28 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, 
%struct.HitData*, i32, i32) #1 +declare !pointeetys !28 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !29 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !30 { +define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !30 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META13:![0-9]+]] !continuation [[META14:![0-9]+]] !continuation.registercount [[META10:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) #[[ATTR4:[0-9]+]] !lgc.rt.shaderstage [[META13:![0-9]+]] !continuation [[META14:![0-9]+]] !continuation.registercount [[META10:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [8 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]], ptr [[TMP2]], align 4 @@ -131,13 +132,15 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. 
; LOWERRAYTRACINGPIPELINE-NEXT: store <2 x float> [[BARY]], ptr [[PTR]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP27]]), !continuation.registercount [[META10]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [8 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP27]], [19 x i32] poison, [8 x i32] [[TMP24]]), !continuation.registercount [[META10]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; %ptr = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 @@ -169,27 +172,27 @@ attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="fa !6 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !7} !7 = !{i32 8, i32 10, i32 5, !8} !8 = !{i32 0} -!9 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !10} +!9 = !{%struct.SystemData poison} !10 = !{i32 0, %struct.SystemData poison} -!11 = !{!"function", !"void", !10, %struct.BuiltInTriangleIntersectionAttributes poison} -!12 = !{!"function", i1 poison, !13} +!11 = !{%struct.SystemData poison} +!12 = !{%struct.TraversalData poison} !13 = !{i32 0, %struct.TraversalData poison} -!14 = !{!"function", %struct.HitData poison, !15} +!14 = !{%struct.AnyHitTraversalData poison} !15 = !{i32 0, %struct.AnyHitTraversalData poison} -!16 = !{!"function", %struct.HitData poison, !10} -!17 = !{!"function", i32 poison, !18} +!16 = !{%struct.SystemData poison} +!17 = !{%struct.DispatchSystemData poison} !18 = !{i32 0, %struct.DispatchSystemData poison} -!19 = !{!"function", !"void", !18, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!20 = !{!"function", i1 poison, !15, float poison, i32 poison} -!21 = !{!"function", i32 poison, !18, i32 poison} -!22 = !{!"function", float poison, !18, i32 poison} -!23 = !{!"function", float poison, !18} -!24 = !{!"function", float poison, !18, !25} +!19 = !{%struct.DispatchSystemData poison} +!20 = !{%struct.AnyHitTraversalData poison} +!21 = !{%struct.DispatchSystemData poison} +!22 = !{%struct.DispatchSystemData poison} 
+!23 = !{%struct.DispatchSystemData poison} +!24 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !25 = !{i32 0, %struct.HitData poison} -!26 = !{!"function", i32 poison, !18, !25} -!27 = !{!"function", float poison, !18, !25, i32 poison} -!28 = !{!"function", float poison, !18, !25, i32 poison, i32 poison} -!29 = !{!"function", i32 poison, !10, !25} -!30 = !{!"function", !"void", !31, !32} +!26 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!27 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!28 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!29 = !{null, %struct.SystemData poison, %struct.HitData poison} +!30 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !31 = !{i32 0, %struct.RayPayload poison} !32 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} diff --git a/llvmraytracing/test/dx/continuation-registercount.ll b/llvmraytracing/test/dx/continuation-registercount.ll index cbe8bf81d0..a972f1e5a1 100644 --- a/llvmraytracing/test/dx/continuation-registercount.ll +++ b/llvmraytracing/test/dx/continuation-registercount.ll @@ -1,9 +1,9 @@ ; RUN: grep -v MAX_REG_10 %s | \ -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S --lint-abort-on-error | \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error | \ ; RUN: FileCheck -check-prefixes=COMMON,MAX30 %s ; ; RUN: grep -v MAX_REG_30 %s | \ -; 
RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S --lint-abort-on-error | \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error | \ ; RUN: FileCheck -check-prefixes=COMMON,MAX10 %s ; The order of metadata on functions is non-deterministic, so make two different runs to match both of them. @@ -26,7 +26,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.TheirParams2 = type { [27 x i32] } %struct._AmdTraversalResultData = type { %struct._AmdPrimitiveSystemState, <2 x float>, i32 } %struct._AmdPrimitiveSystemState = type { float, i32, i32, i32 } -%struct._AmdSystemData = type { i32 } +%struct._AmdSystemData = type { %struct._AmdTraversalResultData } %"class.RWTexture2D >" = type { <4 x float> } @"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 @@ -35,9 +35,6 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; Function Attrs: alwaysinline declare i32 @_cont_GetContinuationStackAddr() #0 -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 @@ -48,51 +45,51 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 ; Function Attrs: 
nounwind memory(read) -declare !types !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !27 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #2 +declare !pointeetys !27 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !29 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #2 +declare !pointeetys !29 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !29 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #2 +declare !pointeetys !29 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #2 ; Function Attrs: alwaysinline declare i1 @opaqueIsEnd() #0 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } ; Function Attrs: alwaysinline -define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !types !31 { +define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !pointeetys !31 { %isEnd = call i1 @opaqueIsEnd() ret i1 %isEnd } ; Function Attrs: alwaysinline -define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) #0 !types !33 { +define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) #0 !pointeetys !33 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 %val = load %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret 
%struct.BuiltInTriangleIntersectionAttributes %val } ; Function Attrs: alwaysinline -define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) #0 !types !34 { +define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) #0 !pointeetys !34 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 store %struct.BuiltInTriangleIntersectionAttributes %val, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret void } ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !35 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !35 { ret i32 5 } ; Function Attrs: alwaysinline -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !36 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !36 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -103,7 +100,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: alwaysinline -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !37 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !pointeetys !37 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData 
@_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -112,7 +109,7 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ } ; Function Attrs: alwaysinline -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !38 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !38 { %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 @@ -120,15 +117,8 @@ define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hi ret i1 true } -; COMMON-DAG: ![[MD_I32_1:[0-9]+]] = !{i32 1} -; COMMON-DAG: ![[MD_I32_10:[0-9]+]] = !{i32 10} -; MAX30-DAG: ![[MD_I32_15:[0-9]+]] = !{i32 15} -; MAX30-DAG: ![[MD_I32_26:[0-9]+]] = !{i32 26} -; MAX30-DAG: ![[MD_I32_27:[0-9]+]] = !{i32 27} -; MAX30-DAG: ![[MD_I32_30:[0-9]+]] = !{i32 30} - ; COMMON-DAG: define void @main( -; COMMON-DAG: call void (i64, ...) @continuation.continue(i64 2, {{.*}}, %struct.DispatchSystemData %{{[^ ]+}}), !continuation.registercount ![[MD_I32_10]] +; COMMON-DAG: call void (...) @lgc.cps.jump(i64 2, {{.*}} %struct.DispatchSystemData %{{.*}}, [10 x i32] %{{.*}}) define void @main() { %params = alloca %struct.TheirParams, align 4 @@ -137,9 +127,8 @@ define void @main() { } ; COMMON-DAG: define void @mainTrace( -; MAX10-DAG: call void (i64, ...) @continuation.continue(i64 4, {{.*}} %struct.TraversalData %{{.*}}), !continuation.registercount ![[MD_I32_10]] -; MAX30-DAG: call void (i64, ...) @continuation.continue(i64 4, {{.*}} %struct.TraversalData %{{.*}}), !continuation.registercount ![[MD_I32_15]] - +; MAX10-DAG: call void (...) 
@lgc.cps.jump(i64 4, {{.*}} %struct.TraversalData %{{.*}}, [10 x i32] %{{.*}}) +; MAX30-DAG: call void (...) @lgc.cps.jump(i64 4, {{.*}} %struct.TraversalData %{{.*}}, [15 x i32] %{{.*}}) define void @mainTrace() { %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 @@ -152,23 +141,23 @@ define void @mainTrace() { } ; If we set maxPayloadRegisterCount to 10, both functions use only 10 payload registers. -; MAX10-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX10-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData{{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX30-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.registercount ![[MD_I32_26]] -; MAX30-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData{{.*}} !continuation.registercount ![[MD_I32_27]] +; MAX10-DAG: define void @called({{.*}}%struct.DispatchSystemData %0{{.*}}, [10 x i32] %payload) +; MAX10-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData{{.*}}, [10 x i32] }{{.*}}) +; MAX30-DAG: define void @called({{.*}}%struct.DispatchSystemData %0{{.*}}, [26 x i32] %payload) +; MAX30-DAG: define dso_local void @called.resume.0({{.*}}%struct.DispatchSystemData{{.*}}, [27 x i32] }{{.*}}) -define void @called(%struct.MyParams* %arg) !types !39 { +define void @called(%struct.MyParams* %arg) !pointeetys !39 { %params = alloca %struct.TheirParams2, align 4 call void @dx.op.callShader.struct.TheirParams2(i32 159, i32 2, %struct.TheirParams2* nonnull %params) ret void } -; MAX10-DAG: define void @Intersection({{.*}}%struct.AnyHitTraversalData %0){{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX10-DAG: define dso_local void 
@Intersection.resume.0({{.*}}%struct.AnyHitTraversalData{{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX10-DAG: call void (i64, ...) @continuation.continue(i64 3, {{.*}} float 4.000000e+00, i32 0, %struct.BuiltInTriangleIntersectionAttributes {{.*}}), !continuation.registercount ![[MD_I32_10]] -; MAX30-DAG: define void @Intersection({{.*}}%struct.AnyHitTraversalData %0){{.*}} !continuation.registercount ![[MD_I32_30]] -; MAX30-DAG: define dso_local void @Intersection.resume.0({{.*}}%struct.AnyHitTraversalData{{.*}} !continuation.registercount ![[MD_I32_30]] -; MAX30-DAG: call void (i64, ...) @continuation.continue(i64 3, {{.*}} float 4.000000e+00, i32 0, %struct.BuiltInTriangleIntersectionAttributes {{.*}}), !continuation.registercount ![[MD_I32_30]] +; MAX10-DAG: define void @Intersection({{.*}}%struct.AnyHitTraversalData %0{{.*}}, [10 x i32] %payload) +; MAX10-DAG: define dso_local void @Intersection.resume.0({{.*}}%struct.AnyHitTraversalData{{.*}}, [10 x i32] }{{.*}}) +; MAX10-DAG: call void (...) @lgc.cps.jump(i64 3, {{.*}} float 4.000000e+00, i32 0, %struct.BuiltInTriangleIntersectionAttributes {{.*}}, [10 x i32] %{{.*}}) +; MAX30-DAG: define void @Intersection({{.*}}%struct.AnyHitTraversalData %0{{.*}}, [30 x i32] %payload) +; MAX30-DAG: define dso_local void @Intersection.resume.0({{.*}}%struct.AnyHitTraversalData{{.*}}, [30 x i32] }{{.*}}) +; MAX30-DAG: call void (...) 
@lgc.cps.jump(i64 3, {{.*}} float 4.000000e+00, i32 0, %struct.BuiltInTriangleIntersectionAttributes {{.*}}, [30 x i32] %{{.*}}) define void @Intersection() #3 { %a = alloca %struct.BuiltInTriangleIntersectionAttributes, align 4 @@ -176,40 +165,40 @@ define void @Intersection() #3 { ret void } -; MAX10-DAG: define void @AnyHit({{.*}}%struct.AnyHitTraversalData %0, %struct.BuiltInTriangleIntersectionAttributes %1){{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX30-DAG: define void @AnyHit({{.*}}%struct.AnyHitTraversalData %0, %struct.BuiltInTriangleIntersectionAttributes %1){{.*}} !continuation.registercount ![[MD_I32_15]] +; MAX10-DAG: define void @AnyHit({{.*}}%struct.AnyHitTraversalData %0, %struct.BuiltInTriangleIntersectionAttributes %1{{.*}}, [10 x i32] %payload) +; MAX30-DAG: define void @AnyHit({{.*}}%struct.AnyHitTraversalData %0, %struct.BuiltInTriangleIntersectionAttributes %1{{.*}}, [15 x i32] %payload) -define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !41 { +define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !41 { ret void } ; With fixed hit attribute registers and without PAQs, ClosestHitOut also contains storage for hit attributes -; MAX10-DAG: define void @ClosestHit({{.*}}%struct.SystemData %0){{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX30-DAG: define void @ClosestHit({{.*}}%struct.SystemData %0){{.*}} !continuation.registercount ![[MD_I32_15]] +; MAX10-DAG: define void @ClosestHit({{.*}}%struct.SystemData %0{{.*}}, [10 x i32] %payload) +; MAX30-DAG: define void @ClosestHit({{.*}}%struct.SystemData %0{{.*}}, [15 x i32] %payload) -define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !41 { +define void 
@ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !41 { ret void } -; COMMON-DAG: define void @Miss16({{.*}}%struct.SystemData %0){{.*}} !continuation.registercount ![[MD_I32_1]] -define void @Miss16(%struct.PayloadWithI16* noalias nocapture %payload) !types !55 { +; COMMON-DAG: define void @Miss16({{.*}}%struct.SystemData %0{{.*}}, [1 x i32] %payload) +define void @Miss16(%struct.PayloadWithI16* noalias nocapture %payload) !pointeetys !55 { ret void } -declare void @_AmdEnqueueAnyHit(i64, %struct._AmdSystemData) #0 +declare void @_AmdEnqueueAnyHit(i64, i64, %struct._AmdSystemData, <2 x float>) #0 -; MAX10-DAG: define void @_cont_Traversal({{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX10-DAG: call {{.*}} @continuation.continue({{.*}} !continuation.registercount ![[MD_I32_10]] -; MAX30-DAG: define void @_cont_Traversal({{.*}} !continuation.registercount ![[MD_I32_27]] -; MAX30-DAG: call {{.*}} @continuation.continue({{.*}} !continuation.registercount ![[MD_I32_27]] +; MAX10-DAG: define void @_cont_Traversal({{.*}}, [10 x i32] %payload) +; MAX10-DAG: call {{.*}} @lgc.cps.jump({{.*}}, [10 x i32] %{{.*}}) +; MAX30-DAG: define void @_cont_Traversal({{.*}}, [27 x i32] %payload) +; MAX30-DAG: call {{.*}} @lgc.cps.jump({{.*}}, [27 x i32] %{{.*}}) -define void @_cont_Traversal(%struct._AmdTraversalResultData* noalias nocapture sret(%struct._AmdTraversalResultData) %agg.result, %struct._AmdSystemData* noalias %data) !types !44 { - call void @_AmdEnqueueAnyHit(i64 0, %struct._AmdSystemData undef) +define void @_cont_Traversal(%struct._AmdTraversalResultData* noalias nocapture sret(%struct._AmdTraversalResultData) %agg.result, %struct._AmdSystemData* noalias %data) !pointeetys !44 { + call void @_AmdEnqueueAnyHit(i64 0, i64 poison, %struct._AmdSystemData undef, <2 x float> undef) unreachable } ; Function Attrs: nounwind -declare !types !47 void 
@dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #3 +declare !pointeetys !47 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #3 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 @@ -218,12 +207,12 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 ; Function Attrs: nounwind -declare !types !48 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #3 +declare !pointeetys !48 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #3 ; Function Attrs: nounwind -declare !types !50 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #3 +declare !pointeetys !50 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #3 -declare !types !52 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) +declare !pointeetys !52 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) attributes #0 = { alwaysinline } attributes #1 = { nounwind memory(read) } @@ -265,38 +254,38 @@ attributes #3 = { nounwind } !21 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !22} !22 = !{i32 8, i32 10, i32 5, !8} !23 = !{i32 10} -!24 = !{!"function", i32 poison, !25, !26} +!24 = !{null, %struct.SystemData poison, %struct.HitData poison} !25 = !{i32 0, %struct.SystemData poison} !26 = !{i32 0, %struct.HitData poison} -!27 = !{!"function", !"void", !28} +!27 = 
!{%struct.DispatchSystemData poison} !28 = !{i32 0, %struct.DispatchSystemData poison} -!29 = !{!"function", !"void", !30} +!29 = !{%struct.AnyHitTraversalData poison} !30 = !{i32 0, %struct.AnyHitTraversalData poison} -!31 = !{!"function", i1 poison, !32} +!31 = !{%struct.TraversalData poison} !32 = !{i32 0, %struct.TraversalData poison} -!33 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !25} -!34 = !{!"function", !"void", !25, %struct.BuiltInTriangleIntersectionAttributes poison} -!35 = !{!"function", i32 poison, !28} -!36 = !{!"function", !"void", !28, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!37 = !{!"function", !"void", !28, i32 poison} -!38 = !{!"function", i1 poison, !30, float poison, i32 poison} -!39 = !{!"function", !"void", !40} +!33 = !{%struct.SystemData poison} +!34 = !{%struct.SystemData poison} +!35 = !{%struct.DispatchSystemData poison} +!36 = !{%struct.DispatchSystemData poison} +!37 = !{%struct.DispatchSystemData poison} +!38 = !{%struct.AnyHitTraversalData poison} +!39 = !{%struct.MyParams poison} !40 = !{i32 0, %struct.MyParams poison} -!41 = !{!"function", !"void", !42, !43} +!41 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !42 = !{i32 0, %struct.RayPayload poison} !43 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!44 = !{!"function", !"void", !45, !46} +!44 = !{null, %struct._AmdTraversalResultData poison, %struct._AmdSystemData poison} !45 = !{i32 0, %struct._AmdTraversalResultData poison} !46 = !{i32 0, %struct._AmdSystemData poison} -!47 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !42} -!48 = !{!"function", !"void", i32 
poison, i32 poison, !49} +!47 = !{%struct.RayPayload poison} +!48 = !{%struct.TheirParams poison} !49 = !{i32 0, %struct.TheirParams poison} -!50 = !{!"function", !"void", i32 poison, i32 poison, !51} +!50 = !{%struct.TheirParams2 poison} !51 = !{i32 0, %struct.TheirParams2 poison} -!52 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !43} +!52 = !{%struct.BuiltInTriangleIntersectionAttributes poison} !53 = !{i32 30} !54 = !{i32 27} -!55 = !{!"function", !"void", !56} +!55 = !{%struct.PayloadWithI16 poison} !56 = !{i32 0, %struct.PayloadWithI16 poison} !57 = !{void (%struct.PayloadWithI16*)* @Miss16, !"Miss16", null, null, !58} !58 = !{i32 8, i32 11, i32 6, i32 24, i32 5, !59} diff --git a/llvmraytracing/test/dx/continuation-stacksize.ll b/llvmraytracing/test/dx/continuation-stacksize.ll index 176f580c7f..590090f7f2 100644 --- a/llvmraytracing/test/dx/continuation-stacksize.ll +++ b/llvmraytracing/test/dx/continuation-stacksize.ll @@ -1,7 +1,7 @@ -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS-STACKSIZE %s -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ -; RUN: -S %s --lint-abort-on-error | FileCheck 
-check-prefix=POSTPROCESS-STATESIZE %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ +; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=CLEANUP-STATESIZE %s ; The order of metadata on functions is non-deterministic, so make two different runs to match both of them. @@ -25,8 +25,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; Function Attrs: alwaysinline declare i32 @_cont_GetContinuationStackAddr() #0 -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 +declare !pointeetys !33 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 @@ -35,22 +34,22 @@ declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalDat declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #0 ; Function Attrs: alwaysinline -declare !types !17 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !17 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 ; Function Attrs: nounwind memory(none) -declare !types !19 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !19 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } ; Function Attrs: alwaysinline -define i32 
@_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !21 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !21 { ret i32 5 } ; Function Attrs: alwaysinline -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !22 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !22 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -60,7 +59,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: alwaysinline -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !23 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !pointeetys !23 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -71,10 +70,10 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ ; LOWERRAYTRACINGPIPELINE-STACKSIZE-DAG: define void @main(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[main_stacksize:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-STACKSIZE-DAG: ![[main_stacksize]] = !{i32 140} -; POSTPROCESS-STACKSIZE-DAG: define void @main({{.*}}){{.*}} !continuation.stacksize ![[main_stacksize:[0-9]+]] +; POSTPROCESS-STACKSIZE-DAG: define void @main({{.*}} 
!continuation.stacksize ![[main_stacksize:[0-9]+]] ; POSTPROCESS-STACKSIZE-DAG: ![[main_stacksize]] = !{i32 140} -; POSTPROCESS-STATESIZE-DAG: define void @main({{.*}}){{.*}} !continuation.state ![[main_state:[0-9]+]] -; POSTPROCESS-STATESIZE-DAG: ![[main_state]] = !{i32 0} +; CLEANUP-STATESIZE-DAG: define void @main({{.*}} !continuation.state ![[main_state:[0-9]+]] +; CLEANUP-STATESIZE-DAG: ![[main_state]] = !{i32 0} define void @main() { %params = alloca %struct.TheirParams, align 4 @@ -85,9 +84,9 @@ define void @main() { ; LOWERRAYTRACINGPIPELINE-STACKSIZE-DAG: define void @mainTrace(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[maintrace_stacksize:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-STACKSIZE-DAG: ![[maintrace_stacksize]] = !{i32 180} -; CLEANUP-STACKSIZE-DAG: define void @mainTrace(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[maintrace_stacksize:[0-9]+]] +; CLEANUP-STACKSIZE-DAG: define void @mainTrace{{.*}}%struct.DispatchSystemData{{.*}} !continuation.stacksize ![[maintrace_stacksize:[0-9]+]] ; CLEANUP-STACKSIZE-DAG: ![[maintrace_stacksize]] = !{i32 180} -; CLEANUP-STATESIZE-DAG: define void @mainTrace(%struct.DispatchSystemData %0){{.*}} !continuation.state ![[main_state]] +; CLEANUP-STATESIZE-DAG: define void @mainTrace{{.*}}%struct.DispatchSystemData{{.*}} !continuation.state ![[main_state]] ; SAVESTATE-STACKSIZE-DAG: define void @mainTrace(%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[maintrace_stacksize:[0-9]+]] ; SAVESTATE-STACKSIZE-DAG: ![[maintrace_stacksize]] = !{i32 180} @@ -109,22 +108,22 @@ define void @mainTrace() { ; CLEANUP-STACKSIZE-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[called_stacksize:[0-9]+]] ; CLEANUP-STACKSIZE-DAG: ![[called_stacksize]] = !{i32 348} -; CLEANUP-STATESIZE-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.state ![[called_state:[0-9]+]] +; CLEANUP-STATESIZE-DAG: define void 
@called{{.*}}%struct.DispatchSystemData{{.*}} !continuation.state ![[called_state:[0-9]+]] ; CLEANUP-STATESIZE-DAG: ![[called_state]] = !{i32 204} ; SAVESTATE-STACKSIZE-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.stacksize ![[called_stacksize:[0-9]+]] ; SAVESTATE-STACKSIZE-DAG: ![[called_stacksize]] = !{i32 348} -; SAVESTATE-STATESIZE-DAG: define void @called({{.*}}%struct.DispatchSystemData %0){{.*}} !continuation.state ![[called_state:[0-9]+]] +; SAVESTATE-STATESIZE-DAG: define void @called{{.*}}%struct.DispatchSystemData{{.*}} !continuation.state ![[called_state:[0-9]+]] ; SAVESTATE-STATESIZE-DAG: ![[called_state]] = !{i32 204} -define void @called(%struct.MyParams* %arg) !types !24 { +define void @called(%struct.MyParams* %arg) !pointeetys !24 { %params = alloca %struct.TheirParams2, align 4 call void @dx.op.callShader.struct.TheirParams2(i32 159, i32 2, %struct.TheirParams2* nonnull %params) ret void } ; Function Attrs: nounwind -declare !types !26 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 +declare !pointeetys !26 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -133,10 +132,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 ; Function Attrs: nounwind -declare !types !28 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #2 +declare !pointeetys !28 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #2 ; Function Attrs: nounwind -declare !types !30 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #2 +declare !pointeetys !30 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #2 attributes #0 = { alwaysinline } attributes #1 = { nounwind memory(none) } @@ -166,18 +165,20 @@ attributes #3 = { nounwind memory(read) } !14 = !{i32 8, i32 12} !15 = !{void ()* @mainTrace, !"mainTrace", null, null, !16} !16 = !{i32 8, i32 7} -!17 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !18} +!17 = !{%struct.SystemData poison} !18 = !{i32 0, %struct.SystemData poison} -!19 = !{!"function", !"void", !20} +!19 = !{%struct.DispatchSystemData poison} !20 = !{i32 0, %struct.DispatchSystemData poison} -!21 = !{!"function", i32 poison, !20} -!22 = !{!"function", !"void", !20, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!23 = !{!"function", !"void", !20, i32 poison} -!24 = !{!"function", !"void", !25} +!21 = !{%struct.DispatchSystemData poison} +!22 = !{%struct.DispatchSystemData poison} +!23 = !{%struct.DispatchSystemData poison} +!24 = !{%struct.MyParams poison} !25 = !{i32 0, %struct.MyParams poison} -!26 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !27} +!26 = !{%struct.RayPayload poison} !27 = !{i32 0, %struct.RayPayload poison} -!28 = !{!"function", !"void", i32 poison, 
i32 poison, !29} +!28 = !{%struct.TheirParams poison} !29 = !{i32 0, %struct.TheirParams poison} -!30 = !{!"function", !"void", i32 poison, i32 poison, !31} +!30 = !{%struct.TheirParams2 poison} !31 = !{i32 0, %struct.TheirParams2 poison} +!32 = !{i32 0, %struct.TraversalData poison} +!33 = !{%struct.TraversalData poison} diff --git a/llvmraytracing/test/dx/continuation-state.ll b/llvmraytracing/test/dx/continuation-state.ll index 1d2e413cb4..34a1082bea 100644 --- a/llvmraytracing/test/dx/continuation-state.ll +++ b/llvmraytracing/test/dx/continuation-state.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint' -S %s --lint-abort-on-error | FileCheck -check-prefix=CLEANUP %s -; RUN: opt --verify-each -passes='lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint' \ -; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=REGISTERBUFFER %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -11,8 +9,6 @@ declare void @await.void(%continuation.token*) declare i32 @_cont_GetContinuationStackAddr() declare %continuation.token* @async_fun() -@PAYLOAD = external addrspace(20) global [30 x i32] - define <4 x i32> @simple_await(i64 %returnAddr, <4 x i32> %arg) !continuation.registercount !1 { %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 call void @await.void(%continuation.token* %tok) @@ -42,7 +38,7 @@ define void @simple_await_entry(i64 %returnAddr, <4 x i32> %arg, <4 x i32> addrs ; CLEANUP-NEXT: 
[[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await.resume.0) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] ; CLEANUP-NEXT: unreachable ; ; @@ -55,7 +51,7 @@ define void @simple_await_entry(i64 %returnAddr, <4 x i32> %arg, <4 x i32> addrs ; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 ; CLEANUP-NEXT: call void @lgc.cps.free(i32 24) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i64 poison, <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] +; CLEANUP-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 poison, i64 poison, <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] ; CLEANUP-NEXT: unreachable ; ; @@ -68,7 +64,7 @@ define void @simple_await_entry(i64 %returnAddr, <4 x i32> %arg, <4 x i32> addrs ; CLEANUP-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[TMP0:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @simple_await_entry.resume.0) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] ; CLEANUP-NEXT: unreachable ; ; @@ -86,57 +82,3 @@ define void @simple_await_entry(i64 %returnAddr, <4 x i32> %arg, <4 x i32> addrs ; CLEANUP: entryresume.0.split: ; CLEANUP-NEXT: unreachable ; -; -; REGISTERBUFFER-LABEL: define void @simple_await( -; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]]) !continuation.registercount [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.stacksize [[META4:![0-9]+]] !continuation.state [[META4]] { -; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) -; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 -; REGISTERBUFFER-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await.resume.0) -; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define dso_local void @simple_await.resume.0( -; REGISTERBUFFER-SAME: i64 [[TMP0:%.*]]) !continuation.registercount [[META2]] !continuation [[META3]] { -; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) -; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 -; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) -; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i64 poison, <4 x i32> [[ARG_RELOAD]]), !continuation.registercount [[META2]] -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define void @simple_await_entry( -; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], <4 x i32> [[ARG:%.*]], ptr addrspace(1) [[MEM:%.*]]) !continuation.registercount [[META2]] !continuation.entry [[META5:![0-9]+]] !continuation [[META6:![0-9]+]] !continuation.stacksize [[META4]] !continuation.state [[META4]] { -; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 24) -; REGISTERBUFFER-NEXT: [[MEM_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 -; REGISTERBUFFER-NEXT: store ptr addrspace(1) [[MEM]], ptr addrspace(32) [[MEM_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[ARG_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG]], ptr addrspace(32) [[ARG_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await_entry.resume.0) -; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META2]], !continuation.returnedRegistercount [[META2]] -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define dso_local void @simple_await_entry.resume.0( -; REGISTERBUFFER-SAME: i64 [[TMP0:%.*]]) !continuation.registercount [[META2]] !continuation [[META6]] { -; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 24) -; REGISTERBUFFER-NEXT: [[MEM_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 -; REGISTERBUFFER-NEXT: [[MEM_RELOAD:%.*]] = load ptr addrspace(1), ptr addrspace(32) [[MEM_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[ARG_RELOAD_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_ENTRY_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-NEXT: [[ARG_RELOAD:%.*]] = load <4 x i32>, ptr addrspace(32) [[ARG_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: store <4 x i32> [[ARG_RELOAD]], ptr addrspace(1) [[MEM_RELOAD]], align 4 -; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 24) -; REGISTERBUFFER-NEXT: ret void -; REGISTERBUFFER: entryresume.0.split: -; REGISTERBUFFER-NEXT: unreachable -; diff --git a/llvmraytracing/test/dx/continuation-without-await.ll b/llvmraytracing/test/dx/continuation-without-await.ll index f47ee48bee..bfbbbe1a9b 100644 --- a/llvmraytracing/test/dx/continuation-without-await.ll +++ b/llvmraytracing/test/dx/continuation-without-await.ll @@ -1,10 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,continuations-lint,remove-types-metadata' \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=CLEANUP %s -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata' \ -; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=REGISTERBUFFER %s -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS %s ; @called and @main_no_call must be marked as continuation and end with a continue call to the return address @@ -30,20 +28,20 @@ declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalDat declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) -declare !types !16 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) +declare !pointeetys !16 
%struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) ; Function Attrs: nounwind memory(none) -declare !types !18 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #0 +declare !pointeetys !18 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #0 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !20 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !20 { ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) !types !21 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) !pointeetys !21 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -53,7 +51,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types !22 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !pointeetys !22 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData 
%newdata, %struct.DispatchSystemData* %data, align 4 @@ -61,6 +59,8 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types ret void } +declare !pointeetys !28 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) + define void @main() { %params = alloca %struct.TheirParams, align 4 store %struct.TheirParams zeroinitializer, %struct.TheirParams* %params, align 4 @@ -72,7 +72,7 @@ define void @main_no_call() { ret void } -define void @called(%struct.MyParams* %arg) !types !23 { +define void @called(%struct.MyParams* %arg) !pointeetys !23 { ret void } @@ -83,7 +83,7 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 ; Function Attrs: nounwind -declare !types !25 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #2 +declare !pointeetys !25 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #2 attributes #0 = { nounwind memory(none) } attributes #1 = { nounwind memory(read) } @@ -111,17 +111,19 @@ attributes #2 = { nounwind } !13 = !{void ()* @main_no_call, !"main_no_call", null, null, !7} !14 = !{void (%struct.MyParams*)* @called, !"called", null, null, !15} !15 = !{i32 8, i32 12} -!16 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !17} +!16 = !{%struct.SystemData poison} !17 = !{i32 0, %struct.SystemData poison} -!18 = !{!"function", !"void", !19} +!18 = !{%struct.DispatchSystemData poison} !19 = !{i32 0, %struct.DispatchSystemData poison} -!20 = !{!"function", i32 poison, !19} -!21 = !{!"function", !"void", !19, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!22 = !{!"function", !"void", !19, i32 poison} -!23 = !{!"function", !"void", !24} +!20 = !{%struct.DispatchSystemData poison} +!21 = 
!{%struct.DispatchSystemData poison} +!22 = !{%struct.DispatchSystemData poison} +!23 = !{%struct.MyParams poison} !24 = !{i32 0, %struct.MyParams poison} -!25 = !{!"function", !"void", i32 poison, i32 poison, !26} +!25 = !{%struct.TheirParams poison} !26 = !{i32 0, %struct.TheirParams poison} +!27 = !{i32 0, %struct.TraversalData poison} +!28 = !{%struct.TraversalData poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -131,20 +133,25 @@ attributes #2 = { nounwind } ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] zeroinitializer, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), 
!continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount [[META21]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP4]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP3]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], [9 x i32] poison, [1 x i32] [[TMP4]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount [[META21]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } @await(ptr [[TMP5]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP6]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP11]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP6]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP18]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; @@ -152,37 +159,45 @@ attributes #2 = { nounwind } ; LOWERRAYTRACINGPIPELINE-LABEL: define void @main_no_call( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @called( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [3 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [3 x 
i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [3 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds i32, ptr [[TMP11]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]]), !continuation.registercount [[META17]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP4]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [3 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP27]], [8 x i32] poison, [3 x i32] [[TMP28]]), !continuation.registercount [[META17]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; @@ -194,20 +209,22 @@ attributes #2 = { nounwind } ; CLEANUP-LABEL: define void @main( ; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: store i32 0, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: 
[[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 ; CLEANUP-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @main.resume.0) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount [[META21]] +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 2, i32 -1, {} poison, i64 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount [[META21]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @main.resume.0( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { ; CLEANUP-NEXT: entryresume.0: -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP3]], 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP2]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CLEANUP-NEXT: ret void ; 
CLEANUP: entryresume.0.split: @@ -225,102 +242,50 @@ attributes #2 = { nounwind } ; ; ; CLEANUP-LABEL: define void @called( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [3 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i32] [[PAYLOAD]], 0 +; CLEANUP-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i32] [[PAYLOAD]], 1 +; CLEANUP-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i32] [[PAYLOAD]], 2 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 [[TMP2]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; CLEANUP-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 poison, i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT5]], [8 x i32] poison, [3 x i32] [[DOTFCA_2_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; -; REGISTERBUFFER-LABEL: define i32 @_cont_GetLocalRootIndex( -; REGISTERBUFFER-SAME: ptr [[DATA:%.*]]) { -; REGISTERBUFFER-NEXT: ret i32 5 -; -; -; REGISTERBUFFER-LABEL: define void @main( -; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.state [[META8]] { -; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-NEXT: store i32 0, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @main.resume.0) -; REGISTERBUFFER-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i64 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount [[META21]] -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define dso_local void @main.resume.0( -; REGISTERBUFFER-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { -; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-NEXT: ret void -; REGISTERBUFFER: entryresume.0.split: -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define void @main_no_call( -; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] !continuation.state [[META8]] { -; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-NEXT: ret void -; REGISTERBUFFER: AllocaSpillBB.split: -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define void @called( -; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { -; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; 
REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: store i32 [[TMP2]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; REGISTERBUFFER-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] -; REGISTERBUFFER-NEXT: unreachable -; -; ; POSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( ; POSTPROCESS-SAME: ptr [[DATA:%.*]]) { ; POSTPROCESS-NEXT: ret i32 5 ; ; ; POSTPROCESS-LABEL: define void @main( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.state [[META8]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; 
POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-NEXT: store i32 0, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = call i64 @continuation.getAddrAndMD(ptr @main.resume.0) ; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP1]], i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META21:![0-9]+]], !continuation.returnedRegistercount [[META21]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 2, i32 [[TMP1]], i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define dso_local void @main.resume.0( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META21]] !continuation [[META20]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation [[META20]] { ; POSTPROCESS-NEXT: entryresume.0: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP3]], 0 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP2]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-NEXT: ret void ; POSTPROCESS: entryresume.0.split: @@ -328,7 +293,7 @@ attributes #2 = { nounwind } ; ; ; POSTPROCESS-LABEL: define void @main_no_call( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation.registercount [[META8]] !continuation [[META22:![0-9]+]] !continuation.state [[META8]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], 
[[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.entry [[META19]] !continuation [[META21:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -340,20 +305,20 @@ attributes #2 = { nounwind } ; ; ; POSTPROCESS-LABEL: define void @called( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [3 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [3 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [3 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [3 x i32] [[PAYLOAD]], 2 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 -; POSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP4]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP1]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT5]], [8 x i32] poison, [3 x i32] [[DOTFCA_2_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll index 415975625d..2c98a897b3 100644 --- a/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll +++ b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace-payload-type.ll @@ -21,8 +21,8 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } ; Function Attrs: nounwind -declare !types !39 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #0 -declare !types !49 void @dx.op.traceRay.struct.RayPayload2(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload2*) #0 +declare !pointeetys !39 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #0 +declare !pointeetys !49 void @dx.op.traceRay.struct.RayPayload2(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload2*) #0 ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -31,14 +31,14 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 ; Function Attrs: nounwind -declare !types !40 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 -declare !types !41 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #0 +declare !pointeetys !40 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 +declare !pointeetys !41 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #0 declare float @dx.op.rayTCurrent.f32(i32) #1 declare float @dx.op.rayTMin.f32(i32) #2 declare i32 @dx.op.hitKind.i32(i32) #2 declare i32 @dx.op.instanceID.i32(i32) #2 -declare !types !42 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #5 +declare !pointeetys !42 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #5 define void @main() { ; PAYLOADTYPE-LABEL: define void @main @@ -89,7 +89,7 @@ define void @mainTrace() { ret void } -define void @called(%struct.MyParams* %arg) !types !38 { +define void @called(%struct.MyParams* %arg) !pointeetys !38 { ; PAYLOADTYPE3-LABEL: define void @called ; PAYLOADTYPE3: call void (...) 
@lgc.rt.call.callable.shader(i32 2, %struct.TheirParams2* %{{.*}}, i32 260), !cont.payload.type ![[call_callable_shader_payload_type:[0-9]+]] ; PAYLOADTYPE3: ![[call_callable_shader_payload_type]] = !{%struct.TheirParams2 poison} @@ -144,15 +144,15 @@ attributes #2 = { nounwind readonly } !35 = !{i32 8, i32 12} !36 = !{void ()* @mainTrace, !"mainTrace", null, null, !37} !37 = !{i32 8, i32 7} -!38 = !{!"function", !"void", !43} -!39 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !44} -!40 = !{!"function", !"void", i32 poison, i32 poison, !45} -!41 = !{!"function", !"void", i32 poison, i32 poison, !46} -!42 = !{!"function", !"void", i32 poison, float poison, i32 poison, !47} +!38 = !{%struct.MyParams poison} +!39 = !{%struct.RayPayload poison} +!40 = !{%struct.TheirParams poison} +!41 = !{%struct.TheirParams2 poison} +!42 = !{%struct.BuiltInTriangleIntersectionAttributes poison} !43 = !{i32 0, %struct.MyParams poison} !44 = !{i32 0, %struct.RayPayload poison} !45 = !{i32 0, %struct.TheirParams poison} !46 = !{i32 0, %struct.TheirParams2 poison} !47 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} !48 = !{i32 0, %struct.RayPayload2 poison} -!49 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !48} +!49 = !{%struct.RayPayload2 poison} diff --git a/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll index a704ebc222..9b0c645bf1 100644 --- a/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll +++ b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op-trace.ll @@ -19,7 +19,7 @@ target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } ; Function Attrs: nounwind -declare !types !39 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #0 +declare !pointeetys !39 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #0 ; Function Attrs: nounwind readnone declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -28,14 +28,14 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 ; Function Attrs: nounwind -declare !types !40 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 -declare !types !41 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #0 +declare !pointeetys !40 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 +declare !pointeetys !41 void @dx.op.callShader.struct.TheirParams2(i32, i32, %struct.TheirParams2*) #0 declare float @dx.op.rayTCurrent.f32(i32) #1 declare float @dx.op.rayTMin.f32(i32) #2 declare i32 @dx.op.hitKind.i32(i32) #2 declare i32 @dx.op.instanceID.i32(i32) #2 -declare !types !42 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #5 +declare !pointeetys !42 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #5 ; Function Attrs: nounwind define void @Intersection() #0 { @@ -97,9 +97,9 @@ define void @mainTrace() { ret void } -define void @called(%struct.MyParams* %arg) !types !38 { +define void @called(%struct.MyParams* %arg) 
!pointeetys !38 { ; CHECK-LABEL: define void @called( -; CHECK-SAME: ptr [[ARG:%.*]]) !types [[META28:![0-9]+]] !lgc.rt.shaderstage [[META30:![0-9]+]] !cont.payload.type [[META31:![0-9]+]] { +; CHECK-SAME: ptr [[ARG:%.*]]) !pointeetys [[META28:![0-9]+]] !lgc.rt.shaderstage [[META30:![0-9]+]] !cont.payload.type [[META31:![0-9]+]] { ; CHECK-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS2:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @lgc.rt.shader.index() ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP1]]) @@ -153,11 +153,11 @@ attributes #2 = { nounwind readonly } !35 = !{i32 8, i32 12} !36 = !{void ()* @mainTrace, !"mainTrace", null, null, !37} !37 = !{i32 8, i32 7} -!38 = !{!"function", !"void", !43} -!39 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !44} -!40 = !{!"function", !"void", i32 poison, i32 poison, !45} -!41 = !{!"function", !"void", i32 poison, i32 poison, !46} -!42 = !{!"function", !"void", i32 poison, float poison, i32 poison, !47} +!38 = !{%struct.MyParams poison} +!39 = !{%struct.RayPayload poison} +!40 = !{%struct.TheirParams poison} +!41 = !{%struct.TheirParams2 poison} +!42 = !{%struct.BuiltInTriangleIntersectionAttributes poison} !43 = !{i32 0, %struct.MyParams poison} !44 = !{i32 0, %struct.RayPayload poison} !45 = !{i32 0, %struct.TheirParams poison} diff --git a/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll index 17f7e087f8..ae33f1adc6 100644 --- a/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll +++ b/llvmraytracing/test/dx/dxil-cont-convert-lgc-rt-op.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function ClosestHit --version 3 -; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -16,7 +16,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*) #0 !types !31 { +define void @ClosestHit(%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*) #0 !pointeetys !31 { ; CHECK-LABEL: define void @ClosestHit( ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] !cont.payload.type [[META19:![0-9]+]] !lgc.rt.shaderstage [[META20:![0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = alloca [4 x <3 x float>], align 4 @@ -134,6 +134,6 @@ attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no- !22 = !{i32 0} !29 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !30} !30 = !{i32 8, i32 10, i32 5, !22} -!31 = !{!"function", !"void", !32, !33} +!31 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !32 = !{i32 0, %struct.RayPayload poison} !33 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} diff --git a/llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll b/llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll index e8bd6d7720..7082c1bd07 100644 --- 
a/llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll +++ b/llvmraytracing/test/dx/dxil-cont-intrinsic-prepare.ll @@ -13,23 +13,7 @@ define i32 @_cont_GetContinuationStackAddr() #0 { } ; Function Attrs: nounwind -define void @_cont_SetupRayGen(%struct.DispatchSystemData* noalias nocapture sret(%struct.DispatchSystemData) %agg.result) #1 !types !0 { - %1 = getelementptr inbounds %struct.DispatchSystemData, %struct.DispatchSystemData* %agg.result, i32 0, i32 0 - store i32 2, i32* %1, align 4 - %l = load i32, i32* %1, align 4 - %c = icmp eq i32 %l, 3 - br i1 %c, label %complete, label %end - -complete: ; preds = %0 - call void @_AmdComplete() #3 - br label %end - -end: ; preds = %complete, %0 - ret void -} - -; Function Attrs: nounwind -define void @_cont_TraceRay(%struct.DispatchSystemData* noalias nocapture sret(%struct.DispatchSystemData) %agg.result, %struct.DispatchSystemData* nocapture readonly %data, i64 %accelStruct, i32 %rayFlags, i32 %instanceInclusioMask, i32 %rayContributionToHitGroupIndex, i32 %multiplierForGeometryContributionToShaderIndex, i32 %missShaderIndex, float %originX, float %originY, float %originZ, float %tMin, float %dirX, float %dirY, float %dirZ, float %tMax) #1 !types !2 { +define void @_cont_TraceRay(%struct.DispatchSystemData* noalias nocapture sret(%struct.DispatchSystemData) %agg.result, %struct.DispatchSystemData* nocapture readonly %data, i64 %accelStruct, i32 %rayFlags, i32 %instanceInclusioMask, i32 %rayContributionToHitGroupIndex, i32 %multiplierForGeometryContributionToShaderIndex, i32 %missShaderIndex, float %originX, float %originY, float %originZ, float %tMin, float %dirX, float %dirY, float %dirZ, float %tMax) #1 !pointeetys !2 { %1 = alloca %struct.TraversalData, align 4 %2 = alloca %struct.DispatchSystemData, align 4 %3 = getelementptr inbounds %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0 @@ -50,7 +34,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* noalias nocapture sret(% ret 
void } -declare !types !3 void @"\01?_AmdAwait@@YA?AUDispatchSystemData@@UTraversalData@@@Z"(%struct.DispatchSystemData* sret(%struct.DispatchSystemData), i64, %struct.TraversalData*) #2 +declare !pointeetys !3 void @"\01?_AmdAwait@@YA?AUDispatchSystemData@@UTraversalData@@@Z"(%struct.DispatchSystemData* sret(%struct.DispatchSystemData), i64, %struct.TraversalData*) #2 ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #3 @@ -59,10 +43,10 @@ declare i64 @_AmdGetResumePointAddr() #3 declare void @_AmdComplete() #3 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !5 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !5 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !5 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !5 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 attributes #0 = { nounwind memory(none) "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } @@ -70,43 +54,27 @@ attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no- attributes #3 = { nounwind } attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -!0 = !{!"function", !"void", !1} +!0 = !{%struct.DispatchSystemData poison} !1 = !{i32 0, %struct.DispatchSystemData poison} -!2 = !{!"function", !"void", !1, !1, i64 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!3 = !{!"function", !"void", !1, i64 poison, !4} +!2 = !{null, %struct.DispatchSystemData poison, %struct.DispatchSystemData poison} +!3 = !{null, %struct.DispatchSystemData poison, null, %struct.TraversalData poison} !4 = !{i32 0, %struct.TraversalData poison} -!5 = !{!"function", !"void", i64 poison, !6} +!5 = !{i8 poison} !6 = !{i32 0, i8 poison} ; CHECK-LABEL: define i32 @_cont_GetContinuationStackAddr( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 ; ; -; CHECK-LABEL: define %struct.DispatchSystemData @_cont_SetupRayGen( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP1]], i32 0, i32 0 -; CHECK-NEXT: store i32 2, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 3 -; CHECK-NEXT: br i1 [[C]], label [[COMPLETE:%.*]], label [[END:%.*]] -; CHECK: complete: -; CHECK-NEXT: call void @_AmdComplete() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP1]], align 4 -; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]] -; -; ; CHECK-LABEL: define %struct.DispatchSystemData @_cont_TraceRay( -; CHECK-SAME: ptr nocapture readonly [[DATA:%.*]], i64 [[ACCELSTRUCT:%.*]], i32 [[RAYFLAGS:%.*]], i32 [[INSTANCEINCLUSIOMASK:%.*]], i32 [[RAYCONTRIBUTIONTOHITGROUPINDEX:%.*]], i32 [[MULTIPLIERFORGEOMETRYCONTRIBUTIONTOSHADERINDEX:%.*]], i32 [[MISSSHADERINDEX:%.*]], float [[ORIGINX:%.*]], float [[ORIGINY:%.*]], float [[ORIGINZ:%.*]], float [[TMIN:%.*]], float [[DIRX:%.*]], float [[DIRY:%.*]], float [[DIRZ:%.*]], float [[TMAX:%.*]]) #[[ATTR1]] !types [[META0:![0-9]+]] { +; 
CHECK-SAME: ptr nocapture readonly [[DATA:%.*]], i64 [[ACCELSTRUCT:%.*]], i32 [[RAYFLAGS:%.*]], i32 [[INSTANCEINCLUSIOMASK:%.*]], i32 [[RAYCONTRIBUTIONTOHITGROUPINDEX:%.*]], i32 [[MULTIPLIERFORGEOMETRYCONTRIBUTIONTOSHADERINDEX:%.*]], i32 [[MISSSHADERINDEX:%.*]], float [[ORIGINX:%.*]], float [[ORIGINY:%.*]], float [[ORIGINZ:%.*]], float [[TMIN:%.*]], float [[DIRX:%.*]], float [[DIRY:%.*]], float [[DIRZ:%.*]], float [[TMAX:%.*]]) #[[ATTR1:[0-9]+]] !pointeetys [[META0:![0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_TRAVERSALDATA:%.*]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast ptr [[TMP1]] to ptr -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[TMP6]]) #[[ATTR5]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[TMP6]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], i32 0, i32 0, i32 0, i32 0 ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[ADDR:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR5]] diff --git a/llvmraytracing/test/dx/dxil-cont-post-process.ll b/llvmraytracing/test/dx/dxil-cont-post-process.ll index f1df1da558..dc309866fd 100644 --- a/llvmraytracing/test/dx/dxil-cont-post-process.ll +++ b/llvmraytracing/test/dx/dxil-cont-post-process.ll @@ -11,31 +11,6 @@ declare void @_AmdComplete() #0 declare i32 @_cont_GetContinuationStackAddr() declare i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -; Function Attrs: nounwind -define %struct.DispatchSystemData @_cont_SetupRayGen() { -; CHECK-LABEL: define %struct.DispatchSystemData @_cont_SetupRayGen() { -; CHECK-NEXT: [[DATA:%.*]] = insertvalue 
[[STRUCT_DISPATCHSYSTEMDATA:%.*]] undef, i32 0, 0 -; CHECK-NEXT: [[C:%.*]] = load i1, ptr @debug_global, align 1 -; CHECK-NEXT: br i1 [[C]], label [[COMPLETE:%.*]], label [[END:%.*]] -; CHECK: complete: -; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] poison -; CHECK: complete.split: -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: ret [[STRUCT_DISPATCHSYSTEMDATA]] [[DATA]] -; - %data = insertvalue %struct.DispatchSystemData undef, i32 0, 0 - %c = load i1, ptr @debug_global, align 1 - br i1 %c, label %complete, label %end - -complete: ; preds = %0 - call void @_AmdComplete() #3 - br label %end - -end: ; preds = %complete, %0 - ret %struct.DispatchSystemData %data -} - define void @RayGen(i64 %dummyRetAddr, %struct.DispatchSystemData %0) !lgc.rt.shaderstage !5 !continuation.entry !0 !continuation !3 { ; CHECK-LABEL: define void @RayGen( ; CHECK-SAME: i32 [[CSPINIT:%.*]], i64 [[DUMMYRETADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META3:![0-9]+]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] { diff --git a/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll b/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll index 917b3c4eb0..7e2aea5fcc 100644 --- a/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll +++ b/llvmraytracing/test/dx/dxil-cont-prepare-traversal.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck --check-prefix=PREPARE %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s 
--lint-abort-on-error | FileCheck --check-prefix=ALL %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck --check-prefix=PREPARE %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck --check-prefix=ALL %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -16,16 +16,18 @@ declare i32 @"\01?_AmdContPayloadRegistersGetI32@@YA_KXZ"(i32) declare void @"\01?_AmdContPayloadRegistersSetI32@@YA_KXZ"(i32, i32) -declare !types !0 i32 @"\01?_AmdValueI32CountSomething@@YA_KXZ"(%struct.TraversalData*) +declare !pointeetys !0 i32 @"\01?_AmdValueI32CountSomething@@YA_KXZ"(%struct.TraversalData*) -declare !types !2 i32 @"\01?_AmdValueGetI32Something@@YA_KXZ"(%struct.TraversalData*, i32) +declare !pointeetys !2 i32 @"\01?_AmdValueGetI32Something@@YA_KXZ"(%struct.TraversalData*, i32) -declare !types !3 void @"\01?_AmdValueSetI32Something@@YA_KXZ"(%struct.TraversalData*, i32, i32) +declare !pointeetys !3 void @"\01?_AmdValueSetI32Something@@YA_KXZ"(%struct.TraversalData*, i32, i32) -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + +declare !pointeetys !9 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: nounwind -define void @_cont_Traversal(%struct.TraversalData* %data) #0 !types !4 { +define void 
@_cont_Traversal(%struct.TraversalData* %data) #0 !pointeetys !4 { %1 = getelementptr inbounds %struct.TraversalData, %struct.TraversalData* %data, i32 0, i32 1 %2 = load i32, i32* %1, align 4 %3 = icmp eq i32 %2, 0 @@ -46,34 +48,32 @@ define void @_cont_Traversal(%struct.TraversalData* %data) #0 !types !4 { %a3 = add i32 %a2, %i3 %a4 = add i32 %a3, %i4 %addr = zext i32 %a4 to i64 - call void @_AmdWaitEnqueueCall(i64 %addr, i64 -1, i32 0, %struct.SystemData* %4) #2 - br label %7 + call void @_AmdWaitEnqueueCall(i64 %addr, i64 -1, i64 0, %struct.SystemData* %4) #2 + ret void 6: ; preds = %0 - call void @_AmdWaitEnqueue(i64 0, i64 -1, i32 2, %struct.SystemData* %4) #2 - br label %7 - -7: ; preds = %6, %5 + call void @_AmdWaitEnqueue(i64 0, i64 -1, i64 2, %struct.SystemData* %4) #2 ret void } -declare !types !5 void @_AmdWaitEnqueueCall(i64, i64, i32, %struct.SystemData*) #1 +declare !pointeetys !5 void @_AmdWaitEnqueueCall(i64, i64, i64, %struct.SystemData*) #1 -declare !types !5 void @_AmdWaitEnqueue(i64, i64, i32, %struct.SystemData*) #1 +declare !pointeetys !5 void @_AmdWaitEnqueue(i64, i64, i64, %struct.SystemData*) #1 attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind } -!0 = !{!"function", i32 poison, !1} +!0 = !{%struct.TraversalData poison} !1 = !{i32 0, %struct.TraversalData poison} -!2 = !{!"function", i32 poison, !1, i32 poison} -!3 = !{!"function", !"void", !1, i32 poison, i32 poison} -!4 = !{!"function", !"void", !1} -!5 = !{!"function", !"void", 
i64 poison, i64 poison, i32 poison, !6} +!2 = !{%struct.TraversalData poison} +!3 = !{%struct.TraversalData poison} +!4 = !{%struct.TraversalData poison} +!5 = !{%struct.SystemData poison} !6 = !{i32 0, %struct.SystemData poison} !7 = !{i32 0, %struct.DispatchSystemData poison} -!8 = !{!"function", i32 poison, !7} +!8 = !{%struct.DispatchSystemData poison} +!9 = !{%struct.TraversalData poison} ; PREPARE-LABEL: define void @_cont_Traversal( ; PREPARE-SAME: [[STRUCT_TRAVERSALDATA:%.*]] [[DATA:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META0:![0-9]+]] { ; PREPARE-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 @@ -98,22 +98,50 @@ attributes #2 = { nounwind } ; PREPARE-NEXT: [[ADDR:%.*]] = zext i32 [[A4]] to i64 ; PREPARE-NEXT: [[TMP7:%.*]] = load [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP5]], align 4 ; PREPARE-NEXT: [[TMP10:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) -; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 0, i64 [[TMP10]], [[STRUCT_SYSTEMDATA]] [[TMP7]]) +; PREPARE-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 [[ADDR]], i64 -1, i32 poison, i64 [[TMP10]], [[STRUCT_SYSTEMDATA]] [[TMP7]]) ; PREPARE-NEXT: unreachable ; PREPARE: 9: ; PREPARE-NEXT: [[TMP9:%.*]] = load [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], align 4 -; PREPARE-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 0, i64 -1, i32 2, [[STRUCT_SYSTEMDATA]] [[TMP9]]) +; PREPARE-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 poison, i64 2, [[STRUCT_SYSTEMDATA]] [[TMP9]]) ; PREPARE-NEXT: unreachable -; PREPARE: 11: -; PREPARE-NEXT: ret void ; ; ; ALL-LABEL: define void @_cont_Traversal( -; ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation.state [[META4:![0-9]+]] { +; ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation.state [[META4:![0-9]+]] { ; ALL-NEXT: AllocaSpillBB: ; ALL-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 ; ALL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; ALL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; ALL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; ALL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; ALL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; ALL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; ALL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; ALL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; ALL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; ALL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; ALL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; ALL-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; ALL-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x 
i32] [[PAYLOAD]], 11 +; ALL-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; ALL-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; ALL-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; ALL-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; ALL-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; ALL-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; ALL-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; ALL-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; ALL-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; ALL-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; ALL-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; ALL-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; ALL-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; ALL-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; ALL-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; ALL-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; ALL-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; ALL-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; ALL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; ALL-NEXT: [[DOTFCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 ; ALL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 @@ -132,17 +160,15 @@ attributes #2 = { nounwind } ; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP1]], i32 0, 
i32 0 ; ALL-NEXT: br i1 [[TMP4]], label [[TMP13:%.*]], label [[TMP6:%.*]] ; ALL: 6: -; ALL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; ALL-NEXT: store i32 1, ptr addrspace(20) @REGISTERS, align 4 -; ALL-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; ALL-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -; ALL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; ALL-NEXT: store i32 1, ptr [[TMP10]], align 4 +; ALL-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 +; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +; ALL-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 +; ALL-NEXT: store i32 1, ptr [[TMP9]], align 4 ; ALL-NEXT: [[A0:%.*]] = zext i1 false to i32 ; ALL-NEXT: [[A1:%.*]] = add i32 [[A0]], 30 -; ALL-NEXT: [[A2:%.*]] = add i32 [[A1]], [[TMP7]] +; ALL-NEXT: [[A2:%.*]] = add i32 [[A1]], [[PAYLOAD_FCA_0_EXTRACT]] ; ALL-NEXT: [[A3:%.*]] = add i32 [[A2]], 3 -; ALL-NEXT: [[A4:%.*]] = add i32 [[A3]], [[TMP9]] +; ALL-NEXT: [[A4:%.*]] = add i32 [[A3]], [[TMP8]] ; ALL-NEXT: [[ADDR:%.*]] = zext i32 [[A4]] to i64 ; ALL-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP5]], i32 0, i32 0, i32 0 ; ALL-NEXT: [[DOTFCA_0_0_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_0_GEP]], align 4 @@ -151,17 +177,83 @@ attributes #2 = { nounwind } ; ALL-NEXT: [[DOTFCA_1_LOAD:%.*]] = load float, ptr [[DOTFCA_1_GEP]], align 4 ; ALL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT]], float [[DOTFCA_1_LOAD]], 1 ; ALL-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(ptr @_cont_Traversal) +; ALL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; ALL-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; ALL-NEXT: [[DOTFCA_1_INSERT125:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_0_1_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; ALL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 1, 0 +; ALL-NEXT: [[DOTFCA_1_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; ALL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT1]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; ALL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; ALL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; ALL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; ALL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; ALL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; ALL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; ALL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; ALL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; ALL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; ALL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; ALL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; ALL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; ALL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; ALL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 
[[PAYLOAD_FCA_16_EXTRACT]], 16 +; ALL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; ALL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; ALL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; ALL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; ALL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; ALL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; ALL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; ALL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; ALL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; ALL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; ALL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; ALL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; ALL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; ALL-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; ALL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 [[ADDR]], i64 -1, i32 [[TMP11]], i32 0, i64 [[TMP12]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META0]] +; ALL-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 [[ADDR]], i64 -1, i32 [[TMP11]], i64 [[TMP12]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT]], [9 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; ALL-NEXT: unreachable -; ALL: 13: +; ALL: 12: ; ALL-NEXT: [[DOTFCA_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], i32 0, i32 0, i32 0 ; ALL-NEXT: [[DOTFCA_0_0_LOAD2:%.*]] = load i32, ptr [[DOTFCA_0_0_GEP1]], align 4 ; ALL-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] poison, i32 [[DOTFCA_0_0_LOAD2]], 0, 0 ; ALL-NEXT: [[DOTFCA_1_GEP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[TMP5]], i32 0, i32 1 ; ALL-NEXT: [[DOTFCA_1_LOAD5:%.*]] = load float, ptr [[DOTFCA_1_GEP4]], align 4 ; ALL-NEXT: [[DOTFCA_1_INSERT6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT3]], float [[DOTFCA_1_LOAD5]], 1 +; ALL-NEXT: [[DOTFCA_0_0_0_INSERT128:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; ALL-NEXT: [[DOTFCA_0_1_INSERT131:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT128]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; ALL-NEXT: [[DOTFCA_1_INSERT134:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT131]], i32 [[DOTFCA_1_EXTRACT]], 1 +; ALL-NEXT: [[DOTFCA_0_INSERT3:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; ALL-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT3]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; ALL-NEXT: [[DOTFCA_2_INSERT9:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; ALL-NEXT: [[DOTFCA_3_INSERT12:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT9]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; ALL-NEXT: [[DOTFCA_4_INSERT15:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT12]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; ALL-NEXT: [[DOTFCA_5_INSERT18:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT15]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; ALL-NEXT: [[DOTFCA_6_INSERT21:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_5_INSERT18]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; ALL-NEXT: [[DOTFCA_7_INSERT24:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT21]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; ALL-NEXT: [[DOTFCA_8_INSERT27:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT24]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; ALL-NEXT: [[DOTFCA_9_INSERT30:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT27]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; ALL-NEXT: [[DOTFCA_10_INSERT33:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT30]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; ALL-NEXT: [[DOTFCA_11_INSERT36:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT33]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; ALL-NEXT: [[DOTFCA_12_INSERT39:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT36]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; ALL-NEXT: [[DOTFCA_13_INSERT42:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT39]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; ALL-NEXT: [[DOTFCA_14_INSERT45:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT42]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; ALL-NEXT: [[DOTFCA_15_INSERT48:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT45]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; ALL-NEXT: [[DOTFCA_16_INSERT51:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT48]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; ALL-NEXT: [[DOTFCA_17_INSERT54:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT51]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; ALL-NEXT: [[DOTFCA_18_INSERT57:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT54]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; ALL-NEXT: [[DOTFCA_19_INSERT60:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT57]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; ALL-NEXT: [[DOTFCA_20_INSERT63:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT60]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; ALL-NEXT: [[DOTFCA_21_INSERT66:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT63]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; ALL-NEXT: [[DOTFCA_22_INSERT69:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_21_INSERT66]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; ALL-NEXT: [[DOTFCA_23_INSERT72:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT69]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; ALL-NEXT: [[DOTFCA_24_INSERT75:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT72]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; ALL-NEXT: [[DOTFCA_25_INSERT78:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT75]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; ALL-NEXT: [[DOTFCA_26_INSERT81:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT78]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; ALL-NEXT: [[DOTFCA_27_INSERT84:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT81]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; ALL-NEXT: [[DOTFCA_28_INSERT87:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT84]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; ALL-NEXT: [[DOTFCA_29_INSERT90:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT87]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; ALL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; ALL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 0, i64 -1, i32 [[TMP14]], i32 2, [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT6]]), !continuation.registercount [[META0]] +; ALL-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 [[TMP14]], i64 2, [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT6]], [9 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT90]]) ; ALL-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll b/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll index f3acbd5aff..90442d10df 100644 --- a/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll +++ b/llvmraytracing/test/dx/dxil-cps-stack-lowering-global.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=CPS-STACK-LOWERING-CPS %s +; RUN: opt --verify-each -passes='dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=CPS-STACK-LOWERING-CPS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -12,12 +12,9 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.type = type { <2 x float> } @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 -@PAYLOAD = external addrspace(20) global [30 x i32] declare i32 @_cont_GetContinuationStackAddr() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) @@ -32,46 +29,52 @@ define i32 @_cont_GetLocalRootIndex(ptr %data) { declare i64 @_cont_GetContinuationStackGlobalMemBase() -define void @called(%struct.type %cont.state, i32 %return.addr, i32 %shader.index, 
%struct.DispatchSystemData %0) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +define void @called(%struct.type %cont.state, i32 %return.addr, i32 %shader.index, %struct.DispatchSystemData %0, {} %padding, [1 x i32] %payload) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { AllocaSpillBB: %1 = call ptr addrspace(32) @lgc.cps.alloc(i32 8) + %payload.serialization.alloca = alloca [1 x i32], align 4 %return.addr.spill.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %1, i32 0, i32 0 store i32 %return.addr, ptr addrspace(32) %return.addr.spill.addr, align 4 + store [1 x i32] %payload, ptr %payload.serialization.alloca, align 4 %2 = call %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() %.fca.0.extract = extractvalue %struct.DispatchSystemData %2, 0 call void @amd.dx.setLocalRootIndex(i32 5) %ptr = getelementptr i8, ptr addrspace(32) %1, i32 9 store i32 99, ptr addrspace(32) %ptr %dis_data.i.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract, 0 - store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 + %gep.payload = getelementptr i32, ptr %payload.serialization.alloca, i32 0 + store i32 undef, ptr %gep.payload, align 4 %3 = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) - call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i64 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 + %payload.reload = load [1 x i32], ptr %payload.serialization.alloca, align 4 + call void (...) 
@lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i64 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert, {} poison, [1 x i32] %payload.reload), !continuation.registercount !16 unreachable } -define void @called.resume.0({} %cont.state, i32 %returnAddr, %struct.type %0, %struct.DispatchSystemData %1) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +define void @called.resume.0({} %cont.state, i32 %returnAddr, %struct.type %0, { %struct.DispatchSystemData, {}, [1 x i32] } %1) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { entryresume.0: %2 = call ptr addrspace(32) @lgc.cps.peek(i32 8) - %3 = load i32, ptr addrspace(20) @PAYLOAD, align 4 + %payload.serialization.alloca = alloca [1 x i32], align 4 + %payload = extractvalue { %struct.DispatchSystemData, {}, [1 x i32] } %1, 2 + store [1 x i32] %payload, ptr %payload.serialization.alloca, align 4 + %payload.gep = getelementptr i32, ptr %payload.serialization.alloca, i32 0 + %3 = load i32, ptr %payload.gep, align 4 %4 = extractvalue %struct.type %0, 0 - %.fca.0.extract3 = extractvalue %struct.DispatchSystemData %1, 0 + %system.data = extractvalue { %struct.DispatchSystemData, {}, [1 x i32]} %1, 0 + %.fca.0.extract3 = extractvalue %struct.DispatchSystemData %system.data, 0 call void @amd.dx.setLocalRootIndex(i32 5) %return.addr.reload.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %2, i32 0, i32 0 %return.addr.reload = load i32, ptr addrspace(32) %return.addr.reload.addr, align 4 - call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) - store i32 %3, ptr addrspace(20) @PAYLOAD, align 4 + store i32 %3, ptr %payload.gep, align 4 %.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract3, 0 call void @lgc.cps.free(i32 8) - call void (...) 
@lgc.cps.jump(i32 %return.addr.reload, i32 2, %struct.type %0, %struct.DispatchSystemData %.fca.0.insert), !continuation.registercount !16 + %payload.reload = load [1 x i32], ptr %payload.serialization.alloca, align 4 + call void (...) @lgc.cps.jump(i32 %return.addr.reload, i32 2, %struct.type %0, i64 poison, %struct.DispatchSystemData %.fca.0.insert, {} poison, [1 x i32] %payload.reload), !continuation.registercount !16 unreachable } ; Function Attrs: nofree nounwind willreturn declare void @amd.dx.setLocalRootIndex(i32) #0 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) -declare void @registerbuffer.setpointerbarrier(...) #1 - ; Function Attrs: nounwind willreturn declare %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() #2 @@ -114,7 +117,7 @@ declare ptr addrspace(32) @lgc.cps.peek(i32) #6 ; Function Attrs: nounwind willreturn memory(inaccessiblemem: readwrite) declare void @lgc.cps.free(i32) #5 -declare void @continuation.continue(i64, ...) +declare void @lgc.ilcps.continue(...) 
attributes #0 = { nofree nounwind willreturn } attributes #1 = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } @@ -157,7 +160,7 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; ; ; CPS-STACK-LOWERING-CPS-LABEL: define void @called( -; CPS-STACK-LOWERING-CPS-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { +; CPS-STACK-LOWERING-CPS-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], {} [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { ; CPS-STACK-LOWERING-CPS-NEXT: AllocaSpillBB: ; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -166,8 +169,10 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 8 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(22) [[TMP5]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] 
[[CONTINUATIONS_GETSYSTEMDATA_S_STRUCT_DISPATCHSYSTEMDATAS:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -175,15 +180,17 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP7]] ; CPS-STACK-LOWERING-CPS-NEXT: store i32 99, ptr addrspace(22) [[TMP8]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[GEP_PAYLOAD:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr [[GEP_PAYLOAD]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = call i64 @continuation.getAddrAndMD(ptr @called.resume.0) +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_RELOAD:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], {} poison, [1 x i32] [[PAYLOAD_RELOAD]]) ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; ; ; CPS-STACK-LOWERING-CPS-LABEL: define void @called.resume.0( -; CPS-STACK-LOWERING-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_TYPE:%.*]] [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !lgc.cps [[META16]] !continuation [[META17]] { +; CPS-STACK-LOWERING-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_TYPE:%.*]] [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], {}, [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !lgc.cps [[META16]] !continuation [[META17]] { ; CPS-STACK-LOWERING-CPS-NEXT: entryresume.0: ; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -191,19 +198,25 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], -8 -; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], {}, [1 x i32] } [[TMP1]], 2 +; CPS-STACK-LOWERING-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_GEP:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[PAYLOAD_GEP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP7:%.*]] = extractvalue 
[[STRUCT_TYPE]] [[TMP0]], 0 -; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[SYSTEM_DATA:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], {}, [1 x i32] } [[TMP1]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP5]] ; CPS-STACK-LOWERING-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(22) [[TMP8]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP6]], ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP6]], ptr [[PAYLOAD_GEP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -8 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_RELOAD:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP11:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP11]], i32 [[TMP12]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP11]], i32 [[TMP12]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], {} poison, [1 x i32] [[PAYLOAD_RELOAD]]) ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll b/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll index 4d16405118..ecafd30575 100644 --- a/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll +++ b/llvmraytracing/test/dx/dxil-cps-stack-lowering-scratch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=CPS-STACK-LOWERING-CPS %s +; RUN: opt --verify-each -passes='dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=CPS-STACK-LOWERING-CPS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -12,12 +12,9 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.type = type { <2 x float> } @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 -@PAYLOAD = external addrspace(20) global [30 x i32] declare i32 @_cont_GetContinuationStackAddr() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) @@ -32,46 +29,52 @@ define i32 @_cont_GetLocalRootIndex(ptr %data) { declare i64 @_cont_GetContinuationStackGlobalMemBase() -define void @called(%struct.type %cont.state, i32 %return.addr, i32 
%shader.index, %struct.DispatchSystemData %0) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +define void @called(%struct.type %cont.state, i32 %return.addr, i32 %shader.index, %struct.DispatchSystemData %0, {} %padding, [1 x i32] %payload) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { AllocaSpillBB: %1 = call ptr addrspace(32) @lgc.cps.alloc(i32 8) + %payload.serialization.alloca = alloca [1 x i32], align 4 %return.addr.spill.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %1, i32 0, i32 0 store i32 %return.addr, ptr addrspace(32) %return.addr.spill.addr, align 4 + store [1 x i32] %payload, ptr %payload.serialization.alloca, align 4 %2 = call %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() %.fca.0.extract = extractvalue %struct.DispatchSystemData %2, 0 call void @amd.dx.setLocalRootIndex(i32 5) %ptr = getelementptr i8, ptr addrspace(32) %1, i32 9 store i32 99, ptr addrspace(32) %ptr %dis_data.i.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract, 0 - store i32 undef, ptr addrspace(20) @PAYLOAD, align 4 + %gep.payload = getelementptr i32, ptr %payload.serialization.alloca, i32 0 + store i32 undef, ptr %gep.payload, align 4 %3 = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) - call void (...) @lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i64 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert), !continuation.registercount !16 + %payload.reload = load [1 x i32], ptr %payload.serialization.alloca, align 4 + call void (...) 
@lgc.cps.jump(i32 2, i32 2, %struct.type %cont.state, i64 %3, %struct.DispatchSystemData %dis_data.i.fca.0.insert, {} poison, [1 x i32] %payload.reload), !continuation.registercount !16 unreachable } -define void @called.resume.0({} %cont.state, i32 %returnAddr, %struct.type %0, %struct.DispatchSystemData %1) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { +define void @called.resume.0({} %cont.state, i32 %returnAddr, %struct.type %0, { %struct.DispatchSystemData, {}, [1 x i32] } %1) !lgc.rt.shaderstage !15 !lgc.cps !16 !continuation !17 { entryresume.0: %2 = call ptr addrspace(32) @lgc.cps.peek(i32 8) - %3 = load i32, ptr addrspace(20) @PAYLOAD, align 4 + %payload.serialization.alloca = alloca [1 x i32], align 4 + %payload = extractvalue { %struct.DispatchSystemData, {}, [1 x i32] } %1, 2 + store [1 x i32] %payload, ptr %payload.serialization.alloca, align 4 + %payload.gep = getelementptr i32, ptr %payload.serialization.alloca, i32 0 + %3 = load i32, ptr %payload.gep, align 4 %4 = extractvalue %struct.type %0, 0 - %.fca.0.extract3 = extractvalue %struct.DispatchSystemData %1, 0 + %system.data = extractvalue { %struct.DispatchSystemData, {}, [1 x i32]} %1, 0 + %.fca.0.extract3 = extractvalue %struct.DispatchSystemData %system.data, 0 call void @amd.dx.setLocalRootIndex(i32 5) %return.addr.reload.addr = getelementptr inbounds %called.Frame, ptr addrspace(32) %2, i32 0, i32 0 %return.addr.reload = load i32, ptr addrspace(32) %return.addr.reload.addr, align 4 - call void (...) @registerbuffer.setpointerbarrier(ptr addrspacecast (ptr addrspace(20) @PAYLOAD to ptr)) - store i32 %3, ptr addrspace(20) @PAYLOAD, align 4 + store i32 %3, ptr %payload.gep, align 4 %.fca.0.insert = insertvalue %struct.DispatchSystemData poison, i32 %.fca.0.extract3, 0 call void @lgc.cps.free(i32 8) - call void (...) 
@lgc.cps.jump(i32 %return.addr.reload, i32 2, %struct.type %0, %struct.DispatchSystemData %.fca.0.insert), !continuation.registercount !16 + %payload.reload = load [1 x i32], ptr %payload.serialization.alloca, align 4 + call void (...) @lgc.cps.jump(i32 %return.addr.reload, i32 2, %struct.type %0, i64 poison, %struct.DispatchSystemData %.fca.0.insert, {} poison, [1 x i32] %payload.reload), !continuation.registercount !16 unreachable } ; Function Attrs: nofree nounwind willreturn declare void @amd.dx.setLocalRootIndex(i32) #0 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) -declare void @registerbuffer.setpointerbarrier(...) #1 - ; Function Attrs: nounwind willreturn declare %struct.DispatchSystemData @continuations.getSystemData.s_struct.DispatchSystemDatas() #2 @@ -155,16 +158,18 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; ; ; CPS-STACK-LOWERING-CPS-LABEL: define void @called( -; CPS-STACK-LOWERING-CPS-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { +; CPS-STACK-LOWERING-CPS-SAME: [[STRUCT_TYPE:%.*]] [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURN_ADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], {} [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { ; CPS-STACK-LOWERING-CPS-NEXT: AllocaSpillBB: ; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 8 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 +; 
CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[RETURN_ADDR]], ptr addrspace(21) [[TMP4]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[CONTINUATIONS_GETSYSTEMDATA_S_STRUCT_DISPATCHSYSTEMDATAS:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]() ; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -173,34 +178,42 @@ attributes #6 = { nounwind willreturn memory(inaccessiblemem: read) } ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 99, ptr addrspace(21) [[TMP8]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[GEP_PAYLOAD:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 undef, ptr [[GEP_PAYLOAD]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = call i64 @continuation.getAddrAndMD(ptr @called.resume.0) +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_RELOAD:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 2, i32 [[TMP9]], i64 [[TMP10]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], {} poison, [1 x i32] [[PAYLOAD_RELOAD]]) ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; ; ; CPS-STACK-LOWERING-CPS-LABEL: define void @called.resume.0( -; CPS-STACK-LOWERING-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_TYPE:%.*]] [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !lgc.cps [[META16]] !continuation [[META17]] { +; CPS-STACK-LOWERING-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], [[STRUCT_TYPE:%.*]] [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], {}, [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META15]] !lgc.cps [[META16]] !continuation [[META17]] { ; CPS-STACK-LOWERING-CPS-NEXT: entryresume.0: ; CPS-STACK-LOWERING-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], -8 -; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], {}, [1 x i32] } [[TMP1]], 2 +; CPS-STACK-LOWERING-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_GEP:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_GEP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: 
[[TMP5:%.*]] = extractvalue [[STRUCT_TYPE]] [[TMP0]], 0 -; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[SYSTEM_DATA:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], {}, [1 x i32] } [[TMP1]], 0 +; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], i32 0 ; CPS-STACK-LOWERING-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP7]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP4]], ptr addrspace(20) @REGISTERS, align 4 +; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP4]], ptr [[PAYLOAD_GEP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -8 ; CPS-STACK-LOWERING-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 +; CPS-STACK-LOWERING-CPS-NEXT: [[PAYLOAD_RELOAD:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP10:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; CPS-STACK-LOWERING-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; CPS-STACK-LOWERING-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP10]], i32 [[TMP11]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META16]] +; CPS-STACK-LOWERING-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP10]], i32 [[TMP11]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], {} poison, [1 x i32] [[PAYLOAD_RELOAD]]) ; CPS-STACK-LOWERING-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/global-mem-stack.ll b/llvmraytracing/test/dx/global-mem-stack.ll index 95a7dfc352..c7e726543b 100644 --- a/llvmraytracing/test/dx/global-mem-stack.ll +++ b/llvmraytracing/test/dx/global-mem-stack.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -14,60 +14,60 @@ declare i32 @_cont_GetContinuationStackAddr() declare i64 @_cont_GetContinuationStackGlobalMemBase() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) declare %struct.TraversalData @_AmdAwaitAnyHit(i64, %struct.TraversalData, float, i32) -declare !types !9 i32 @_cont_HitKind(%struct.SystemData*) +declare 
!pointeetys !9 i32 @_cont_HitKind(%struct.SystemData*) declare i64 @_AmdGetResumePointAddr() -declare !types !11 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) +declare !pointeetys !11 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) + +declare !pointeetys !12 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) -declare !types !12 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) +declare !pointeetys !14 void @_AmdRestoreSystemDataAnyHit(%struct.TraversalData*) -declare !types !14 void @_AmdRestoreSystemDataAnyHit(%struct.TraversalData*) +declare !pointeetys !14 void @_cont_AcceptHit(%struct.TraversalData* nocapture readnone) -declare !types !14 void @_cont_AcceptHit(%struct.TraversalData* nocapture readnone) +declare !pointeetys !14 void @_AmdAcceptHitAttributes(%struct.TraversalData*) -declare !types !14 void @_AmdAcceptHitAttributes(%struct.TraversalData*) +declare !pointeetys !26 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) declare i1 @opaqueIsEnd() -define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) !types !16 { +define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) !pointeetys !16 { %isEnd = call i1 @opaqueIsEnd() ret i1 %isEnd } -define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) !types !17 { +define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) !pointeetys !17 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 %val = load %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret %struct.BuiltInTriangleIntersectionAttributes %val } -define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) !types !18 { +define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, 
%struct.BuiltInTriangleIntersectionAttributes %val) !pointeetys !18 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 store %struct.BuiltInTriangleIntersectionAttributes %val, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret void } -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !19 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !19 { ret i32 5 } -declare !types !20 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) +declare !pointeetys !20 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) -declare !types !21 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) +declare !pointeetys !21 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) -declare !types !21 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) +declare !pointeetys !21 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) -declare !types !12 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) +declare !pointeetys !12 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) -define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) !types !23 { +define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) !pointeetys !23 { %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 %2 = load <2 x float>, <2 x float>* %1, align 4 %3 = extractelement <2 x float> %2, i32 0 @@ -98,23 +98,24 @@ define void 
@MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, !6 = !{i32 0} !7 = !{i32 22} !8 = !{i32 2} -!9 = !{!"function", i32 poison, !10} +!9 = !{%struct.SystemData poison} !10 = !{i32 0, %struct.SystemData poison} -!11 = !{!"function", %struct.HitData poison, !10} -!12 = !{!"function", !"void", !13} +!11 = !{%struct.SystemData poison} +!12 = !{%struct.DispatchSystemData poison} !13 = !{i32 0, %struct.DispatchSystemData poison} -!14 = !{!"function", !"void", !15} +!14 = !{%struct.TraversalData poison} !15 = !{i32 0, %struct.TraversalData poison} -!16 = !{!"function", i1 poison, !15} -!17 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !10} -!18 = !{!"function", !"void", !10, %struct.BuiltInTriangleIntersectionAttributes poison} -!19 = !{!"function", i32 poison, !13} -!20 = !{!"function", i32 poison, !13, i32 poison} -!21 = !{!"function", float poison, !13, !22, i32 poison} +!16 = !{%struct.TraversalData poison} +!17 = !{%struct.SystemData poison} +!18 = !{%struct.SystemData poison} +!19 = !{%struct.DispatchSystemData poison} +!20 = !{%struct.DispatchSystemData poison} +!21 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !22 = !{i32 0, %struct.HitData poison} -!23 = !{!"function", !"void", !24, !25} +!23 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !24 = !{i32 0, %struct.RayPayload poison} !25 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} +!26 = !{%struct.TraversalData poison} ; CHECK-LABEL: define i1 @_cont_IsEndSearch( ; CHECK-SAME: ptr [[DATA:%.*]]) { ; CHECK-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() @@ -141,45 +142,46 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, ; ; ; CHECK-LABEL: define void @MyClosestHitShader( -; CHECK-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation.registercount [[META8:![0-9]+]] 
!continuation [[META10:![0-9]+]] !continuation.state [[META6:![0-9]+]] { +; CHECK-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META9:![0-9]+]] !continuation [[META10:![0-9]+]] { ; CHECK-NEXT: AllocaSpillBB: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; CHECK-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[PAYLOAD]], 0 +; CHECK-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[PAYLOAD]], 1 ; CHECK-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; CHECK-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 20 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(22) [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(22) [[TMP9]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP4]], 8 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr 
addrspace(22) [[TMP13]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP14]] to float -; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP15]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP4]], 12 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(22) [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32 [[TMP18]] to float -; CHECK-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP19]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(22) [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP3]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(22) [[TMP8]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; CHECK-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP10]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP3]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(22) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float +; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP3]], 12 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP15]] +; CHECK-NEXT: 
[[TMP17:%.*]] = load i32, ptr addrspace(22) [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float +; CHECK-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP18]], i32 3 ; CHECK-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; CHECK-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; CHECK-NEXT: [[DOTSROA_06_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32 [[TMP20]] to float -; CHECK-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP21]], i32 0 -; CHECK-NEXT: [[DOTSROA_06_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32 [[TMP22]] to float -; CHECK-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP23]], i32 1 +; CHECK-NEXT: [[DOTSROA_08_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_08_0_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float +; CHECK-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP20]], i32 0 +; CHECK-NEXT: [[DOTSROA_08_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_08_4_VEC_EXTRACT]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float +; CHECK-NEXT: 
[[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP22]], i32 1 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 ; CHECK-NEXT: [[TMP25:%.*]] = fsub fast float 1.000000e+00, [[TMP24]] @@ -189,8 +191,7 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP24]], i64 1 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP26]], i64 2 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float 1.000000e+00, i64 3 -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], 20 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 ; CHECK-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP31]], i32 0 ; CHECK-NEXT: [[TMP34:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP33]] @@ -211,7 +212,9 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] ; CHECK-NEXT: store i32 [[TMP43]], ptr addrspace(22) [[TMP44]], align 4 ; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; CHECK-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [2 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 ; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[CSP]], align 4 -; CHECK-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP45]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META8]] +; CHECK-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP45]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [21 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT]]) ; CHECK-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/inline-const-jump-target.ll b/llvmraytracing/test/dx/inline-const-jump-target.ll new file mode 100644 index 0000000000..90184e8701 --- /dev/null +++ b/llvmraytracing/test/dx/inline-const-jump-target.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s +; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,lgc-cps-jump-inliner,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=JUMP-INLINER-CPS %s + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%dx.types.Handle = type { i8* } +%struct.DispatchSystemData = type { i32 } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } +%struct.SystemData = type { %struct.DispatchSystemData } +%struct.TraversalData = type { %struct.SystemData, %struct.HitData, <3 x float>, <3 x float>, float } +%struct.HitData = type { float, i32 } +%struct.AnyHitTraversalData = type { %struct.TraversalData, %struct.HitData } +%struct.TheirParams = type { i32 } +%struct.Payload = type {} +%"class.RWTexture2D >" = type { <4 x float> } + +@debug_global = 
external global i32 + +@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 + +declare i32 @lgc.rt.shader.index() + +declare i32 @_cont_GetContinuationStackAddr() + +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !13 { +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: ptr [[DATA:%.*]]) { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret i32 5 +; +; JUMP-INLINER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; JUMP-INLINER-CPS-SAME: ptr [[DATA:%.*]]) { +; JUMP-INLINER-CPS-NEXT: ret i32 5 +; + ret i32 5 +} + +; Need _cont_ReportHit to get system data type +declare !pointeetys !21 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + +declare void @lgc.cps.jump(...) #1 +declare i32 @get.ret.addr() + +declare !pointeetys !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) + +declare !pointeetys !13 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) +declare i32 @_AmdGetFuncAddrCallable() + +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !13 { + ret void +} + +define internal void @Callable(%struct.Payload* %payload) !pointeetys !23 !lgc.rt.shaderstage !25 { +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define internal void @Callable( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [0 x i32] [[PADDING:%.*]], [0 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: entry: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x 
i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[SHADER_INDEX]], ptr @debug_global, align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]]), !continuation.registercount [[META8:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; +entry: + %val = call i32 @lgc.rt.shader.index() + store i32 %val, ptr @debug_global + ret void +} + +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !pointeetys !13 { + %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 + %callable.addr = call i32 @_AmdGetFuncAddrCallable() + %ret.addr = call i32 @get.ret.addr() + call void (...) 
@lgc.cps.jump(i32 %callable.addr, i32 2, {} poison, i32 %ret.addr, i32 999, %struct.DispatchSystemData %dis_data, {} poison, [0 x i32] poison, [0 x i32] poison) + unreachable +} + +define void @main() { +; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @main( +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META18:![0-9]+]] !continuation [[META19:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @Callable) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[RET_ADDR_I:%.*]] = call i32 @get.ret.addr() +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[TMP2]], i32 2, {} poison, i32 [[RET_ADDR_I]], i32 999, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], {} poison, [0 x i32] poison, [0 x i32] poison) +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable +; LOWERRAYTRACINGPIPELINE-CPS: _cont_CallShader.exit: +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void +; +; JUMP-INLINER-CPS-LABEL: define void @main( +; JUMP-INLINER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META15:![0-9]+]] !continuation [[META16:![0-9]+]] { +; JUMP-INLINER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_I:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; JUMP-INLINER-CPS-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 +; JUMP-INLINER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; JUMP-INLINER-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 +; JUMP-INLINER-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; JUMP-INLINER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; JUMP-INLINER-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; JUMP-INLINER-CPS-NEXT: [[RET_ADDR_I:%.*]] = call i32 @get.ret.addr() +; JUMP-INLINER-CPS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[SYSTEM_DATA_ALLOCA_I]]) +; JUMP-INLINER-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], ptr [[SYSTEM_DATA_ALLOCA_I]], align 4 +; JUMP-INLINER-CPS-NEXT: store i32 999, ptr @debug_global, align 4 +; JUMP-INLINER-CPS-NEXT: [[TMP2:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA_I]], align 4 +; JUMP-INLINER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RET_ADDR_I]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP2]]), !continuation.registercount [[META8]] +; JUMP-INLINER-CPS-NEXT: unreachable +; JUMP-INLINER-CPS: Callable.exit: +; JUMP-INLINER-CPS-NEXT: unreachable +; JUMP-INLINER-CPS: _cont_CallShader.exit: +; JUMP-INLINER-CPS-NEXT: ret void +; + %params = alloca %struct.TheirParams, align 4 + call void @dx.op.callShader.struct.TheirParams(i32 159, i32 1, %struct.TheirParams* nonnull %params) + ret void +} + +; Function Attrs: nounwind +declare !pointeetys !19 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} +!dx.version = !{!1} +!dx.valver = !{!1} +!dx.shaderModel = !{!2} +!dx.entryPoints = !{!3, !6} +!lgc.cps.module = !{} + +attributes #1 = { noreturn } + +!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"} +!1 = !{i32 1, i32 6} +!2 = !{!"lib", i32 6, i32 6} +!3 = !{null, !"", null, !4, !12} +!4 = !{!5, !9, null, null} +!5 = !{!6} +!6 = !{void ()* @main, !"main", null, null, !7} +!7 = !{i32 8, i32 7, i32 6, i32 16, i32 7, i32 8, i32 5, !8} +!8 = !{i32 0} +!9 = !{!10} +!10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} +!11 = !{i32 0, i32 9} +!12 = !{i32 0, i64 65536} +!13 = !{%struct.DispatchSystemData poison} +!15 = !{%struct.SystemData poison} +!19 = !{%struct.TheirParams poison} +!21 = !{%struct.AnyHitTraversalData poison} +!23 = !{%struct.Payload poison} +!25 = !{i32 5} diff --git a/llvmraytracing/test/dx/intersection-registercount.ll b/llvmraytracing/test/dx/intersection-registercount.ll index 1faa240456..b95b9c5e00 100644 --- a/llvmraytracing/test/dx/intersection-registercount.ll +++ b/llvmraytracing/test/dx/intersection-registercount.ll @@ -1,11 +1,6 @@ -; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s +; RUN: opt --verify-each --report-payload-register-sizes -passes='dxil-cont-intrinsic-prepare,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,continuations-stats-report,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s -; Check that the size of @REGISTERS is as big as the continuation.registercount when there is an intersection shader -; CHECK: @REGISTERS = external addrspace(20) global [25 x i32] - -; Check !continuation.registercount metadata on @Intersection -; CHECK: define void @Intersection{{.*}}!continuation.registercount ![[MDREGCOUNT:[0-9]+]] -; CHECK: ![[MDREGCOUNT]] = !{i32 25} +; CHECK: Incoming and max outgoing payload VGPR size of "Intersection" (intersection): 100 and 100 bytes target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -28,27 +23,25 @@ declare i64 @_cont_GetTraversalAddr() #0 declare i32 @_cont_GetContinuationStackAddr() #0 -declare !types !16 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !16 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !18 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !18 void 
@_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !19 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !19 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 declare %struct.DispatchSystemData @_cont_Traversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -declare !types !21 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 +declare !pointeetys !21 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 -declare !types !23 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !23 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !24 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !24 { ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !26 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !26 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -57,7 +50,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !27 { +define i1 
@_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !27 { %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 @@ -65,49 +58,49 @@ define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hi } ; Function Attrs: nounwind memory(none) -declare !types !28 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !28 i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !28 i32 @_cont_DispatchRaysDimensions(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !29 float @_cont_WorldRayOrigin(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 +declare !pointeetys !29 float @_cont_WorldRayDirection(%struct.DispatchSystemData* nocapture readnone, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !30 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !30 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(read) -declare !types !31 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !31 float 
@_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !24 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !24 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !33 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !33 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !33 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !34 float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 +declare !pointeetys !34 float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !35 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !35 float @_cont_ObjectToWorld(%struct.DispatchSystemData* nocapture readnone, 
%struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !35 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 +declare !pointeetys !35 float @_cont_WorldToObject(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*, i32, i32) #1 ; Function Attrs: nounwind memory(none) -declare !types !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind define void @Intersection() #3 !lgc.rt.shaderstage !41 { @@ -115,7 +108,7 @@ define void @Intersection() #3 !lgc.rt.shaderstage !41 { } ; Function Attrs: nounwind memory(read) -declare !types !37 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 +declare !pointeetys !37 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -123,10 +116,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !39 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !39 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !39 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !39 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind memory(none) } @@ -160,29 +153,29 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !13 = !{i32 8, i32 8, i32 5, !14} !14 = !{i32 0} !15 = !{i32 25} -!16 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !17} +!16 = !{%struct.SystemData poison} !17 = !{i32 0, %struct.SystemData poison} -!18 = !{!"function", !"void", !17, %struct.BuiltInTriangleIntersectionAttributes poison} -!19 = !{!"function", i1 poison, !20} +!18 = !{%struct.SystemData poison} +!19 = !{%struct.TraversalData poison} !20 = !{i32 0, %struct.TraversalData poison} -!21 = !{!"function", %struct.HitData poison, !22} +!21 = !{%struct.AnyHitTraversalData poison} !22 = !{i32 0, %struct.AnyHitTraversalData poison} -!23 = !{!"function", %struct.HitData poison, !17} -!24 = !{!"function", i32 poison, !25} +!23 = !{%struct.SystemData poison} +!24 = !{%struct.DispatchSystemData poison} !25 = !{i32 0, %struct.DispatchSystemData poison} -!26 = !{!"function", !"void", !25, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, 
float poison, float poison, float poison, float poison, float poison} -!27 = !{!"function", i1 poison, !22, float poison, i32 poison} -!28 = !{!"function", i32 poison, !25, i32 poison} -!29 = !{!"function", float poison, !25, i32 poison} -!30 = !{!"function", float poison, !25} -!31 = !{!"function", float poison, !25, !32} +!26 = !{%struct.DispatchSystemData poison} +!27 = !{%struct.AnyHitTraversalData poison} +!28 = !{%struct.DispatchSystemData poison} +!29 = !{%struct.DispatchSystemData poison} +!30 = !{%struct.DispatchSystemData poison} +!31 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !32 = !{i32 0, %struct.HitData poison} -!33 = !{!"function", i32 poison, !25, !32} -!34 = !{!"function", float poison, !25, !32, i32 poison} -!35 = !{!"function", float poison, !25, !32, i32 poison, i32 poison} -!36 = !{!"function", i32 poison, !17, !32} -!37 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !38} +!33 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!34 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!35 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!36 = !{null, %struct.SystemData poison, %struct.HitData poison} +!37 = !{%struct.RayPayload poison} !38 = !{i32 0, %struct.RayPayload poison} -!39 = !{!"function", !"void", i64 poison, !40} +!39 = !{i8 poison} !40 = !{i32 0, i8 poison} !41 = !{i32 1} diff --git a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll index c8abbcea25..fff640b854 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll @@ -1,77 +1,89 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --function main --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=ALL %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=ALL %s +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s %struct.DispatchSystemData = type { i32 } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.HitData = type { float, i32 } -%struct.Payload = type { [8 x i32] } +%struct.Payload = type { [4 x i32] } +%struct.SystemData = type { float } +%struct.TraversalData = type { i32 } @debug_global = external global i32 declare i32 @_AmdContPayloadRegistersGetI32(i32) -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare !pointeetys !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !11 %struct.BuiltInTriangleIntersectionAttributes 
@_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) -declare !types !11 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) +declare !pointeetys !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) -declare !types !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) +declare !pointeetys !17 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define void @main() { -; ALL-LABEL: define void @main( -; ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META12:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { +declare void @lgc.ilcps.waitContinue(...) 
+ +define void @_cont_Traversal(%struct.TraversalData %data) #1 !lgc.rt.shaderstage !3 { +; ALL-LABEL: define void @_cont_Traversal( +; ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.state [[META4:![0-9]+]] { ; ALL-NEXT: entry: ; ALL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; ALL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; ALL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; ALL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 -; ALL-NEXT: store i32 [[TMP2]], ptr @debug_global, align 4 -; ALL-NEXT: ret void -; ALL: entry.split: +; ALL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 0 +; ALL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 1 +; ALL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 2 +; ALL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 3 +; ALL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0 +; ALL-NEXT: store i32 [[PAYLOAD_FCA_5_EXTRACT]], ptr @debug_global, align 4 +; ALL-NEXT: [[DOTFCA_0_INSERT3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; ALL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; ALL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; ALL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 2 +; ALL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_2_INSERT]], 
i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; ALL-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; ALL-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 0, i64 -1, i32 [[TMP1]], i64 poison, [[STRUCT_SYSTEMDATA:%.*]] poison, [8 x i32] poison, [4 x i32] [[DOTFCA_3_INSERT]]) ; ALL-NEXT: unreachable ; -; LOWERRAYTRACINGPIPELINE-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META12:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META5]] { +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @_cont_Traversal( +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META3:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: entry: -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @PAYLOAD, i32 0, i32 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[VAL]], ptr @debug_global, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret void +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [4 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], 
align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP2]], ptr @debug_global, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.cps.jump(i64 0, i32 -1, {} poison, i64 poison, [[STRUCT_SYSTEMDATA:%.*]] poison, [8 x i32] poison, [4 x i32] [[TMP4]]), !continuation.registercount [[META0]], !waitmask [[META4:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; entry: - %val = call i32 @_AmdContPayloadRegistersGetI32(i32 5) + %val = call i32 @_AmdContPayloadRegistersGetI32(i32 2) store i32 %val, i32* @debug_global, align 4 - ret void -} - -define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !types !14 { - ret void + call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 2, i64 poison, %struct.SystemData poison) + unreachable } -!dx.entryPoints = !{!0, !3, !7} +!continuation.maxPayloadRegisterCount = !{!18} -!0 = !{null, !"", null, !1, !6} -!1 = !{!2, null, null, null} -!2 = !{!3} -!3 = !{void ()* @main, !"main", null, null, !4} -!4 = !{i32 8, i32 7, i32 6, i32 16, i32 7, i32 8, i32 5, !5} +!3 = !{i32 6} +!4 = !{i32 8, i32 12, i32 6, i32 16, i32 7, i32 8, i32 5, !5} !5 = !{i32 0} !6 = !{i32 0, i64 65536} -!7 = !{void (%struct.Payload*, %struct.Payload*)* @chit, !"chit", null, null, !8} !8 = !{i32 8, i32 10, i32 6, i32 16, i32 7, i32 8, i32 5, !5} -!9 = !{!"function", i32 poison, !10} +!9 = !{%struct.DispatchSystemData poison} !10 = !{i32 0, %struct.DispatchSystemData poison} -!11 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !10} -!12 = !{!"function", i32 poison, !10, !13} +!11 = !{%struct.DispatchSystemData poison} +!12 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !13 = !{i32 0, %struct.HitData poison} -!14 = !{!"function", !"void", !15, !15} +!14 = !{null, %struct.Payload poison, %struct.Payload poison} !15 = !{i32 0, %struct.Payload poison} +!16 = !{i32 0, %struct.TraversalData poison} +!17 = !{%struct.TraversalData poison} +!18 = !{i32 4} diff --git a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll index b50f4f4e27..09fb6b4991 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-i32-count.ll @@ -1,24 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each 
-passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=MINCOUNT %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-MINCOUNT %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=MINCOUNT %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-MINCOUNT %s %struct.DispatchSystemData = type { i32 } @debug_global = external global i32 declare i32 @_AmdContPayloadRegistersI32Count() +%struct.TraversalData = type { i32 } -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare !pointeetys !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !12 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) -declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) - -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) 
alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } define void @main() { ; MINCOUNT-LABEL: define void @main( -; MINCOUNT-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META10:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { +; MINCOUNT-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META10:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] { ; MINCOUNT-NEXT: entry: ; MINCOUNT-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; MINCOUNT-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -33,6 +33,7 @@ define void @main() { ; LOWERRAYTRACINGPIPELINE-MINCOUNT-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META10:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] { ; LOWERRAYTRACINGPIPELINE-MINCOUNT-NEXT: entry: ; LOWERRAYTRACINGPIPELINE-MINCOUNT-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-MINCOUNT-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-MINCOUNT-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-MINCOUNT-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-MINCOUNT-NEXT: store i32 11, ptr @debug_global, align 4 @@ -57,5 +58,7 @@ entry: !6 = !{i32 0, i64 65536} !7 = !{i32 15} !8 = !{i32 11} -!9 = !{!"function", i32 poison, !10} +!9 = !{%struct.DispatchSystemData poison} !10 = !{i32 0, %struct.DispatchSystemData poison} +!11 = !{i32 0, %struct.TraversalData poison} +!12 = !{%struct.TraversalData poison} diff --git 
a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll index 8eb99d7bc7..79e350a945 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll @@ -1,72 +1,80 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=ALL %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=ALL %s +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s %struct.DispatchSystemData = type { i32 } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.HitData = type { float, i32 } -%struct.Payload = type { [8 x i32] } +%struct.Payload = type { [4 x i32] } +%struct.SystemData = type { float } +%struct.TraversalData = type { i32 } declare void 
@_AmdContPayloadRegistersSetI32(i32, i32) -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare !pointeetys !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !9 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !11 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) -declare !types !11 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) +declare !pointeetys !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) -declare !types !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) +declare !pointeetys !17 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { - ret void -} +!continuation.maxPayloadRegisterCount = !{!18} + +declare void @lgc.ilcps.waitContinue(...) 
-define void @main() { -; ALL-LABEL: define void @main( -; ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META12:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META5]] !continuation.state [[META5]] { +define void @_cont_Traversal(%struct.TraversalData %data) #1 !lgc.rt.shaderstage !3 { +; ALL-LABEL: define void @_cont_Traversal( +; ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.state [[META4:![0-9]+]] { ; ALL-NEXT: entry: ; ALL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; ALL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; ALL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; ALL-NEXT: store i32 42, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 -; ALL-NEXT: ret void -; ALL: entry.split: +; ALL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 0 +; ALL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 1 +; ALL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 2 +; ALL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 3 +; ALL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0 +; ALL-NEXT: [[DOTFCA_0_INSERT2:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; ALL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; ALL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; 
ALL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; ALL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_2_INSERT]], i32 42, 3 +; ALL-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 +; ALL-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 0, i64 -1, i32 [[TMP1]], i64 poison, [[STRUCT_SYSTEMDATA:%.*]] poison, [8 x i32] poison, [4 x i32] [[DOTFCA_3_INSERT]]) ; ALL-NEXT: unreachable ; -; LOWERRAYTRACINGPIPELINE-LABEL: define void @main( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META12:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META5]] { +; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @_cont_Traversal( +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META3:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: entry: -; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 42, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @PAYLOAD, i32 0, i32 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: ret void +; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [4 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 42, ptr [[TMP1]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.cps.jump(i64 0, i32 -1, {} poison, i64 poison, [[STRUCT_SYSTEMDATA:%.*]] poison, [8 x i32] poison, [4 x i32] [[TMP3]]), !continuation.registercount [[META0]], !waitmask [[META4:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; entry: - call void @_AmdContPayloadRegistersSetI32(i32 5, i32 42) - ret void + call void @_AmdContPayloadRegistersSetI32(i32 3, i32 42) + call void (...) @lgc.ilcps.waitContinue(i64 0, i64 -1, i32 2, i64 poison, %struct.SystemData poison) + unreachable } -define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !types !14 { - ret void -} - -!dx.entryPoints = !{!0, !3, !7} - -!0 = !{null, !"", null, !1, !6} -!1 = !{!2, null, null, null} -!2 = !{!3} -!3 = !{void ()* @main, !"main", null, null, !4} +!3 = !{i32 6} !4 = !{i32 8, i32 7, i32 6, i32 16, i32 7, i32 8, i32 5, !5} !5 = !{i32 0} !6 = !{i32 0, i64 65536} -!7 = !{void (%struct.Payload*, %struct.Payload*)* @chit, !"chit", null, null, !8} !8 = !{i32 8, i32 10, i32 6, i32 16, i32 7, i32 8, i32 5, !5} -!9 = !{!"function", i32 poison, !10} +!9 = !{%struct.DispatchSystemData poison} !10 = !{i32 0, %struct.DispatchSystemData poison} -!11 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !10} -!12 = !{!"function", i32 poison, !10, !13} +!11 = !{%struct.DispatchSystemData poison} +!12 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !13 = !{i32 0, %struct.HitData poison} -!14 
= !{!"function", !"void", !15, !15} +!14 = !{null, %struct.Payload poison, %struct.Payload poison} !15 = !{i32 0, %struct.Payload poison} +!16 = !{i32 0, %struct.TraversalData poison} +!17 = !{%struct.TraversalData poison} +!18 = !{i32 4} diff --git a/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll b/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll index 413c7c4492..8f623c9ba2 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-stack-access.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck %s -check-prefix=STACK_SCRATCH -; RUN: grep -v SKIP_SCRATCH_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck %s -check-prefix=STACK_GLOBAL +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck %s -check-prefix=STACK_SCRATCH +; RUN: grep -v SKIP_SCRATCH_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck %s -check-prefix=STACK_GLOBAL declare i32 @_AmdContStackAlloc(i32 %size) declare i32 @_AmdContStackLoadI32(i32 %addr) diff --git a/llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll b/llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll index 1cef618038..62185f1380 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-stack-alloc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function main --version 2 -; RUN: opt --verify-each 
-passes='cgscc(inline),lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s +; RUN: opt --verify-each -passes='cgscc(inline),lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s declare i32 @_AmdContStackAlloc(i32 %size) declare i32 @_AmdContPayloadRegistersI32Count() @@ -8,22 +8,24 @@ declare i32 @_cont_GetContinuationStackAddr() #0 %struct.DispatchSystemData = type { i32 } %struct.HitData = type { float, i32 } %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } -declare %struct.DispatchSystemData @_cont_SetupRayGen() -declare !types !15 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !16 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) -declare !types !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) +%struct.TraversalData = type { <3 x float>, <3 x float>, float } +declare !pointeetys !15 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !16 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) +declare !pointeetys !12 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) + +declare !pointeetys !21 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) %struct.Payload = type { [8 x i32] } @debug_global = external global i32 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly 
%data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } define void @main() !lgc.rt.shaderstage !17 { ; CHECK-LABEL: define void @main -; CHECK-SAME: (i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation [[META12:![0-9]+]] !continuation.stacksize [[META13:![0-9]+]] !continuation.state [[META5]] { +; CHECK-SAME: (i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation [[META12:![0-9]+]] !continuation.stacksize [[META13:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -49,7 +51,7 @@ entry: ; CHECK: !{{.*}} = !{i32 120} ; Define hit shader to increase payload size -define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !types !10 !lgc.rt.shaderstage !18 { +define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !pointeetys !10 !lgc.rt.shaderstage !18 { ret void } @@ -65,13 +67,15 @@ define void @chit(%struct.Payload* %pl, %struct.Payload* %attrs) !types !10 !lgc !7 = !{i32 0} !8 = !{void (%struct.Payload*, %struct.Payload*)* @chit, !"chit", null, null, !9} !9 = !{i32 8, i32 10, i32 6, i32 16, i32 7, i32 8, i32 5, !7} -!10 = !{!"function", !"void", !11, !11} +!10 = !{null, %struct.Payload poison, %struct.Payload poison} !11 = !{i32 0, %struct.Payload poison} -!12 = !{!"function", i32 poison, !13, !14} +!12 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !13 = !{i32 0, %struct.DispatchSystemData poison} !14 = !{i32 0, %struct.HitData poison} -!15 = !{!"function", !"void", !13} -!16 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !13} +!15 = !{%struct.DispatchSystemData poison} +!16 = 
!{%struct.DispatchSystemData poison} !17 = !{i32 0} !18 = !{i32 3} !19 = !{i32 30} +!20 = !{i32 0, %struct.TraversalData poison} +!21 = !{%struct.TraversalData poison} diff --git a/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-false.ll b/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-false.ll index e52eb1c016..fdd76887c5 100644 --- a/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-false.ll +++ b/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-false.ll @@ -7,9 +7,7 @@ declare i1 @_AmdContinuationStackIsGlobal() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) define void @main() { ; CHECK-LABEL: define void @main( @@ -37,5 +35,4 @@ entry: !5 = !{i32 0} !6 = !{i32 0, i64 65536} !7 = !{i32 21} -!8 = !{!"function", i32 poison, !9} -!9 = !{i32 0, %struct.DispatchSystemData poison} +!8 = !{%struct.DispatchSystemData poison} diff --git a/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-true.ll b/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-true.ll index 31da237f86..4d58b94a95 100644 --- a/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-true.ll +++ b/llvmraytracing/test/dx/intrinsics/continuation-stack-is-global-true.ll @@ -7,9 +7,7 @@ declare i1 @_AmdContinuationStackIsGlobal() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) define void @main() { ; CHECK-LABEL: define void @main( @@ -37,5 +35,4 @@ entry: !5 = !{i32 0} !6 = !{i32 0, i64 65536} !7 = !{i32 22} -!8 = !{!"function", i32 poison, !9} -!9 = !{i32 0, %struct.DispatchSystemData poison} +!8 = !{%struct.DispatchSystemData poison} diff --git 
a/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll b/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll index bc4726f56f..90bcb8777e 100644 --- a/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll +++ b/llvmraytracing/test/dx/intrinsics/get-current-func-addr.ll @@ -6,7 +6,7 @@ declare void @Use(i64) declare i64 @_AmdGetCurrentFuncAddr() -declare !types !2 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !2 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) define void @MyRayGen() { ; CHECK-LABEL: define void @MyRayGen() { @@ -17,8 +17,9 @@ define void @MyRayGen() { ; ; CHECK-CPS-LABEL: define void @MyRayGen() { ; CHECK-CPS-NEXT: AllocaSpillBB: -; CHECK-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyRayGen) -; CHECK-CPS-NEXT: call void @Use(i64 [[TMP0]]) +; CHECK-CPS-NEXT: [[TMP0:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @MyRayGen) +; CHECK-CPS-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0:%.*]] to i64 +; CHECK-CPS-NEXT: call void @Use(i64 [[TMP1]]) ; CHECK-CPS-NEXT: ret void ; AllocaSpillBB: @@ -36,8 +37,9 @@ define void @MyRayGen.resume.0() { ; ; CHECK-CPS-LABEL: define void @MyRayGen.resume.0() { ; CHECK-CPS-NEXT: entryresume.0: -; CHECK-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyRayGen.resume.0) -; CHECK-CPS-NEXT: call void @Use(i64 [[TMP0]]) +; CHECK-CPS-NEXT: [[TMP0:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference__i32(ptr @MyRayGen.resume.0) +; CHECK-CPS-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0:%.*]] to i64 +; CHECK-CPS-NEXT: call void @Use(i64 [[TMP1]]) ; CHECK-CPS-NEXT: ret void ; entryresume.0: @@ -47,5 +49,4 @@ entryresume.0: } !lgc.cps.module = !{} -!1 = !{i32 0, %struct.DispatchSystemData poison} -!2 = !{!"function", i32 poison, !1} +!2 = !{%struct.DispatchSystemData poison} diff --git a/llvmraytracing/test/dx/intrinsics/get-rtip.ll b/llvmraytracing/test/dx/intrinsics/get-rtip.ll index 08faf97e4e..7f87f75348 100644 --- a/llvmraytracing/test/dx/intrinsics/get-rtip.ll +++ b/llvmraytracing/test/dx/intrinsics/get-rtip.ll @@ -4,8 +4,7 @@ declare i32 @_AmdGetRtip() %struct.DispatchSystemData = type { i32 } -declare %struct.DispatchSystemData @_cont_SetupRayGen() -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) @debug_global = external global i32 @@ -26,5 +25,4 @@ entry: !0 = !{i32 2} !1 = !{i32 0} -!8 = !{!"function", i32 poison, !9} -!9 = !{i32 0, %struct.DispatchSystemData poison} +!8 = !{%struct.DispatchSystemData poison} diff --git a/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll b/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll index ad9bd34f88..ed2e841e3d 100644 --- a/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll +++ b/llvmraytracing/test/dx/intrinsics/get-shader-kind.ll @@ -5,13 +5,14 @@ %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.HitData = type { float, i32 } %struct.Payload = type { i32 } - +%struct.TraversalData = type { i32 } declare i32 @_AmdGetShaderKind() -declare %struct.DispatchSystemData @_cont_SetupRayGen() -declare !types !3 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !5 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) -declare !types !6 i32 
@_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) +declare !pointeetys !3 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !5 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) +declare !pointeetys !6 i32 @_cont_HitKind(%struct.DispatchSystemData*, %struct.HitData*) + +declare !pointeetys !18 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) ; Check that GetShaderKind calls in non-shaders, like left-over intrinsics, are ignored. define float @_cont_RayTCurrent() { @@ -26,24 +27,27 @@ define float @_cont_RayTCurrent() { } ; Note: DXILShaderKind::Miss has value 11 -define void @MyMiss(%struct.Payload* %payload) !types !1 !lgc.rt.shaderstage !16 { +define void @MyMiss(%struct.Payload* %payload) !pointeetys !1 !lgc.rt.shaderstage !16 { ; CHECK-LABEL: define %struct.DispatchSystemData @MyMiss -; CHECK-SAME: (i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META12:![0-9]+]] !continuation.registercount [[META5:![0-9]+]] !continuation [[META13:![0-9]+]] { +; CHECK-SAME: (i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META12:![0-9]+]] !continuation.registercount [[META5:![0-9]+]] !continuation [[META13:![0-9]+]] { ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [7 x i32], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 +; CHECK-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[SYSTEM_DATA_ALLOCA]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0 -; 
CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; CHECK-NEXT: store i32 11, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(20) @PAYLOAD, align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; CHECK-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]]), !continuation.registercount [[META5]] +; CHECK-NEXT: [[TMP10:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], [8 x i32] poison, [1 x i32] [[TMP10]]), !continuation.registercount [[META5]] ; CHECK-NEXT: unreachable ; %1 = call i32 @_AmdGetShaderKind() @@ -54,12 +58,12 @@ define void @MyMiss(%struct.Payload* %payload) !types !1 !lgc.rt.shaderstage !16 !dx.entryPoints = !{!12, !13} -!1 = !{!"function", !"void", !2} +!1 = !{%struct.Payload poison} !2 = !{i32 0, %struct.Payload poison} -!3 = !{!"function", i32 poison, !4} +!3 = !{%struct.DispatchSystemData poison} !4 = !{i32 0, %struct.DispatchSystemData poison} -!5 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !4} -!6 = !{!"function", i32 poison, !4, !7} +!5 = !{%struct.DispatchSystemData poison} +!6 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !7 = !{i32 0, %struct.HitData poison} !12 = !{null, !"", null, null, null} !13 = !{void (%struct.Payload*)* @MyMiss, !"MyMiss", null, null, !14} @@ -67,3 +71,5 @@ define void @MyMiss(%struct.Payload* %payload) !types !1 !lgc.rt.shaderstage !16 !14 = !{i32 8, i32 11, i32 6, i32 4, i32 5, !15} !15 = !{i32 0} !16 = !{i32 4} +!17 = !{i32 0, %struct.TraversalData poison} +!18 = !{%struct.TraversalData poison} diff --git a/llvmraytracing/test/dx/intrinsics/shader-index.ll b/llvmraytracing/test/dx/intrinsics/shader-index.ll index 6913bd849b..b00d06be1e 100644 --- a/llvmraytracing/test/dx/intrinsics/shader-index.ll +++ b/llvmraytracing/test/dx/intrinsics/shader-index.ll @@ -8,15 +8,13 @@ declare i32 @lgc.rt.shader.index() -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) - -define i1 @_cont_ReportHit(%struct.DispatchSystemData* %data, float %t, i32 %hitKind) #0 !types !20 { +define i1 @_cont_ReportHit(%struct.DispatchSystemData* %data, float %t, i32 %hitKind) #0 !pointeetys !20 { ret i1 true } -define void 
@_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } @@ -25,7 +23,7 @@ define void @main() !lgc.rt.shaderstage !24 { ; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META13:![0-9]+]] !lgc.cps [[META10:![0-9]+]] !continuation [[META14:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; CHECK-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CHECK-NEXT: store i32 0, ptr @debug_global, align 4 @@ -37,25 +35,25 @@ entry: ret void } -define void @callable(%struct.Payload* %payload) !types !22 !lgc.rt.shaderstage !25 { +define void @callable(%struct.Payload* %payload) !pointeetys !22 !lgc.rt.shaderstage !25 { ; CHECK-LABEL: define void @callable( ; CHECK-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META15:![0-9]+]] !lgc.cps [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; CHECK-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_PAYLOAD:%.*]], align 8 -; CHECK-NEXT: store [1 x i32] 
[[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; CHECK-NEXT: store i32 [[SHADER_INDEX]], ptr @debug_global, align 4 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -; CHECK-NEXT: store i32 [[TMP4]], ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], [8 x i32] poison, [1 x i32] [[TMP6]]), !continuation.registercount [[META10]] ; CHECK-NEXT: unreachable ; @@ -76,13 +74,13 @@ entry: !4 = !{i32 8, i32 7} !6 = !{i32 0, i64 65536} !7 = !{i32 21} -!8 = !{!"function", i32 poison, !9} +!8 = !{%struct.DispatchSystemData poison} !9 = !{i32 0, %struct.DispatchSystemData poison} !10 = !{i1 ()* @callable, !"callable", null, null, !11} !11 = !{i32 8, i32 12} -!20 = !{!"function", i1 poison, !21, float poison, i32 poison} +!20 = !{%struct.DispatchSystemData poison} !21 = !{i32 0, %struct.DispatchSystemData poison} -!22 = !{!"function", i1 poison, !23} +!22 = !{%struct.Payload poison} !23 = !{i32 0, %struct.Payload poison} !24 = !{i32 0} !25 = !{i32 5} diff --git a/llvmraytracing/test/dx/intrinsics/value-i32.ll b/llvmraytracing/test/dx/intrinsics/value-i32.ll index d6952f0be3..46b359aab0 100644 --- a/llvmraytracing/test/dx/intrinsics/value-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/value-i32.ll @@ -3,24 +3,24 @@ %struct.Payload = type { float, i32, i64, i32 } -declare !types !0 i32 @_AmdValueI32Count(%struct.Payload*) +declare !pointeetys !0 i32 @_AmdValueI32Count(%struct.Payload*) -declare !types !2 i32 @_AmdValueGetI32(%struct.Payload*, i32) +declare !pointeetys !0 i32 @_AmdValueGetI32(%struct.Payload*, i32) -declare !types !3 void @_AmdValueSetI32(%struct.Payload*, i32, i32) +declare !pointeetys !0 void @_AmdValueSetI32(%struct.Payload*, i32, i32) -define i32 @count(%struct.Payload* %pl) !types !0 { +define i32 @count(%struct.Payload* %pl) !pointeetys !0 { ; CHECK-LABEL: define i32 @count -; CHECK-SAME: (ptr [[PL:%.*]]) !types [[META1:![0-9]+]] { +; CHECK-SAME: (ptr [[PL:%.*]]) !pointeetys [[META1:![0-9]+]] { ; CHECK-NEXT: ret i32 5 ; %val = call i32 @_AmdValueI32Count(%struct.Payload* %pl) ret i32 %val } -define i32 @get(%struct.Payload* %pl) !types !0 { +define i32 @get(%struct.Payload* %pl) !pointeetys 
!0 { ; CHECK-LABEL: define i32 @get -; CHECK-SAME: (ptr [[PL:%.*]]) !types [[META1]] { +; CHECK-SAME: (ptr [[PL:%.*]]) !pointeetys [[META1]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PL]], i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -29,9 +29,9 @@ define i32 @get(%struct.Payload* %pl) !types !0 { ret i32 %val } -define void @set(%struct.Payload* %pl, i32 %val) !types !4 { +define void @set(%struct.Payload* %pl, i32 %val) !pointeetys !0 { ; CHECK-LABEL: define void @set -; CHECK-SAME: (ptr [[PL:%.*]], i32 [[VAL:%.*]]) !types [[META3:![0-9]+]] { +; CHECK-SAME: (ptr [[PL:%.*]], i32 [[VAL:%.*]]) !pointeetys [[META3:![0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PL]], i32 2 ; CHECK-NEXT: store i32 [[VAL]], ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -42,9 +42,5 @@ define void @set(%struct.Payload* %pl, i32 %val) !types !4 { !continuation.stackAddrspace = !{!5} -!0 = !{!"function", i32 poison, !1} -!1 = !{i32 0, %struct.Payload poison} -!2 = !{!"function", i32 poison, !1, i32 poison} -!3 = !{!"function", !"void", !1, i32 poison, i32 poison} -!4 = !{!"function", !"void", !1, i32 poison} +!0 = !{%struct.Payload poison} !5 = !{i32 21} diff --git a/llvmraytracing/test/dx/lint/multiple-setlocalrootindex.ll b/llvmraytracing/test/dx/lint/multiple-setlocalrootindex.ll new file mode 100644 index 0000000000..0cb6227093 --- /dev/null +++ b/llvmraytracing/test/dx/lint/multiple-setlocalrootindex.ll @@ -0,0 +1,23 @@ +; RUN: not opt --verify-each -passes='continuations-lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s + +; CHECK: Found a function with more than one call to setLocalRootIndex +; CHECK-NEXT: ptr @RayGen + +target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%struct.DispatchSystemData = type { i32 } + +declare void @amd.dx.setLocalRootIndex(i32) + +define void @RayGen(i64 %dummyRetAddr, %struct.DispatchSystemData %0) !lgc.rt.shaderstage !0 !continuation.entry !1 !continuation !2 { + call void @amd.dx.setLocalRootIndex(i32 0) + call void @amd.dx.setLocalRootIndex(i32 5) + ret void +} + +!continuation.stackAddrspace = !{!3} + +!0 = !{i32 0} +!1 = !{} +!2 = !{void ()* @RayGen} +!3 = !{i32 21} diff --git a/llvmraytracing/test/dx/lint/undef-jump-target.ll b/llvmraytracing/test/dx/lint/undef-jump-target.ll new file mode 100644 index 0000000000..a3f9b2b829 --- /dev/null +++ b/llvmraytracing/test/dx/lint/undef-jump-target.ll @@ -0,0 +1,21 @@ +; RUN: not opt --verify-each -passes='continuations-lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s + +; CHECK: Jump has undefined jump target + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%struct.DispatchSystemData = type { i32 } + +declare void @lgc.ilcps.continue(...) + +define void @RayGen(i64 %dummyRetAddr, %struct.DispatchSystemData %0) !lgc.rt.shaderstage !0 !continuation.entry !1 !continuation !2 { + call void (...) 
@lgc.ilcps.continue(i64 undef, i32 undef, i64 undef), !continuation.registercount !0 + unreachable +} + +!continuation.stackAddrspace = !{!3} + +!0 = !{i32 0} +!1 = !{} +!2 = !{void ()* @RayGen} +!3 = !{i32 21} diff --git a/llvmraytracing/test/dx/lower-await.ll b/llvmraytracing/test/dx/lower-await.ll index c3ba72d4ad..7df77fa724 100644 --- a/llvmraytracing/test/dx/lower-await.ll +++ b/llvmraytracing/test/dx/lower-await.ll @@ -40,7 +40,7 @@ define void @simple_await(i64 %dummyRetAddr) !continuation.registercount !1 { ; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[SIMPLE_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[DUMMYRETADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANED-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await.resume.0) -; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CLEANED-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -70,7 +70,7 @@ define void @simple_await_entry() !continuation.entry !0 !continuation.registerc ; CLEANED-SAME: ) !continuation.registercount [[META1]] !continuation.entry [[META4:![0-9]+]] !continuation [[META5:![0-9]+]] !continuation.state [[META1]] { ; CLEANED-NEXT: AllocaSpillBB: ; CLEANED-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @simple_await_entry.resume.0) -; CLEANED-NEXT: call void (i64, ...) 
@continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CLEANED-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -106,7 +106,7 @@ define void @await_with_arg(i64 %dummyRetAddr, i32 %i) !continuation.registercou ; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_ARG_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[DUMMYRETADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANED-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @await_with_arg.resume.0) -; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun_with_arg to i64), i64 [[TMP0]], i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CLEANED-NEXT: call void (...) 
@lgc.cps.jump(i64 ptrtoint (ptr @async_fun_with_arg to i64), i32 -1, {} poison, i64 [[TMP0]], i32 [[I]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun_with_arg(i32 %i), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -142,7 +142,7 @@ define i32 @await_with_ret_value(i64 %dummyRetAddr) !continuation.registercount ; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[AWAIT_WITH_RET_VALUE_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[DUMMYRETADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANED-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @await_with_ret_value.resume.0) -; CLEANED-NEXT: call void (i64, ...) @continuation.continue(i64 ptrtoint (ptr @async_fun to i64), i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CLEANED-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun(), !continuation.registercount !1, !continuation.returnedRegistercount !1 @@ -177,7 +177,7 @@ define void @wait_await(i64 %dummyRetAddr) !continuation.registercount !1 { ; CLEANED-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[WAIT_AWAIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANED-NEXT: store i64 [[DUMMYRETADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 ; CLEANED-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @wait_await.resume.0) -; CLEANED-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 ptrtoint (ptr @async_fun_with_waitmask to i64), i64 -1, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; CLEANED-NEXT: call void (...) @lgc.cps.jump(i64 ptrtoint (ptr @async_fun_with_waitmask to i64), i32 -1, {} poison, i64 [[TMP0]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]], !waitmask [[META9:![0-9]+]] ; CLEANED-NEXT: unreachable ; %tok = call %continuation.token* @async_fun_with_waitmask(i64 -1), !continuation.wait.await !0, !continuation.registercount !1, !continuation.returnedRegistercount !1 diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll b/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll index e122b59b9d..a66664ddbf 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-call-shader.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s -; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=REGISTERBUFFER-CPS %s -; RUN: opt --verify-each 
-passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS-CPS %s +; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS-CPS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -20,26 +19,24 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i32 @_cont_GetContinuationStackAddr() -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !13 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !13 { ret i32 5 } ; Need _cont_ReportHit to get system data type -declare !types !22 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) - -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare !pointeetys !22 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) -declare !types !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) +declare !pointeetys !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) -declare !types !17 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) +declare 
!pointeetys !17 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types !18 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !pointeetys !18 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -54,7 +51,7 @@ define void @main() { } ; Function Attrs: nounwind -declare !types !19 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 +declare !pointeetys !19 void @dx.op.callShader.struct.TheirParams(i32, i32, %struct.TheirParams*) #0 attributes #0 = { nounwind } @@ -78,16 +75,16 @@ attributes #0 = { nounwind } !10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} !11 = !{i32 0, i32 9} !12 = !{i32 0, i64 65536} -!13 = !{!"function", i32 poison, !14} +!13 = !{%struct.DispatchSystemData poison} !14 = !{i32 0, %struct.DispatchSystemData poison} -!15 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !16} +!15 = !{%struct.SystemData poison} !16 = !{i32 0, %struct.SystemData poison} -!17 = !{!"function", !"void", !14} -!18 = !{!"function", !"void", !14, i32 poison} -!19 = !{!"function", !"void", i32 poison, i32 poison, !20} +!17 = !{%struct.DispatchSystemData poison} +!18 = !{%struct.DispatchSystemData poison} +!19 = 
!{%struct.TheirParams poison} !20 = !{i32 0, %struct.TheirParams poison} !21 = !{i32 0, %struct.AnyHitTraversalData poison} -!22 = !{!"function", i1 poison, !21, float poison, i32 poison} +!22 = !{%struct.AnyHitTraversalData poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -97,18 +94,23 @@ attributes #0 = { nounwind } ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META16:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META17:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP3]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP4]]) +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP3]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], [20 x i32] poison, [1 x i32] [[TMP4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } @await(ptr [[TMP8]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP9]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP10]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP9]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: ret void @@ -123,55 +125,27 @@ attributes #0 = { nounwind } ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META14:![0-9]+]] !continuation [[META16:![0-9]+]] { ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PARAMS:%.*]] = alloca [[STRUCT_THEIRPARAMS:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa19i32a1i32s(i32 2, i32 4, i32 5, [20 x i32] poison, [1 x i32] [[TMP4]]), !continuation.returnedRegistercount [[META14]], !continuation.registercount [[META14]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP5]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[TMP6]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP5]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[TMP6]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_THEIRPARAMS]] poison, ptr [[PARAMS]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_THEIRPARAMS]], ptr [[PARAMS]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP5]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void ; ; -; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( -; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) { -; REGISTERBUFFER-CPS-NEXT: ret i32 5 -; -; -; REGISTERBUFFER-CPS-LABEL: define void @main( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !lgc.cps [[META14:![0-9]+]] !continuation 
[[META16:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 undef, 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @main.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 4, {} poison, i64 [[TMP1]], i32 5, [20 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount [[META14]], !continuation.registercount [[META14]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define dso_local void @main.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [19 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META8]] !lgc.cps [[META14]] !continuation [[META16]] { -; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-CPS-NEXT: ret void -; -; ; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( ; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) { ; POSTPROCESS-CPS-NEXT: ret i32 5 @@ -188,7 +162,7 @@ attributes #0 = { nounwind } ; 
POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 undef, 0 ; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = call i64 @continuation.getAddrAndMD(ptr @main.resume.0) ; POSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP3]], i64 [[TMP4]], i32 5, [20 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.returnedRegistercount [[META14]], !continuation.registercount [[META14]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 2, i32 [[TMP3]], i64 [[TMP4]], i32 5, [20 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -197,10 +171,10 @@ attributes #0 = { nounwind } ; POSTPROCESS-CPS-NEXT: entryresume.0: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 2 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 0 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP5]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [19 x i32], [1 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-CPS-NEXT: ret void ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-exit-raygen.ll 
b/llvmraytracing/test/dx/lower-rt-pipeline-exit-raygen.ll index 9e86e4437f..b330debb16 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-exit-raygen.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-exit-raygen.ll @@ -15,18 +15,20 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 -declare void @continuation.waitContinue(i64, i64, ...) noreturn +declare void @lgc.ilcps.waitContinue(...) noreturn -declare !types !24 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) +declare !pointeetys !24 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +declare !pointeetys !27 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) + +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { %dispatchPayloadPtr = getelementptr inbounds %struct.DispatchSystemData, ptr %data, i32 0, i32 0 %dispatchPayload = load <3 x i32>, ptr %dispatchPayloadPtr, align 4 %deadLaneDispatchPayload = insertelement <3 x i32> %dispatchPayload, i32 -11, i32 0 %systemData = insertvalue %struct.SystemData poison, <3 x i32> %deadLaneDispatchPayload, 0, 0 %addrSuffix = load i32, ptr %data, align 4 %addr = zext i32 %addrSuffix to i64 - call void @continuation.waitContinue(i64 %addr, i64 -1, %struct.SystemData %systemData) + call void @lgc.ilcps.waitContinue(i64 %addr, i64 -1, %struct.SystemData %systemData) unreachable } @@ -69,18 +71,21 @@ attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="fa !21 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !22} !22 = !{i32 8, i32 7, 
i32 5, !23} !23 = !{i32 0} -!24 = !{!"function", i32 poison, !25} +!24 = !{%struct.DispatchSystemData poison} !25 = !{i32 0, %struct.DispatchSystemData poison} +!26 = !{i32 0, %struct.TraversalData poison} +!27 = !{%struct.TraversalData poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META16]] !continuation [[META19:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DISPATCHPAYLOAD_I:%.*]] = load <3 x i32>, ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DEADLANEDISPATCHPAYLOAD_I:%.*]] = insertelement <3 x i32> [[DISPATCHPAYLOAD_I]], i32 -11, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEMDATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] poison, <3 x i32> [[DEADLANEDISPATCHPAYLOAD_I]], 0, 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDRSUFFIX_I:%.*]] = load i32, ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = zext i32 [[ADDRSUFFIX_I]] to i64 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @continuation.waitContinue(i64 [[ADDR_I]], i64 -1, [[STRUCT_SYSTEMDATA]] [[SYSTEMDATA_I]]) #[[ATTR3:[0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: call void @lgc.ilcps.waitContinue(i64 [[ADDR_I]], i64 -1, [[STRUCT_SYSTEMDATA]] [[SYSTEMDATA_I]]) #[[ATTR3:[0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE: _cont_ExitRayGen.exit: ; LOWERRAYTRACINGPIPELINE-NEXT: ret void diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll 
b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll index d3b03afd08..7600b8aaa3 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics-hit.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -22,28 +22,26 @@ declare i64 @_cont_GetTraversalAddr() #0 declare i32 @_cont_GetContinuationStackAddr() #0 
-declare !types !25 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !25 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !27 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !27 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 declare %struct.DispatchSystemData @_cont_Traversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -declare !types !28 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !28 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 ; Function Attrs: nounwind memory(read) -declare !types !29 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 +declare !pointeetys !29 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 declare i1 @opaqueIsEnd() #0 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !types !31 { +define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !pointeetys !31 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i1 @_cont_IsEndSearch( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() @@ -58,7 +56,7 @@ define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !types !31 { ret i1 %isEnd } -define i32 
@_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !33 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !33 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -70,7 +68,7 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !35 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !35 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -79,14 +77,14 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !36 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !36 { %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 ret i1 true } -define %struct.HitData @_cont_GetCandidateState(%struct.TraversalData* %data) #0 !types !37 { +define %struct.HitData @_cont_GetCandidateState(%struct.TraversalData* %data) #0 !pointeetys !37 { ; 
LOWERRAYTRACINGPIPELINE-LABEL: define %struct.HitData @_cont_GetCandidateState( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 @@ -104,7 +102,7 @@ define %struct.HitData @_cont_GetCandidateState(%struct.TraversalData* %data) #0 ret %struct.HitData %res } -define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !38 { +define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !38 { ; LOWERRAYTRACINGPIPELINE-LABEL: define float @_cont_RayTCurrent( ; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) #[[ATTR3:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0 @@ -123,7 +121,7 @@ define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone % } ; Function Attrs: nounwind memory(none) -define i32 @_cont_HitKind(%struct.SystemData* nocapture readnone %data, %struct.HitData* %0) #2 !types !40 { +define i32 @_cont_HitKind(%struct.SystemData* nocapture readnone %data, %struct.HitData* %0) #2 !pointeetys !40 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_HitKind( ; LOWERRAYTRACINGPIPELINE-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 @@ -142,55 +140,56 @@ define i32 @_cont_HitKind(%struct.SystemData* nocapture readnone %data, %struct. 
} ; Function Attrs: nounwind memory(none) -declare !types !41 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !41 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: nounwind memory(none) -declare !types !41 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !41 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: nounwind memory(none) -declare !types !42 <3 x float> @_cont_WorldRayOrigin3(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !42 <3 x float> @_cont_WorldRayOrigin3(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: nounwind memory(none) -declare !types !42 <3 x float> @_cont_WorldRayDirection3(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !42 <3 x float> @_cont_WorldRayDirection3(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: nounwind memory(none) -declare !types !43 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !43 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !33 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: nounwind memory(none) -declare !types !44 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !44 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !44 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !44 i32 
@_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !44 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !44 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !45 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !45 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !45 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !45 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !46 [4 x <3 x float>] @_cont_ObjectToWorld4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !46 [4 x <3 x float>] @_cont_ObjectToWorld4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !46 [4 x <3 x float>] @_cont_WorldToObject4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !46 [4 x <3 x float>] @_cont_WorldToObject4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind define void @RayGen() #3 { ; LOWERRAYTRACINGPIPELINE-LABEL: define void @RayGen( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META29:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META18]] { ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; ; DXILCONTPOSTPROCESS-LABEL: define void @RayGen( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META28:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META18]] !continuation.state [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation [[META28:![0-9]+]] !continuation.entry [[META13:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -206,10 +205,12 @@ define void @RayGen() #3 { ; Function Attrs: nounwind define void @Intersection() #3 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @Intersection( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META30:![0-9]+]] !continuation [[META31:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META30:![0-9]+]] !continuation [[META31:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca 
[[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -231,23 +232,29 @@ define void @Intersection() #3 { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount [[META25]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP13]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] 
[[TMP12]], [19 x i32] poison, [30 x i32] [[TMP13]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount [[META25]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } @await(ptr [[TMP20]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP22]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[TMP23]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP22]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP14]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I]], label [[TMP16:%.*]], label [[TMP18:%.*]] -; LOWERRAYTRACINGPIPELINE: 16: +; LOWERRAYTRACINGPIPELINE: 19: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]]), !continuation.registercount [[META25]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], [8 x i32] poison, [30 x i32] [[TMP21]]), !continuation.registercount [[META25]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 18: +; LOWERRAYTRACINGPIPELINE: 22: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP19]]), !continuation.registercount [[META25]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP19]], [8 x i32] poison, [30 x i32] [[TMP24]]), !continuation.registercount [[META25]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; DXILCONTPOSTPROCESS-LABEL: define void @Intersection( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META29:![0-9]+]] !continuation [[META30:![0-9]+]] !continuation.registercount [[META25:![0-9]+]] !continuation.stacksize [[META31:![0-9]+]] !continuation.state [[META31]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META29:![0-9]+]] !continuation [[META30:![0-9]+]] !continuation.stacksize [[META31:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 @@ -260,6 +267,36 @@ define void @Intersection() #3 { ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; 
DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 
+; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 @@ -296,9 +333,39 @@ define void @Intersection() #3 { ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 ; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT12:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT9]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT15:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT12]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT18:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT15]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT21:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT18]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT24:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT21]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT27:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT24]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT30:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT27]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT33:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT30]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT36:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT33]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT39:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT36]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT42:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT39]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT45:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT42]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT48:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_12_INSERT45]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT51:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT48]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT54:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT51]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT57:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT54]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT60:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT57]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT63:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT60]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT66:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT63]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT69:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT66]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT72:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT69]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT75:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT72]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT78:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT75]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT81:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT78]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT84:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT81]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT87:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT84]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT90:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT87]], i32 
[[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT93:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT90]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT96:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT93]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(ptr @Intersection.resume.0) ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP11]], i64 [[TMP12]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META25]], !continuation.returnedRegistercount [[META25]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP11]], i64 [[TMP12]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float 4.000000e+00, i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], [19 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT96]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) @@ -311,102 +378,122 @@ define void @Intersection() #3 { } ; Function Attrs: nounwind -define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !47 { +define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !47 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @AnyHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] 
!continuation.registercount [[META26:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] !continuation.registercount [[META26:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], 
ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP28]], ptr [[ORIGHITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP23]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = call float @_cont_RayTMin(ptr [[TMP26]]) -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[TMP29]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = call float @_cont_RayTMin(ptr [[TMP43]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[TMP46]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I]], ptr [[TMP5]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I1:%.*]] = load float, ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I6:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP31]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I6:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP48]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I7:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I7]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP30]], ptr [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I2:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP34]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP47]], ptr [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I2:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP51]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I3:%.*]] = load [[STRUCT_HITDATA]], ptr [[RESPTR_I2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[RES_I3]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I4:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP33]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I4:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP50]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I5:%.*]] = load i32, ptr [[RESPTR_I4]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] undef, float [[TMP27]], 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP35]], float [[RES_I1]], 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP36]], i32 [[TMP32]], 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP37]], i32 [[RES_I5]], 3 -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] [[TMP38]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] undef, float [[TMP44]], 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP52]], float [[RES_I1]], 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP53]], i32 [[TMP49]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP54]], i32 [[RES_I5]], 3 +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] [[TMP55]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP8]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP58]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP45]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP62]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP50]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr [[TMP51]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP54]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP53]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP55]]), !continuation.registercount [[META26]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[TMP60]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP63]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP60]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP67]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP64]], ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP70]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr [[TMP71]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP57]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP56]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = 
load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP58]], [8 x i32] poison, [10 x i32] [[TMP73]]), !continuation.registercount [[META26]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; DXILCONTPOSTPROCESS-LABEL: define void @AnyHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] !continuation.registercount [[META26:![0-9]+]] !continuation.state [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation [[META33:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 @@ -435,26 +522,22 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP8]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP9]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_019_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT18]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_019_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_019_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT18]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_019_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP6]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT22:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_023_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT22]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_023_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTSROA_023_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT22]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_023_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call float @_cont_RayTMin(ptr [[TMP10]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call float @_cont_RayTMin(ptr [[TMP12]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[TMP15]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[TMP13]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I_FCA_0_LOAD]], 0 @@ -463,9 +546,9 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, 
%struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[RES_I_FCA_1_LOAD]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I6:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP17]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I6:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP15]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I7_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I6]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I7_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I7_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I7_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I7_FCA_0_LOAD]], 0 @@ -478,10 +561,10 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[RES_I7_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I7_FCA_1_INSERT]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I7_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: 
store i32 [[RES_I7_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I7_FCA_1_INSERT_FCA_1_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP16]], ptr [[TMP2]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I2:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP20]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP14]], ptr [[TMP2]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I2:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[TMP18]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I2]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I3_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I3_FCA_0_LOAD]], 0 @@ -490,63 +573,69 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I3_FCA_0_INSERT]], i32 [[RES_I3_FCA_1_LOAD]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I3_FCA_1_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I3_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I3_FCA_1_INSERT]], 1 -; 
DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I4:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP19]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I4:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP17]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I5:%.*]] = load i32, ptr [[RESPTR_I4]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD:%.*]] undef, float [[TMP13]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP21]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP22]], i32 [[TMP18]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP23]], i32 [[RES_I5]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT8:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP24]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP24]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP24]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP24]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD:%.*]] undef, float [[TMP11]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP19]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP20]], i32 [[TMP16]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = insertvalue [[STRUCT_RAYPAYLOAD]] [[TMP21]], i32 [[RES_I5]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT8:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP22]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP22]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP22]], 2 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_RAYPAYLOAD]] [[TMP22]], 3 ; DXILCONTPOSTPROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTFCA_0_EXTRACT8]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = bitcast float [[DOTFCA_1_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP34]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_2_EXTRACT]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_3_EXTRACT]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTFCA_0_EXTRACT8]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTFCA_1_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_020_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP26]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_025_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP26]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_020_4_VEC_INSERT:%.*]] = insertelement <2 x float> 
[[DOTSROA_020_0_VEC_INSERT]], float [[TMP28]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_020_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_025_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_025_0_VEC_INSERT]], float [[TMP28]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT24:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_025_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP29]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP9]], align 4 +; DXILCONTPOSTPROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP29]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT24]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP13]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_0_1_GEP10]], align 
4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_GEP14:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_0_1_GEP14]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], i32 [[DOTFCA_0_0_1_LOAD]], 0, 0, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP11]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP15]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_1_INSERT]], float [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP12]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP16:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP16]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], i32 [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_2_GEP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 
0, i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP13]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_2_GEP17:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP17]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_3_GEP14:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP14]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_3_GEP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP18]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_GEP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP15]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_GEP19:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP19]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_GEP16:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP16]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_GEP20:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP20]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_GEP17:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP17]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_GEP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP21]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP23]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 
[[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP24]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP30]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META26]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP30]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) @@ -562,77 +651,96 @@ define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.Buil } ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !47 { +define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !47 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] !continuation.registercount [[META26]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] 
[[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] !continuation.registercount [[META26]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP16]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr 
[[HITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[HITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call float @_cont_RayTMin(ptr [[TMP23]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP26]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call float @_cont_RayTMin(ptr [[TMP39]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store 
[[STRUCT_HITDATA]] [[TMP42]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I:%.*]] = load float, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP28]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP27]], ptr [[TMP2]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP30]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP44]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP28]], ptr [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP46]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[RES_I1:%.*]] = load i32, ptr [[RESPTR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP6]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) @PAYLOAD, align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP33]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP35]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP50]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP40]]), !continuation.registercount [[META26]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP52]], [20 x i32] poison, [10 x i32] [[TMP45]]), !continuation.registercount [[META26]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] !continuation.registercount [[META26]] !continuation.state [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR5]] !lgc.rt.shaderstage [[META34:![0-9]+]] !continuation [[META35:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 @@ -640,47 +748,49 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct. ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT]], ptr [[DOTFCA_1_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_03_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: 
[[TMP8:%.*]] = bitcast float [[DOTSROA_03_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_03_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_03_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP3]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_08_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_08_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_08_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_08_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = call float @_cont_RayTMin(ptr [[TMP6]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT9:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP9]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT11:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP9]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call float @_cont_RayTMin(ptr [[TMP10]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT4:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP13]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT6:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP13]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP15]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP16:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT15]], ptr [[DOTFCA_0_GEP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT17:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP15]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP18:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT17]], ptr [[DOTFCA_1_GEP18]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP14]], ptr [[TMP1]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP17]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT12:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP17]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call 
[[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP11]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP21:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT20]], ptr [[DOTFCA_0_GEP21]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT22:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP11]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP23:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT22]], ptr [[DOTFCA_1_GEP23]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP10]], ptr [[TMP1]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP13]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT17:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP13]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I1:%.*]] = load i32, ptr [[RESPTR_I]], align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, 
i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP18]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP14]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = 
load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP19]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META26]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP19]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [20 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %1 = call float @dx.op.rayTMin.f32(i32 153) @@ -736,7 +846,7 @@ declare float @dx.op.worldToObject.f32(i32, i32, i8) #2 declare float @dx.op.objectToWorld.f32(i32, i32, i8) #2 ; Function Attrs: nounwind -declare !types !50 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #4 +declare !pointeetys !50 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #4 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind memory(read) } @@ -777,29 +887,29 @@ attributes #4 = { nounwind } !22 = !{i32 8, i32 9, i32 5, !18} !23 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !24} !24 = !{i32 8, i32 10, i32 5, !18} -!25 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !26} +!25 = !{%struct.SystemData poison} !26 = !{i32 0, %struct.SystemData poison} -!27 = !{!"function", !"void", !26, %struct.BuiltInTriangleIntersectionAttributes poison} -!28 = !{!"function", %struct.HitData poison, !26} -!29 = !{!"function", !"void", !30} +!27 = !{%struct.SystemData poison} +!28 = !{%struct.SystemData poison} +!29 = 
!{%struct.AnyHitTraversalData poison} !30 = !{i32 0, %struct.AnyHitTraversalData poison} -!31 = !{!"function", i1 poison, !32} +!31 = !{%struct.TraversalData poison} !32 = !{i32 0, %struct.TraversalData poison} -!33 = !{!"function", i32 poison, !34} +!33 = !{%struct.DispatchSystemData poison} !34 = !{i32 0, %struct.DispatchSystemData poison} -!35 = !{!"function", !"void", !34, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!36 = !{!"function", i1 poison, !30, float poison, i32 poison} -!37 = !{!"function", %struct.HitData poison, !32} -!38 = !{!"function", float poison, !34, !39} +!35 = !{%struct.DispatchSystemData poison} +!36 = !{%struct.AnyHitTraversalData poison} +!37 = !{%struct.TraversalData poison} +!38 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !39 = !{i32 0, %struct.HitData poison} -!40 = !{!"function", i32 poison, !26, !39} -!41 = !{!"function", <3 x i32> poison, !34} -!42 = !{!"function", <3 x float> poison, !34} -!43 = !{!"function", float poison, !34} -!44 = !{!"function", i32 poison, !34, !39} -!45 = !{!"function", <3 x float> poison, !34, !39} -!46 = !{!"function", [4 x <3 x float>] poison, !34, !39} -!47 = !{!"function", !"void", !48, !49} +!40 = !{null, %struct.SystemData poison, %struct.HitData poison} +!41 = !{%struct.DispatchSystemData poison} +!42 = !{%struct.DispatchSystemData poison} +!43 = !{%struct.DispatchSystemData poison} +!44 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!45 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!46 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!47 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !48 = !{i32 0, %struct.RayPayload poison} !49 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!50 = !{!"function", i1 poison, 
i32 poison, float poison, i32 poison, !49} +!50 = !{%struct.BuiltInTriangleIntersectionAttributes poison} diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll index 54c79eb65b..c7c23dc628 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function ClosestHit --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -22,23 +22,21 @@ declare i64 @_cont_GetTraversalAddr() #0 declare i32 @_cont_GetContinuationStackAddr() #0 -declare !types !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 declare %struct.DispatchSystemData @_cont_Traversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 +declare !pointeetys !22 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 -declare !types !22 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 +declare !pointeetys !24 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -declare !types !24 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 - -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !25 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !25 { ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !27 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !27 { %dis_data = 
load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -48,54 +46,56 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: nounwind memory(none) -declare !types !28 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !28 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !28 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !28 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 <3 x float> @_cont_WorldRayOrigin3(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !29 <3 x float> @_cont_WorldRayOrigin3(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !29 <3 x float> @_cont_WorldRayDirection3(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !29 <3 x float> @_cont_WorldRayDirection3(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !30 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !30 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(read) -declare !types !31 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !31 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !25 i32 
@_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !25 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !33 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !33 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !33 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !33 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !34 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !34 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !34 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !35 [4 x <3 x float>] @_cont_ObjectToWorld4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !35 [4 x <3 x float>] @_cont_ObjectToWorld4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !35 [4 x <3 x float>] 
@_cont_WorldToObject4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !35 [4 x <3 x float>] @_cont_WorldToObject4x3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 + +declare !pointeetys !40 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersectionAttributes* %1) #3 !types !37 { +define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersectionAttributes* %1) #3 !pointeetys !37 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @ClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [17 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation [[META24:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -109,24 +109,29 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = alloca [4 x <3 x float>], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = alloca [4 x <3 x float>], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr 
[[TMP21]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP26]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP79]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP27]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP91]], ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP24]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP31]], ptr [[TMP11]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP11]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[HITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[HITATTRS]], i32 1 @@ -134,84 +139,88 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP31:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP31]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP32]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = call <3 x float> @_cont_WorldRayOrigin3(ptr [[TMP33]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[C:%.*]] = extractelement <3 x float> [[TMP34]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x float> @_cont_WorldRayDirection3(ptr [[TMP35]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[D:%.*]] = extractelement <3 x float> [[TMP36]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call float @_cont_RayTMin(ptr [[TMP37]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP40]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call float @_cont_RayTCurrent(ptr [[TMP39]], ptr [[TMP8]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = call i32 @_cont_RayFlags(ptr [[TMP42]]) -; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP44:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP45]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = call i32 @_cont_InstanceIndex(ptr [[TMP44]], ptr [[TMP4]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP48]], ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP47]], ptr [[TMP5]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP51]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call i32 @_cont_PrimitiveIndex(ptr [[TMP50]], ptr [[TMP6]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP47]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() +; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP48]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = call <3 x float> @_cont_WorldRayOrigin3(ptr [[TMP49]]) +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[C:%.*]] = extractelement <3 x float> [[TMP50]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call <3 x float> @_cont_WorldRayDirection3(ptr [[TMP51]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[D:%.*]] = extractelement <3 x float> [[TMP52]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP54]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP53]], ptr [[TMP9]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[K:%.*]] = extractelement <3 x float> [[TMP55]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP57]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP56]], ptr [[TMP10]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[L:%.*]] = extractelement <3 x float> [[TMP58]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP60]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = call [4 x <3 x float>] @_cont_ObjectToWorld4x3(ptr [[TMP59]], ptr [[TMP2]]) -; 
LOWERRAYTRACINGPIPELINE-NEXT: store [4 x <3 x float>] [[TMP61]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = call float @_cont_RayTMin(ptr [[TMP53]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP41]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = call float @_cont_RayTCurrent(ptr [[TMP55]], ptr [[TMP8]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = call i32 @_cont_RayFlags(ptr [[TMP58]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP61]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = call i32 @_cont_InstanceIndex(ptr [[TMP60]], ptr [[TMP4]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP64]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP63]], ptr [[TMP5]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP67]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = call i32 @_cont_PrimitiveIndex(ptr [[TMP66]], ptr [[TMP6]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP70]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP69]], ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[K:%.*]] = extractelement <3 x float> [[TMP71]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP73]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP72]], ptr [[TMP10]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[L:%.*]] = extractelement <3 x float> [[TMP74]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP76]], ptr [[TMP2]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = call [4 x <3 x float>] @_cont_ObjectToWorld4x3(ptr [[TMP75]], ptr [[TMP2]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [4 x <3 x float>] 
[[TMP77]], ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP1:%.*]] = getelementptr [4 x <3 x float>], ptr [[TMP13]], i32 0, i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP_LOAD2:%.*]] = load <3 x float>, ptr [[COL_GEP1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[M:%.*]] = extractelement <3 x float> [[COL_GEP_LOAD2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP63]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = call [4 x <3 x float>] @_cont_WorldToObject4x3(ptr [[TMP62]], ptr [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [4 x <3 x float>] [[TMP64]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP82]], ptr [[TMP3]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = call [4 x <3 x float>] @_cont_WorldToObject4x3(ptr [[TMP78]], ptr [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [4 x <3 x float>] [[TMP80]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP:%.*]] = getelementptr [4 x <3 x float>], ptr [[TMP12]], i32 0, i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[COL_GEP_LOAD:%.*]] = load <3 x float>, ptr [[COL_GEP]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[N:%.*]] = extractelement <3 x float> [[COL_GEP_LOAD]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP65]], ptr [[TMP7]], 
align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = call i32 @_cont_HitKind(ptr [[SYSTEM_DATA_ALLOCA]], ptr [[TMP7]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP67]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[TMP67]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP82]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP86]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP73]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP90]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP75]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP76]]), !continuation.registercount [[META20]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_HITDATA]] [[TMP83]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = call i32 @_cont_HitKind(ptr [[SYSTEM_DATA_ALLOCA]], ptr [[TMP7]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP14]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP86]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[TMP85]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP89]], ptr [[TMP87]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr [[TMP88]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP92]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP93]], ptr [[TMP90]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr [[TMP87]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[TMP88]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP95]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP96]], ptr [[TMP94]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP97]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP98]], [17 x i32] poison, [10 x i32] [[TMP81]]), !continuation.registercount [[META20]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; DXILCONTPOSTPROCESS-LABEL: define void @ClosestHit( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] !continuation.state [[META18:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [17 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -227,139 +236,151 @@ define void @ClosestHit(%struct.RayPayload* %0, %struct.BuiltInTriangleIntersect ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP15]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_016_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT14]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_016_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_016_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT14]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_016_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT19:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP11]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_021_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT19]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_021_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_021_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT19]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_021_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA1]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP18]]) -; DXILCONTPOSTPROCESS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP19]], i8 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA1]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP20]]) -; DXILCONTPOSTPROCESS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP21]], i8 0 
+; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP14]]) +; DXILCONTPOSTPROCESS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP15]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP16]]) +; DXILCONTPOSTPROCESS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP17]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = call <3 x float> @_cont_WorldRayOrigin3(ptr [[TMP18]]) +; DXILCONTPOSTPROCESS-NEXT: [[C:%.*]] = extractelement <3 x float> [[TMP19]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = call <3 x float> @_cont_WorldRayDirection3(ptr [[TMP20]]) +; DXILCONTPOSTPROCESS-NEXT: [[D:%.*]] = extractelement <3 x float> [[TMP21]], i8 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = call <3 x float> @_cont_WorldRayOrigin3(ptr [[TMP22]]) -; DXILCONTPOSTPROCESS-NEXT: [[C:%.*]] = extractelement <3 x float> [[TMP23]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = call float @_cont_RayTMin(ptr [[TMP22]]) ; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = call <3 x float> @_cont_WorldRayDirection3(ptr [[TMP24]]) -; DXILCONTPOSTPROCESS-NEXT: [[D:%.*]] = extractelement <3 x float> [[TMP25]], i8 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = call float @_cont_RayTMin(ptr [[TMP26]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT24:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP29]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP25:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP7]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT24]], ptr [[DOTFCA_0_GEP25]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT26:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP29]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP27:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP7]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT26]], ptr [[DOTFCA_1_GEP27]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = call float @_cont_RayTCurrent(ptr [[TMP28]], ptr [[TMP7]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = call i32 @_cont_RayFlags(ptr [[TMP31]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT40:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP34]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP41:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 0 -; 
DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT40]], ptr [[DOTFCA_0_GEP41]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT42:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP34]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP43:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT42]], ptr [[DOTFCA_1_GEP43]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = call i32 @_cont_InstanceIndex(ptr [[TMP33]], ptr [[TMP3]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT36:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP37]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP37:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT36]], ptr [[DOTFCA_0_GEP37]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT38:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP37]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP39:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT38]], ptr [[DOTFCA_1_GEP39]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP36]], ptr [[TMP4]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT32:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP40]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP33:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP5]], i32 
0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT32]], ptr [[DOTFCA_0_GEP33]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT34:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP40]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP35:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT34]], ptr [[DOTFCA_1_GEP35]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = call i32 @_cont_PrimitiveIndex(ptr [[TMP39]], ptr [[TMP5]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP43:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP43]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP21:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP8]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT20]], ptr [[DOTFCA_0_GEP21]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT22:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP43]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP23:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP8]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT22]], ptr [[DOTFCA_1_GEP23]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP44:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP42]], ptr [[TMP8]]) -; DXILCONTPOSTPROCESS-NEXT: [[K:%.*]] = extractelement <3 x float> [[TMP44]], i8 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP46:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT17:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP46]], 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP18:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP9]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT17]], ptr [[DOTFCA_0_GEP18]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT19:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP46]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT29:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP25]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP30:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP7]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT29]], ptr [[DOTFCA_0_GEP30]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT31:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP25]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP32:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP7]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT31]], ptr [[DOTFCA_1_GEP32]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = call float @_cont_RayTCurrent(ptr [[TMP24]], ptr [[TMP7]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = call i32 @_cont_RayFlags(ptr [[TMP27]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT45:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP30]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP46:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float 
[[DOTFCA_0_EXTRACT45]], ptr [[DOTFCA_0_GEP46]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT47:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP30]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP48:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT47]], ptr [[DOTFCA_1_GEP48]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = call i32 @_cont_InstanceIndex(ptr [[TMP29]], ptr [[TMP3]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT41:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP33]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP42:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT41]], ptr [[DOTFCA_0_GEP42]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT43:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP33]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP44:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP4]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT43]], ptr [[DOTFCA_1_GEP44]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = call i32 @_cont_InstanceID(ptr [[TMP32]], ptr [[TMP4]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP36:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT37:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP36]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP38:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: 
store float [[DOTFCA_0_EXTRACT37]], ptr [[DOTFCA_0_GEP38]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT39:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP36]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP40:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP5]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT39]], ptr [[DOTFCA_1_GEP40]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = call i32 @_cont_PrimitiveIndex(ptr [[TMP35]], ptr [[TMP5]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT25:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP39]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP8]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT25]], ptr [[DOTFCA_0_GEP26]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT27:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP39]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP28:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP8]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT27]], ptr [[DOTFCA_1_GEP28]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP38]], ptr [[TMP8]]) +; DXILCONTPOSTPROCESS-NEXT: [[K:%.*]] = extractelement <3 x float> [[TMP40]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT22:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP42]], 0 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_0_GEP23:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP9]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT22]], ptr [[DOTFCA_0_GEP23]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT24:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP42]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP9]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT19]], ptr [[DOTFCA_1_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP47:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP45]], ptr [[TMP9]]) -; DXILCONTPOSTPROCESS-NEXT: [[L:%.*]] = extractelement <3 x float> [[TMP47]], i8 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP49:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT48:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP49]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP49:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT48]], ptr [[DOTFCA_0_GEP49]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT50:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP49]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP51:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT50]], ptr [[DOTFCA_1_GEP51]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP50:%.*]] = call [4 x <3 x float>] @_cont_ObjectToWorld4x3(ptr [[TMP48]], ptr [[TMP1]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP50]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP50]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = 
extractvalue [4 x <3 x float>] [[TMP50]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP50]], 3 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT24]], ptr [[DOTFCA_1_GEP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP43:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP41]], ptr [[TMP9]]) +; DXILCONTPOSTPROCESS-NEXT: [[L:%.*]] = extractelement <3 x float> [[TMP43]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP45:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT53:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP45]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP54:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT53]], ptr [[DOTFCA_0_GEP54]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT55:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP45]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP56:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT55]], ptr [[DOTFCA_1_GEP56]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP46:%.*]] = call [4 x <3 x float>] @_cont_ObjectToWorld4x3(ptr [[TMP44]], ptr [[TMP1]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP46]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP46]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP46]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [4 x <3 x float>] [[TMP46]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[M:%.*]] = extractelement <3 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: 
[[TMP51:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP52:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT44:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP52]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP45:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT44]], ptr [[DOTFCA_0_GEP45]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT46:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP52]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP47:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT46]], ptr [[DOTFCA_1_GEP47]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP53:%.*]] = call [4 x <3 x float>] @_cont_WorldToObject4x3(ptr [[TMP51]], ptr [[TMP2]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT5:%.*]] = extractvalue [4 x <3 x float>] [[TMP53]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT7:%.*]] = extractvalue [4 x <3 x float>] [[TMP53]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT8:%.*]] = extractvalue [4 x <3 x float>] [[TMP53]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT9:%.*]] = extractvalue [4 x <3 x float>] [[TMP53]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[N:%.*]] = extractelement <3 x float> [[DOTFCA_0_EXTRACT5]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP54:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT28:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP54]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP29:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP6]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT28]], ptr [[DOTFCA_0_GEP29]], align 4 -; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_1_EXTRACT30:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP54]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP31:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP6]], i32 0, i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT30]], ptr [[DOTFCA_1_GEP31]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP55:%.*]] = call i32 @_cont_HitKind(ptr [[SYSTEM_DATA_ALLOCA]], ptr [[TMP6]]) -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP56]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP48:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT49:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP48]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP50:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT49]], ptr [[DOTFCA_0_GEP50]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT51:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP48]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP52:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 1 
+; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT51]], ptr [[DOTFCA_1_GEP52]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP49:%.*]] = call [4 x <3 x float>] @_cont_WorldToObject4x3(ptr [[TMP47]], ptr [[TMP2]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [4 x <3 x float>] [[TMP49]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT12:%.*]] = extractvalue [4 x <3 x float>] [[TMP49]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT13:%.*]] = extractvalue [4 x <3 x float>] [[TMP49]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT14:%.*]] = extractvalue [4 x <3 x float>] [[TMP49]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[N:%.*]] = extractelement <3 x float> [[DOTFCA_0_EXTRACT10]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP50:%.*]] = call [[STRUCT_HITDATA]] [[_CONT_GETCOMMITTEDSTATE]](ptr [[SYSTEM_DATA_ALLOCA]]) +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT33:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP50]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP34:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP6]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: store float [[DOTFCA_0_EXTRACT33]], ptr [[DOTFCA_0_GEP34]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT35:%.*]] = extractvalue [[STRUCT_HITDATA]] [[TMP50]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_GEP36:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP6]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTFCA_1_EXTRACT35]], ptr [[DOTFCA_1_GEP36]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP51:%.*]] = call i32 @_cont_HitKind(ptr [[SYSTEM_DATA_ALLOCA]], ptr [[TMP6]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP52]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP57]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META20]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP57]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [17 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; %a = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) @@ -457,24 +478,25 @@ attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="fa !16 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !17} !17 = !{i32 8, i32 10, i32 5, !18} !18 = !{i32 0} -!19 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !20} +!19 = !{%struct.SystemData poison} !20 = !{i32 0, %struct.SystemData poison} -!21 = !{!"function", !"void", !20, %struct.BuiltInTriangleIntersectionAttributes poison} -!22 = !{!"function", %struct.HitData poison, !23} +!21 = !{%struct.SystemData poison} +!22 = !{%struct.AnyHitTraversalData poison} !23 = !{i32 0, %struct.AnyHitTraversalData poison} -!24 = !{!"function", %struct.HitData poison, !20} -!25 = !{!"function", i32 poison, !26} +!24 = !{%struct.SystemData poison} +!25 = !{%struct.DispatchSystemData poison} !26 = !{i32 0, %struct.DispatchSystemData poison} -!27 = !{!"function", !"void", !26, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!28 = !{!"function", <3 x i32> poison, !26} -!29 = !{!"function", <3 x float> poison, !26} -!30 = !{!"function", float poison, !26} -!31 = !{!"function", float poison, !26, !32} +!27 = !{%struct.DispatchSystemData poison} +!28 = !{%struct.DispatchSystemData poison} +!29 = !{%struct.DispatchSystemData poison} +!30 = !{%struct.DispatchSystemData poison} +!31 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !32 = !{i32 0, %struct.HitData poison} -!33 = !{!"function", i32 poison, !26, !32} -!34 = !{!"function", <3 x float> poison, !26, !32} -!35 = 
!{!"function", [4 x <3 x float>] poison, !26, !32} -!36 = !{!"function", i32 poison, !20, !32} -!37 = !{!"function", !"void", !38, !39} +!33 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!34 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!35 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!36 = !{null, %struct.SystemData poison, %struct.HitData poison} +!37 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !38 = !{i32 0, %struct.RayPayload poison} !39 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} +!40 = !{%struct.AnyHitTraversalData poison} diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll b/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll index 858295ca4d..b15886a69e 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-large-payload.ll @@ -6,7 +6,7 @@ ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=CLEANUP %s ; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=CLEANUP-CPS %s -; RUN: grep -v lgc.cps.module %s | opt --verify-each 
-passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -27,18 +27,18 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @"\01?Scene@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 ; Need _cont_ReportHit to get system data type -declare !types !206 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) +declare !pointeetys !206 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #3 declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 -declare !types !200 void @dx.op.traceRay.struct.SmallPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.SmallPayload*) -declare !types !201 void @dx.op.traceRay.struct.MediumPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, 
float, float, float, float, float, float, %struct.MediumPayload*) -declare !types !202 void @dx.op.traceRay.struct.LargePayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.LargePayload*) +declare !pointeetys !200 void @dx.op.traceRay.struct.SmallPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.SmallPayload*) +declare !pointeetys !201 void @dx.op.traceRay.struct.MediumPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MediumPayload*) +declare !pointeetys !202 void @dx.op.traceRay.struct.LargePayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.LargePayload*) -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #1 !types !203 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #1 !pointeetys !203 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -50,7 +50,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define void @Miss(%struct.SmallPayload* noalias nocapture %outerpayload) !types !204 { +define void @Miss(%struct.SmallPayload* noalias nocapture %outerpayload) !pointeetys !204 { %p1 = alloca %struct.SmallPayload %p2 = alloca %struct.MediumPayload %p3 = alloca %struct.LargePayload @@ -70,9 +70,6 @@ define void @Miss(%struct.SmallPayload* noalias 
nocapture %outerpayload) !types ret void } -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #1 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdWaitAwaitTraversal(i64, i64, %struct.TraversalData) #1 @@ -83,25 +80,25 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #1 ; Function Attrs: alwaysinline -declare !types !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #1 +declare !pointeetys !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #1 ; Function Attrs: alwaysinline -declare !types !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #1 +declare !pointeetys !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #1 ; Function Attrs: alwaysinline -declare !types !22 i1 @_cont_IsEndSearch(%struct.TraversalData*) #1 +declare !pointeetys !22 i1 @_cont_IsEndSearch(%struct.TraversalData*) #1 ; Function Attrs: nounwind memory(read) -declare !types !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !26 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #3 +declare !pointeetys !26 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #3 ; Function Attrs: nounwind memory(none) -declare !types !28 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #3 +declare !pointeetys !28 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #3 ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #1 !types !30 { 
+define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #1 !pointeetys !30 { ret i32 5 } @@ -136,18 +133,18 @@ attributes #3 = { nounwind memory(none) } !14 = !{void (%struct.SmallPayload*)* @Miss, !"Miss", null, null, !15} !15 = !{i32 8, i32 11, i32 6, i32 24, i32 5, !16} !16 = !{i32 0} -!19 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !20} +!19 = !{%struct.SystemData poison} !20 = !{i32 0, %struct.SystemData poison} -!21 = !{!"function", !"void", !20, %struct.BuiltInTriangleIntersectionAttributes poison} -!22 = !{!"function", i1 poison, !23} +!21 = !{%struct.SystemData poison} +!22 = !{%struct.TraversalData poison} !23 = !{i32 0, %struct.TraversalData poison} -!24 = !{!"function", i32 poison, !20, !25} +!24 = !{null, %struct.SystemData poison, %struct.HitData poison} !25 = !{i32 0, %struct.HitData poison} -!26 = !{!"function", !"void", !27} +!26 = !{%struct.DispatchSystemData poison} !27 = !{i32 0, %struct.DispatchSystemData poison} -!28 = !{!"function", !"void", !29} +!28 = !{%struct.AnyHitTraversalData poison} !29 = !{i32 0, %struct.AnyHitTraversalData poison} -!30 = !{!"function", i32 poison, !27} +!30 = !{%struct.DispatchSystemData poison} !31 = !{i32 2} !32 = !{i32 8} @@ -155,25 +152,27 @@ attributes #3 = { nounwind memory(none) } !101 = !{i32 0, %struct.MediumPayload poison} !102 = !{i32 0, %struct.LargePayload poison} !103 = !{i32 0, %struct.DispatchSystemData poison} -!200 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !100} -!201 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !101} -!202 = !{!"function", !"void", i32 poison, %dx.types.Handle 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !102} -!203 = !{!"function", !"void", !103, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!204 = !{!"function", !"void", !100} +!200 = !{%struct.SmallPayload poison} +!201 = !{%struct.MediumPayload poison} +!202 = !{%struct.LargePayload poison} +!203 = !{%struct.DispatchSystemData poison} +!204 = !{%struct.SmallPayload poison} !205 = !{i32 0, %struct.AnyHitTraversalData poison} -!206 = !{!"function", i1 poison, !205, float poison, i32 poison} +!206 = !{%struct.AnyHitTraversalData poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [27 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[P1:%.*]] = alloca [[STRUCT_SMALLPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[P2:%.*]] = alloca [[STRUCT_MEDIUMPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[P3:%.*]] = alloca [[STRUCT_LARGEPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca 
[6 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_SMALLPAYLOAD]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] zeroinitializer, ptr [[P1]], align 4 @@ -191,13 +190,17 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.wait.await [[META6:![0-9]+]], !continuation.returnedRegistercount [[META17]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP10]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [1 x i32] [[TMP10]]), !continuation.registercount [[META17]], !continuation.wait.await [[META6:![0-9]+]], !continuation.returnedRegistercount [[META17]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } @await(ptr [[TMP19]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP25]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP13]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SMALLPAYLOAD]] poison, ptr [[P1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[P1]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP25]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP11]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT10:%.*]] @@ -210,11 +213,12 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I4:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR0]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I5:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 [[ADDR_I4]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP34]], ptr [[TMP37]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr addrspace(32) [[TMP17]], align 4 @@ -222,13 +226,17 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr addrspace(32) [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I5]]), !continuation.registercount [[META13:![0-9]+]], 
!continuation.wait.await [[META6]], !continuation.returnedRegistercount [[META13]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_1:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP25]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load [2 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I5]], [10 x i32] poison, [2 x i32] [[TMP29]]), !continuation.registercount [[META13:![0-9]+]], !continuation.wait.await [[META6]], !continuation.returnedRegistercount [[META13]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } @await.1(ptr [[TMP41]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP44]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [2 x i32] [[TMP60]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MEDIUMPAYLOAD]] poison, ptr [[P2]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_MEDIUMPAYLOAD]], ptr [[P2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr [[TMP28]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP27]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 @@ -236,7 +244,8 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP27]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP32]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP44]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT9:%.*]] @@ -249,11 +258,12 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I9:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR0]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I10:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 [[ADDR_I9]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[PAYLOAD_SPILL_ALLOCA]] to i32 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], 
align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP58]], ptr [[TMP66]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP39]], align 4 @@ -269,13 +279,17 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP54]], i32 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr addrspace(32) [[TMP50]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I10]]), !continuation.registercount [[META13]], !continuation.wait.await [[META6]], !continuation.returnedRegistercount [[META13]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_2:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP58]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = load [2 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I10]], [10 x i32] poison, [2 x i32] [[TMP62]]), !continuation.registercount 
[[META13]], !continuation.wait.await [[META6]], !continuation.returnedRegistercount [[META13]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } @await.2(ptr [[TMP63]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP64]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [2 x i32] [[TMP65]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_LARGEPAYLOAD]] poison, ptr [[P3]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_LARGEPAYLOAD]], ptr [[P3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr [[TMP77]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP69]], ptr [[TMP77]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[TMP77]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP82]], align 4 @@ -291,17 +305,19 @@ attributes #3 = { nounwind memory(none) } ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP76]], i32 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = load i32, ptr addrspace(32) [[TMP93]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP94]], 
ptr [[TMP92]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP64]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP75]], ptr [[TMP53]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_SMALLPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP70]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP99]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP70]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP84]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP100]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP101]]), !continuation.registercount [[META17]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP101]], [27 x i32] poison, [1 x i32] [[TMP95]]), !continuation.registercount [[META17]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; @@ -311,73 +327,78 @@ attributes #3 = { nounwind memory(none) } ; ; ; CLEANUP-LABEL: define void @Miss( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META20:![0-9]+]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [27 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META20:![0-9]+]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 28) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT_SPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: store i32 [[PAYLOAD_FCA_0_EXTRACT]], ptr addrspace(32) [[PAYLOAD_FCA_0_EXTRACT_SPILL_ADDR]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; 
CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 -; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; CLEANUP-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T1]]) ; CLEANUP-NEXT: [[T3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP2:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; CLEANUP-NEXT: [[TMP1:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; CLEANUP-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR0:[0-9]+]] ; CLEANUP-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 -; CLEANUP-NEXT: store i32 0, ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT15:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 ; CLEANUP-NEXT: [[TMP3:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.0) -; CLEANUP-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 4, i64 -1, i64 [[TMP3]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount [[META17]] +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 4, i32 -1, {} poison, i64 [[TMP3]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT15]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount [[META17]], !waitmask [[META21:![0-9]+]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @Miss.resume.0( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { ; CLEANUP-NEXT: entryresume.0: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 28) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP2]], 0 +; CLEANUP-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT42:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; CLEANUP-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T110]]) ; CLEANUP-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP2:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) -; CLEANUP-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[TMP3:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; CLEANUP-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT42]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 ; CLEANUP-NEXT: [[ADDR_I4:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR0]] ; CLEANUP-NEXT: [[TRAV_DATA2_I5:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 [[ADDR_I4]], 5 -; CLEANUP-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 +; CLEANUP-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(32) ; CLEANUP-NEXT: store i32 0, ptr 
addrspace(32) [[TMP4]], align 4 ; CLEANUP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP4]], i32 1 ; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP6]], align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.1) -; CLEANUP-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i64 [[TMP7]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I5]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount [[META13]] +; CLEANUP-NEXT: [[DOTFCA_0_INSERT19:%.*]] = insertvalue [2 x i32] poison, i32 [[TMP5]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT19]], i32 0, 1 +; CLEANUP-NEXT: [[TMP8:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.1) +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 4, i32 -1, {} poison, i64 [[TMP8]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I5]], [10 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount [[META13]], !waitmask [[META21]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @Miss.resume.1( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [2 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { ; CLEANUP-NEXT: entryresume.1: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 28) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP6:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 
-; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [2 x i32] [[TMP2]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[TMP2]], 1 +; CLEANUP-NEXT: [[TMP6:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT20]] to ptr addrspace(32) ; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(32) [[TMP6]], align 4 ; CLEANUP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP6]], i32 1 ; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(32) [[TMP5]], align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP16:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT20]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; CLEANUP-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T17]]) @@ -389,9 +410,7 @@ attributes #3 = { nounwind memory(none) } ; CLEANUP-NEXT: [[ADDR_I9:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR0]] ; CLEANUP-NEXT: [[TRAV_DATA2_I10:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 [[ADDR_I9]], 5 ; CLEANUP-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 
[[TMP14]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(32) ; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP9]], align 4 ; CLEANUP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP9]], i32 1 ; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP10]], align 4 @@ -399,18 +418,22 @@ attributes #3 = { nounwind memory(none) } ; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP11]], align 4 ; CLEANUP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP9]], i32 3 ; CLEANUP-NEXT: store i32 0, ptr addrspace(32) [[TMP12]], align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.2) -; CLEANUP-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i64 [[TMP16]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I10]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount [[META13]] +; CLEANUP-NEXT: [[DOTFCA_0_INSERT25:%.*]] = insertvalue [2 x i32] poison, i32 [[TMP14]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT28:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT25]], i32 0, 1 +; CLEANUP-NEXT: [[TMP17:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @Miss.resume.2) +; CLEANUP-NEXT: call void (...) 
@lgc.cps.jump(i64 4, i32 -1, {} poison, i64 [[TMP17]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I10]], [10 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT28]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount [[META13]], !waitmask [[META21]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @Miss.resume.2( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [2 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { ; CLEANUP-NEXT: entryresume.2: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 28) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MISS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP4:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; CLEANUP-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT29:%.*]] = extractvalue [2 x i32] [[TMP2]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_EXTRACT31:%.*]] = extractvalue [2 x i32] [[TMP2]], 1 +; CLEANUP-NEXT: [[TMP4:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT29]] to ptr addrspace(32) ; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(32) [[TMP4]], align 4 ; CLEANUP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP4]], i32 1 ; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP6]], align 4 @@ -418,17 +441,18 @@ attributes #3 = { nounwind memory(none) } ; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP10]], 
align 4 ; CLEANUP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP4]], i32 3 ; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(32) [[TMP9]], align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP14:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT29]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP12:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT46:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP12]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 -; CLEANUP-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(32) [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR]], align 4 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT14]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT41:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT46]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT_RELOAD]], 0 ; CLEANUP-NEXT: call void 
@lgc.cps.free(i32 28) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; CLEANUP-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 poison, i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT41]], [27 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; @@ -501,12 +525,12 @@ attributes #3 = { nounwind memory(none) } ; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 2 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT18:%.*]] = extractvalue [2 x i32] [[TMP5]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[TMP5]], 1 -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 0 ; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT18]] to ptr addrspace(32) ; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP7]], align 4 ; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 1 ; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(32) [[TMP9]], align 4 ; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT18]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT47:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-CPS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -541,7 +565,6 @@ attributes #3 = { nounwind memory(none) } ; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 2 ; 
CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT27:%.*]] = extractvalue [2 x i32] [[TMP5]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT29:%.*]] = extractvalue [2 x i32] [[TMP5]], 1 -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 0 ; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT27]] to ptr addrspace(32) ; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP7]], align 4 ; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 1 @@ -551,6 +574,7 @@ attributes #3 = { nounwind memory(none) } ; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 3 ; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(32) [[TMP13]], align 4 ; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT27]] to ptr addrspace(32) +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP3]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT49:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 ; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MISS_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 2 @@ -570,7 +594,7 @@ attributes #3 = { nounwind memory(none) } ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @Miss( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] !continuation.stacksize [[META19:![0-9]+]] !continuation.state [[META20:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [27 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] 
!continuation.stacksize [[META18:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -581,71 +605,73 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: store i32 [[PAYLOAD_FCA_0_EXTRACT]], ptr addrspace(21) [[TMP8]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP1]], 24 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP7]] to ptr addrspace(21) -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP8]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(21) [[TMP9]], align 4 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[T1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; DXILCONTPOSTPROCESS-NEXT: [[T2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T1]]) ; DXILCONTPOSTPROCESS-NEXT: [[T3:%.*]] = call 
[[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T2]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T3]]) ; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(ptr @Miss.resume.0) ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP12]], 5 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT15:%.*]] = insertvalue [1 x i32] poison, i32 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP11]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount [[META17]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP11]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT15]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META17]] !continuation [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation [[META17]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP13]], -28 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [1 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT42:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP14]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[T110:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; DXILCONTPOSTPROCESS-NEXT: [[T29:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T110]]) ; DXILCONTPOSTPROCESS-NEXT: [[T38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T29]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T38]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT42]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I2:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I1_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I3:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I2]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @continuation.getAddrAndMD(ptr @Miss.resume.1) ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I5:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I3]], i64 [[TMP12]], 5 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP6]], 
i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP7]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP5]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = add i32 [[TMP2]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP10]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT17:%.*]] = insertvalue [2 x i32] poison, i32 [[TMP2]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT17]], i32 0, 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP11]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I5]]), !continuation.registercount [[META13:![0-9]+]], !continuation.returnedRegistercount [[META13]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP11]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I5]], [10 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.1( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [2 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation [[META17]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.1: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP28]], -28 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = extractvalue [2 x i32] [[TMP4]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[TMP4]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(21) [[TMP6]], align 4 @@ -653,50 +679,50 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] 
= getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT44:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP13]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[T17:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; DXILCONTPOSTPROCESS-NEXT: [[T26:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[T17]]) ; DXILCONTPOSTPROCESS-NEXT: [[T35:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[T26]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) -; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT12]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[T35]]) +; DXILCONTPOSTPROCESS-NEXT: [[DIS_DATA_I5_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT44]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[SYS_DATA_I6:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I5_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I7:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I6]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = call i64 @continuation.getAddrAndMD(ptr @Miss.resume.2) ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I10:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I7]], i64 [[TMP27]], 5 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP2]], ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP16]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP14]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP2]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP17]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP18]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP19]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP14]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP2]], 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP22]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = add i32 [[TMP14]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = add i32 [[TMP2]], 12 ; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP23]] to ptr addrspace(21) ; 
DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP24]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i32 0, ptr addrspace(21) [[TMP25]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT25:%.*]] = insertvalue [2 x i32] poison, i32 [[TMP2]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT28:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT25]], i32 0, 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP26]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I10]]), !continuation.registercount [[META13]], !continuation.returnedRegistercount [[META13]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP26]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I10]], [10 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT28]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @Miss.resume.2( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META13]] !continuation [[META18]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [27 x i32], [2 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation [[META17]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.2: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP30]], -28 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 
1), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = extractvalue [2 x i32] [[TMP4]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT31:%.*]] = extractvalue [2 x i32] [[TMP4]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(21) [[TMP6]], align 4 @@ -712,8 +738,8 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP16]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP17]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(21) [[TMP18]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [27 x i32], [2 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP20]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = add i32 [[TMP2]], 24 ; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP21]] to ptr addrspace(21) @@ -723,13 +749,13 @@ attributes #3 = { nounwind memory(none) } ; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP24]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP25]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP26]], align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[DOTRELOAD]], ptr 
addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT14]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTRELOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], -28 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP28]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP29]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP29]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [27 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT1]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll b/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll index fb08b19985..16e93c0d3d 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-simple-call-shader.ll @@ -2,16 +2,12 @@ ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata" \ ; RUN: -S --lint-abort-on-error | FileCheck -check-prefix=CLEANUP %s -; RUN: grep -v lgc.cps.module %s | opt 
--verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata" \ -; RUN: -S --lint-abort-on-error | FileCheck -check-prefix=REGISTERBUFFER %s -; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" \ +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" \ ; RUN: -S --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS %s ; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s ; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,remove-types-metadata" \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=CLEANUP-CPS %s -; RUN: opt --verify-each -passes="dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata" \ -; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=REGISTERBUFFER-CPS %s -; RUN: opt --verify-each 
-passes="dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" \ +; RUN: opt --verify-each -passes="dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS-CPS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -30,30 +26,28 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i32 @_cont_GetContinuationStackAddr() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) -declare !types !13 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) +declare !pointeetys !13 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) -declare !types !15 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) +declare !pointeetys !15 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !17 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !17 { ret i32 5 } ; Need _cont_ReportHit to get system data type -declare !types !22 i1 
@_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) +declare !pointeetys !24 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: nounwind memory(none) -declare !types !22 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #1 +declare !pointeetys !22 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #1 ; Function Attrs: nounwind memory(none) -declare !types !22 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone %data) #1 +declare !pointeetys !22 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone %data) #1 -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #1 !types !18 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #1 !pointeetys !18 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -61,7 +55,7 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #1 !typ ret void } -define void @called(%struct.MyParams* %params) !types !19 { +define void @called(%struct.MyParams* %params) !pointeetys !19 { call void @dx.op.callShader.struct.MyParams(i32 159, i32 2, %struct.MyParams* nonnull %params) %a = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) %b = call i32 @dx.op.dispatchRaysDimensions.i32(i32 146, i8 0) @@ -69,7 +63,7 @@ define void @called(%struct.MyParams* %params) !types !19 { } ; Function Attrs: nounwind -declare !types !21 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #0 +declare !pointeetys !21 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #0 ; Function Attrs: nounwind memory(none) declare i32 
@dx.op.dispatchRaysDimensions.i32(i32, i8) #1 @@ -99,18 +93,18 @@ attributes #1 = { alwaysinline } !10 = !{i32 0, %"class.RWTexture2D >"* bitcast (%dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to %"class.RWTexture2D >"*), !"RenderTarget", i32 0, i32 0, i32 1, i32 2, i1 false, i1 false, i1 false, !11} !11 = !{i32 0, i32 9} !12 = !{i32 0, i64 65536} -!13 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !14} +!13 = !{%struct.SystemData poison} !14 = !{i32 0, %struct.SystemData poison} -!15 = !{!"function", !"void", !16} +!15 = !{%struct.DispatchSystemData poison} !16 = !{i32 0, %struct.DispatchSystemData poison} -!17 = !{!"function", i32 poison, !16} -!18 = !{!"function", !"void", !16, i32 poison} -!19 = !{!"function", !"void", !20} +!17 = !{%struct.DispatchSystemData poison} +!18 = !{%struct.DispatchSystemData poison} +!19 = !{%struct.MyParams poison} !20 = !{i32 0, %struct.MyParams poison} -!21 = !{!"function", !"void", i32 poison, i32 poison, !20} -!22 = !{!"function", <3 x i32> poison, !16} +!21 = !{%struct.MyParams poison} +!22 = !{%struct.DispatchSystemData poison} !23 = !{i32 0, %struct.AnyHitTraversalData poison} -!24 = !{!"function", i1 poison, !23, float poison, i32 poison} +!24 = !{%struct.AnyHitTraversalData poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { @@ -118,24 +112,30 @@ attributes #1 = { alwaysinline } ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @called( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [16 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] 
!continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP7]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load [1 x 
i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], [17 x i32] poison, [1 x i32] [[TMP7]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } @await(ptr [[TMP11]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP12]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP13]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP12]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] @@ -146,9 +146,10 @@ attributes #1 = { alwaysinline } ; LOWERRAYTRACINGPIPELINE-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP18]], i8 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP2]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP22]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP23]]), !continuation.registercount [[META14]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP23]], [16 x i32] poison, [1 x i32] [[TMP20]]), !continuation.registercount [[META14]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; @@ -158,27 +159,29 @@ attributes #1 = { alwaysinline } ; ; ; CLEANUP-LABEL: define void @called( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [16 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; CLEANUP-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; 
CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT9:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; CLEANUP-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 +; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT9]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] +; CLEANUP-NEXT: call void (...) 
@lgc.cps.jump(i64 2, i32 -1, {} poison, i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], [17 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @called.resume.0( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [16 x i32], [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { ; CLEANUP-NEXT: entryresume.0: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 +; CLEANUP-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP5]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 @@ -186,61 +189,20 @@ attributes #1 = { alwaysinline } ; CLEANUP-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP2]], i8 0 ; CLEANUP-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() ; CLEANUP-NEXT: [[B:%.*]] = 
extractelement <3 x i32> [[TMP3]], i8 0 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(20) @PAYLOAD, align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-NEXT: call void @lgc.cps.free(i32 8) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; CLEANUP-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 poison, i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [16 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT1]]), !continuation.registercount [[META14]] ; CLEANUP-NEXT: unreachable ; ; -; REGISTERBUFFER-LABEL: define i32 @_cont_GetLocalRootIndex( -; REGISTERBUFFER-SAME: ptr [[DATA:%.*]]) { -; REGISTERBUFFER-NEXT: ret i32 5 -; -; -; REGISTERBUFFER-LABEL: define void @called( -; REGISTERBUFFER-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { -; REGISTERBUFFER-NEXT: AllocaSpillBB: -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; REGISTERBUFFER-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; REGISTERBUFFER-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 
5) -; REGISTERBUFFER-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-NEXT: store i32 [[TMP1]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) -; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 2, i64 [[TMP2]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] -; REGISTERBUFFER-NEXT: unreachable -; -; -; REGISTERBUFFER-LABEL: define dso_local void @called.resume.0( -; REGISTERBUFFER-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { -; REGISTERBUFFER-NEXT: entryresume.0: -; REGISTERBUFFER-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 -; REGISTERBUFFER-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-NEXT: [[TMP2:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; REGISTERBUFFER-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP2]], i8 0 -; REGISTERBUFFER-NEXT: [[TMP3:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.dimensions() -; REGISTERBUFFER-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP3]], i8 0 -; REGISTERBUFFER-NEXT: store i32 [[TMP4]], ptr addrspace(20) @PAYLOAD, align 4 -; REGISTERBUFFER-NEXT: [[DOTFCA_0_INSERT:%.*]] = 
insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 -; REGISTERBUFFER-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] -; REGISTERBUFFER-NEXT: unreachable -; -; ; POSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( ; POSTPROCESS-SAME: ptr [[DATA:%.*]]) { ; POSTPROCESS-NEXT: ret i32 5 ; ; ; POSTPROCESS-LABEL: define void @called( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [16 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !continuation [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -250,28 +212,31 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT9:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: 
call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT9]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 ; POSTPROCESS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(ptr @called.resume.0) ; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 2, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], [17 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define dso_local void @called.resume.0( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation.registercount [[META14]] !continuation [[META17]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [16 x i32], [1 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META16]] !continuation [[META17]] { ; POSTPROCESS-NEXT: entryresume.0: ; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA1:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = 
extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 ; POSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP11]], -8 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP1]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP12]], 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP13]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 @@ -280,13 +245,13 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP6]], i8 0 ; POSTPROCESS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[SYSTEM_DATA_ALLOCA1]]) ; POSTPROCESS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 -; POSTPROCESS-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP10]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [16 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT1]]) ; POSTPROCESS-NEXT: unreachable ; ; @@ -296,29 +261,29 @@ attributes #1 = { alwaysinline } ; ; ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @called( -; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [16 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [1 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP5]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa8i32a1i32s(i32 2, i32 4, i32 5, [9 x i32] poison, [1 x i32] [[TMP6]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP7]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[TMP8]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP7]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP5]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } (...) @lgc.cps.await__sl_s_struct.DispatchSystemDatasa16i32a1i32s(i32 2, i32 4, i32 5, [17 x i32] poison, [1 x i32] [[TMP6]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP7]], 2 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [1 x i32] [[TMP8]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP1]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP7]], 0 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] @@ -329,10 +294,10 @@ attributes #1 = { alwaysinline } ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP13]], i8 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load [1 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP16]], [8 x i32] poison, [1 x i32] [[TMP17]]), !continuation.registercount [[META14]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP16]], [16 x i32] poison, [1 x i32] [[TMP17]]), !continuation.registercount [[META14]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; @@ -342,7 +307,7 @@ attributes #1 = { alwaysinline } ; ; ; CLEANUP-CPS-LABEL: define void @called( -; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [16 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META16:![0-9]+]] !lgc.cps [[META17:![0-9]+]] !continuation [[META18:![0-9]+]] { ; CLEANUP-CPS-NEXT: AllocaSpillBB: ; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 @@ -353,90 +318,43 @@ attributes #1 = { alwaysinline } ; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[SYSTEM_DATA_FCA_0_EXTRACT]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 ; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 2, i32 4, {} poison, i64 [[TMP0]], i32 5, [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 2, i32 4, {} poison, i64 [[TMP0]], i32 5, [17 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] ; CLEANUP-CPS-NEXT: unreachable ; ; ; CLEANUP-CPS-LABEL: define dso_local void @called.resume.0( -; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META16]] !lgc.cps [[META17]] !continuation [[META18]] { +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [16 x i32], [1 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META16]] !lgc.cps [[META17]] !continuation [[META18]] { ; CLEANUP-CPS-NEXT: entryresume.0: -; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, align 8 -; CLEANUP-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] }, align 8 +; CLEANUP-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 ; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP3]], 2 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP6]], 0 -; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP3]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], 0 ; CLEANUP-CPS-NEXT: call 
void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 ; CLEANUP-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 ; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP8]]) ; CLEANUP-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 -; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 ; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP10]]) ; CLEANUP-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT10]], 0 ; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 ; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) -; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [16 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] ; CLEANUP-CPS-NEXT: unreachable ; ; -; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( -; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: ret i32 5 -; -; -; REGISTERBUFFER-CPS-LABEL: define void @called( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[SYSTEM_DATA_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @called.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 2, i32 4, {} poison, i64 [[TMP0]], i32 5, [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define dso_local void @called.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] { -; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, align 8 -; REGISTERBUFFER-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP6]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP7]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CALLED_FRAME:%.*]], ptr addrspace(32) [[TMP5]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP8]]) -; REGISTERBUFFER-CPS-NEXT: [[A:%.*]] = 
extractelement <3 x i32> [[TMP9]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP10]]) -; REGISTERBUFFER-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP11]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT10]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; ; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( ; POSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; POSTPROCESS-CPS-NEXT: ret i32 5 ; ; ; POSTPROCESS-CPS-LABEL: define void @called( -; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] { +; POSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [16 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !lgc.cps [[META18:![0-9]+]] { ; POSTPROCESS-CPS-NEXT: AllocaSpillBB: ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, 
align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -453,31 +371,31 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 ; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(ptr @called.resume.0) ; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP4]], i64 [[TMP5]], i32 5, [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]), !continuation.registercount [[META14:![0-9]+]], !continuation.returnedRegistercount [[META14]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 2, i32 [[TMP4]], i64 [[TMP5]], i32 5, [17 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; ; POSTPROCESS-CPS-LABEL: define dso_local void @called.resume.0( -; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] { +; POSTPROCESS-CPS-SAME: {} [[TMP0:%.*]], i32 [[CSPINIT:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [16 x i32], [1 x i32] } [[TMP3:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !lgc.cps [[META18]] { ; POSTPROCESS-CPS-NEXT: entryresume.0: -; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, align 8 +; POSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] }, align 8 ; POSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; POSTPROCESS-CPS-NEXT: store { 
[[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP3]], ptr [[TMP4]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], -8 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 2 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP3]], 2 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP7]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] } [[TMP3]], 0 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP8]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) ; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 ; POSTPROCESS-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP11]]) ; POSTPROCESS-CPS-NEXT: [[A:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 -; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [16 x i32], [1 x i32] }, ptr [[TMP4]], i32 0, 
i32 0 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call <3 x i32> @_cont_DispatchRaysDimensions3(ptr [[TMP13]]) ; POSTPROCESS-CPS-NEXT: [[B:%.*]] = extractelement <3 x i32> [[TMP14]], i8 0 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT10]], 0 @@ -487,6 +405,6 @@ attributes #1 = { alwaysinline } ; POSTPROCESS-CPS-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP17]], i32 [[TMP18]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP17]], i32 [[TMP18]], i64 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [16 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll index 9668194c58..c4d6f2a287 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline-small-payload-field.ll @@ -2,7 +2,7 @@ ; Test copying of fields between local and global payload whose size ; is not a multiple of i32s, requiring copies at a smaller granularity ; for at least a suffix of the fields. 
-; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -26,22 +26,19 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } ; Function Attrs: nounwind -define void @MissPAQ(%struct.PAQPayload* noalias nocapture %payload) #0 !types !17 { +define void @MissPAQ(%struct.PAQPayload* noalias nocapture %payload) #0 !pointeetys !17 { %1 = getelementptr inbounds %struct.PAQPayload, %struct.PAQPayload* %payload, i32 0, i32 1 store i16 17, i16* %1, align 4 ret void } ; Function Attrs: nounwind -define void @MissNoPAQ(%struct.NoPAQPayload* noalias nocapture %payload) #0 !types !31 { +define void @MissNoPAQ(%struct.NoPAQPayload* noalias nocapture %payload) #0 !pointeetys !31 { %1 = getelementptr inbounds %struct.NoPAQPayload, %struct.NoPAQPayload* %payload, i32 0, i32 1 store i16 17, i16* %1, align 4 ret void } -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #1 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #1 @@ -52,28 +49,30 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #1 ; Function Attrs: alwaysinline -declare !types !19 %struct.BuiltInTriangleIntersectionAttributes 
@_cont_GetTriangleHitAttributes(%struct.SystemData*) #1 +declare !pointeetys !19 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #1 ; Function Attrs: alwaysinline -declare !types !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #1 +declare !pointeetys !21 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #1 ; Function Attrs: alwaysinline -declare !types !22 i1 @_cont_IsEndSearch(%struct.TraversalData*) #1 +declare !pointeetys !22 i1 @_cont_IsEndSearch(%struct.TraversalData*) #1 ; Function Attrs: nounwind memory(read) -declare !types !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !24 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !26 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #3 +declare !pointeetys !26 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #3 ; Function Attrs: nounwind memory(none) -declare !types !28 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #3 +declare !pointeetys !28 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #3 ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #1 !types !30 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #1 !pointeetys !30 { ret i32 5 } +declare !pointeetys !31 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + attributes #0 = { nounwind } attributes #1 = { alwaysinline } attributes #2 = { nounwind memory(read) } @@ -104,131 +103,160 @@ attributes #3 = { nounwind memory(none) } !14 = !{void (%struct.PAQPayload*)* @MissPAQ, !"MissPAQ", null, null, !15} !15 = !{i32 8, i32 11, i32 6, i32 24, i32 5, !16} !16 = !{i32 0} -!17 = !{!"function", !"void", !18} +!17 
= !{%struct.PAQPayload poison} !18 = !{i32 0, %struct.PAQPayload poison} -!19 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !20} +!19 = !{%struct.SystemData poison} !20 = !{i32 0, %struct.SystemData poison} -!21 = !{!"function", !"void", !20, %struct.BuiltInTriangleIntersectionAttributes poison} -!22 = !{!"function", i1 poison, !23} +!21 = !{%struct.SystemData poison} +!22 = !{%struct.TraversalData poison} !23 = !{i32 0, %struct.TraversalData poison} -!24 = !{!"function", i32 poison, !20, !25} +!24 = !{null, %struct.SystemData poison, %struct.HitData poison} !25 = !{i32 0, %struct.HitData poison} -!26 = !{!"function", !"void", !27} +!26 = !{%struct.DispatchSystemData poison} !27 = !{i32 0, %struct.DispatchSystemData poison} -!28 = !{!"function", !"void", !29} +!28 = !{%struct.AnyHitTraversalData poison} !29 = !{i32 0, %struct.AnyHitTraversalData poison} -!30 = !{!"function", i32 poison, !27} -!31 = !{!"function", !"void", !32} +!30 = !{%struct.DispatchSystemData poison} +!31 = !{%struct.NoPAQPayload poison} !32 = !{i32 0, %struct.NoPAQPayload poison} !33 = !{void (%struct.NoPAQPayload*)* @MissNoPAQ, !"MissNoPAQ", null, null, !34} !34 = !{i32 8, i32 11, i32 6, i32 24, i32 5, !35} !35 = !{i32 0} ; CHECK-LABEL: define %struct.DispatchSystemData @MissPAQ( -; CHECK-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META21:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] { +; CHECK-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [16 x i32] [[PADDING:%.*]], [11 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META21:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] { ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [11 x i32], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = alloca 
[[STRUCT_PAYLOAD:%.*]], align 8 +; CHECK-NEXT: store [11 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP19]], align 4 ; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 3 -; CHECK-NEXT: 
[[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP34]], align 4 ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 1 ; CHECK-NEXT: store i16 17, ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP18]], align 1 -; CHECK-NEXT: store i8 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 1 +; CHECK-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP20]], i32 1 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = load i8, ptr [[TMP22]], align 1 -; CHECK-NEXT: store i8 [[TMP23]], ptr addrspace(20) getelementptr (i8, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), i32 1), align 1 +; CHECK-NEXT: store i8 [[TMP23]], ptr [[TMP35]], align 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_PAYLOAD]], ptr [[TMP2]], i32 0, i32 2 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP24]], align 4 -; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP26]], i32 4 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP24]], i32 4 ; 
CHECK-NEXT: [[TMP29:%.*]] = load i8, ptr [[TMP28]], align 1 -; CHECK-NEXT: store i8 [[TMP29]], ptr addrspace(20) getelementptr (i8, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), i32 4), align 1 +; CHECK-NEXT: store i8 [[TMP29]], ptr [[TMP37]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[TMP26]], i32 5 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP24]], i32 5 ; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[TMP30]], align 1 -; CHECK-NEXT: store i8 [[TMP31]], ptr addrspace(20) getelementptr (i8, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), i32 5), align 1 +; CHECK-NEXT: store i8 [[TMP31]], ptr [[TMP38]], align 1 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; CHECK-NEXT: [[TMP33:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP32]], align 4 -; CHECK-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP33]]), !continuation.registercount [[META22]] +; CHECK-NEXT: [[TMP36:%.*]] = load [11 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP33]], [16 x i32] poison, [11 x i32] [[TMP36]]), !continuation.registercount [[META22]] ; CHECK-NEXT: unreachable ; ; ; CHECK-LABEL: define %struct.DispatchSystemData @MissNoPAQ( -; CHECK-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0]] !lgc.rt.shaderstage [[META21]] !continuation.registercount [[META19:![0-9]+]] !continuation [[META24:![0-9]+]] { +; CHECK-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [16 x i32] [[PADDING:%.*]], [14 x i32] [[PAYLOAD:%.*]]) #[[ATTR0]] !lgc.rt.shaderstage [[META21]] !continuation.registercount [[META19:![0-9]+]] !continuation [[META24:![0-9]+]] { ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [14 x i32], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_NOPAQPAYLOAD:%.*]], align 8 +; CHECK-NEXT: store [14 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_NOPAQPAYLOAD]], ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP17]], align 4 +; 
CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP26]], align 4 +; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CHECK-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 3 +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, 
ptr [[TMP6]], i32 5 -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 5 +; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: store i32 [[TMP54]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 6 -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 6 +; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-NEXT: store i32 [[TMP55]], ptr [[TMP20]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_NOPAQPAYLOAD]], ptr [[TMP2]], i32 0, i32 1 ; CHECK-NEXT: store i16 17, ptr [[TMP24]], align 4 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_NOPAQPAYLOAD]], ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -; CHECK-NEXT: store i32 [[TMP26]], ptr addrspace(20) @PAYLOAD, align 4 +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4 +; CHECK-NEXT: store i32 [[TMP29]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -; CHECK-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 
-; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -; CHECK-NEXT: store i32 [[TMP32]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 3 -; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -; CHECK-NEXT: store i32 [[TMP34]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 4 -; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -; CHECK-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 5 +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP27]], align 4 +; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 1 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP33]], align 4 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 2 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 ; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -; CHECK-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 6 -; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -; CHECK-NEXT: store i32 [[TMP36]], ptr addrspace(20) getelementptr inbounds 
(i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 +; CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP36]], align 4 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 3 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 3 +; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP39]], align 4 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 4 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 4 +; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP42]], align 4 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 5 +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 5 +; CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP52]], align 4 +; CHECK-NEXT: store i32 [[TMP47]], ptr [[TMP51]], align 4 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 6 +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 6 +; CHECK-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +; CHECK-NEXT: store i32 [[TMP50]], ptr [[TMP48]], align 4 ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; CHECK-NEXT: [[TMP46:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP45]], align 4 -; CHECK-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP46]]), !continuation.registercount [[META19]] +; CHECK-NEXT: [[TMP53:%.*]] = load [14 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP46]], [16 x i32] poison, [14 x i32] [[TMP53]]), !continuation.registercount [[META19]] ; CHECK-NEXT: unreachable ; ; diff --git a/llvmraytracing/test/dx/lower-rt-pipeline.ll b/llvmraytracing/test/dx/lower-rt-pipeline.ll index f097e2e7f6..b34786a0f1 100644 --- a/llvmraytracing/test/dx/lower-rt-pipeline.ll +++ b/llvmraytracing/test/dx/lower-rt-pipeline.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s ; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s -; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=REGISTERBUFFER-CPS %s -; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS %s -; RUN: opt --verify-each 
-passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS-CPS %s +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS %s +; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=CLEANUP-CPS %s +; RUN: opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS-CPS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -27,69 +27,67 @@ define i32 @_cont_GetContinuationStackAddr() #0 { ret i32 0 } -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } 
-declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #0 declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -define %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData* %data) #0 !types !32 { +define %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData* %data) #0 !pointeetys !32 { %resPtr = getelementptr %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, i32 0, i32 0 %res = load %struct.HitData, %struct.HitData* %resPtr, align 4 ret %struct.HitData %res } -declare !types !34 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !34 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -declare !types !36 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !36 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) !types !37 { +define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) !pointeetys !37 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 0 store %struct.BuiltInTriangleIntersectionAttributes %val, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret void } -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !38 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !38 { ret i32 5 } declare i1 @opaqueIsEnd() -define i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 !types !40 { +define i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 
!pointeetys !40 { %isEnd = call i1 @opaqueIsEnd() ret i1 %isEnd } -declare !types !42 i32 @_cont_HitKind(%struct.SystemData*) #0 +declare !pointeetys !42 i32 @_cont_HitKind(%struct.SystemData*) #0 ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #1 ; Function Attrs: nounwind -declare !types !43 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !43 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 ; Function Attrs: nounwind -declare !types !44 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 +declare !pointeetys !44 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 ; Function Attrs: nounwind -declare !types !43 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !43 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !44 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 +declare !pointeetys !44 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !43 void @_cont_IgnoreHit(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !43 void @_cont_IgnoreHit(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !44 void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* nocapture readnone) #1 +declare !pointeetys !44 void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* nocapture readnone) #1 -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !45 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 
!pointeetys !45 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -101,7 +99,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !46 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !pointeetys !46 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -109,7 +107,7 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !47 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !47 { %origTPtr = getelementptr inbounds %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, i32 0, i32 0, i32 4 %origT = load float, float* %origTPtr, align 4 %isNoHit = fcmp fast uge float %t, %origT @@ -128,7 +126,7 @@ isEnd: ; preds = %0 ret i1 false } -define <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* %data) !types !48 { +define <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* %data) !pointeetys !48 { %resPtr.1 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 0 %res.1 = load i32, i32* %resPtr.1, align 4 %resPtr.2 = getelementptr %struct.DispatchSystemData, %struct.DispatchSystemData* %data, i32 0, i32 0, i32 1 @@ -141,7 +139,7 @@ define <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* %data) !t 
ret <3 x i32> %val.2 } -define <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !49 { +define <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !49 { %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 0 %res.1 = load float, float* %resPtr.1, align 4 %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 1 @@ -154,7 +152,7 @@ define <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture ret <3 x float> %val.2 } -define <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !49 { +define <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !49 { %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 0 %res.1 = load float, float* %resPtr.1, align 4 %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 1 @@ -167,7 +165,7 @@ define <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapt ret <3 x float> %val.2 } -define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !51 { +define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !51 { %resPtr = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 2 %res = load float, float* %resPtr, align 4 ret float %res @@ -200,7 +198,7 @@ define void @MyRayGen() #2 { } ; Function Attrs: nounwind -define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !types !55 { +define void @MyClosestHitShader(%struct.RayPayload* 
noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !pointeetys !55 { %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 %2 = load <2 x float>, <2 x float>* %1, align 4 %3 = extractelement <2 x float> %2, i32 0 @@ -217,7 +215,7 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, } ; Function Attrs: nounwind -define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #2 !types !55 { +define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #2 !pointeetys !55 { %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 %2 = load <4 x float>, <4 x float>* %1, align 4 %3 = call float @dx.op.objectRayOrigin.f32(i32 149, i8 0) @@ -292,14 +290,14 @@ define void @MyIntersectionShader2() #2 { } ; Function Attrs: nounwind -define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !types !58 { +define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !pointeetys !58 { %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 store <4 x float> , <4 x float>* %1, align 4 ret void } ; Function Attrs: nounwind -declare !types !59 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 +declare !pointeetys !59 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 @@ -321,10 
+319,10 @@ declare void @dx.op.acceptHitAndEndSearch(i32) #0 declare void @dx.op.ignoreHit(i32) #0 ; Function Attrs: nounwind -declare !types !60 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 +declare !pointeetys !60 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 ; Function Attrs: nounwind -declare !types !61 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes2(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes2*) #1 +declare !pointeetys !61 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes2(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes2*) #1 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 @@ -333,10 +331,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !63 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !63 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !63 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !63 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } @@ -386,38 +384,38 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !29 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !30} !30 = !{i32 8, i32 7, i32 5, !22} !31 = !{void ()* @MyIntersectionShader2, !"MyIntersectionShader2", null, null, !26} -!32 = !{!"function", %struct.HitData poison, !33} +!32 = !{%struct.AnyHitTraversalData poison} !33 = !{i32 0, %struct.AnyHitTraversalData poison} -!34 = !{!"function", %struct.HitData poison, !35} +!34 = !{%struct.SystemData poison} !35 = !{i32 0, %struct.SystemData poison} -!36 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !35} -!37 = !{!"function", !"void", !35, %struct.BuiltInTriangleIntersectionAttributes poison} -!38 = !{!"function", i32 poison, !39} +!36 = !{%struct.SystemData poison} +!37 = !{%struct.SystemData poison} +!38 = !{%struct.DispatchSystemData poison} !39 = !{i32 0, %struct.DispatchSystemData poison} -!40 = !{!"function", i1 poison, !41} +!40 = !{%struct.TraversalData poison} !41 = !{i32 0, %struct.TraversalData poison} -!42 = !{!"function", i32 poison, !35} -!43 = 
!{!"function", !"void", !39} -!44 = !{!"function", !"void", !33} -!45 = !{!"function", !"void", !39, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!46 = !{!"function", !"void", !39, i32 poison} -!47 = !{!"function", i1 poison, !33, float poison, i32 poison} -!48 = !{!"function", <3 x i32> poison, !39} -!49 = !{!"function", <3 x float> poison, !39, !50} +!42 = !{%struct.SystemData poison} +!43 = !{%struct.DispatchSystemData poison} +!44 = !{%struct.AnyHitTraversalData poison} +!45 = !{%struct.DispatchSystemData poison} +!46 = !{%struct.DispatchSystemData poison} +!47 = !{%struct.AnyHitTraversalData poison} +!48 = !{%struct.DispatchSystemData poison} +!49 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !50 = !{i32 0, %struct.HitData poison} -!51 = !{!"function", float poison, !39, !50} +!51 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !52 = !{!53, !53, i64 0} !53 = !{!"omnipotent char", !54, i64 0} !54 = !{!"Simple C/C++ TBAA"} -!55 = !{!"function", !"void", !56, !57} +!55 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !56 = !{i32 0, %struct.RayPayload poison} !57 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!58 = !{!"function", !"void", !56} -!59 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !56} -!60 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !57} -!61 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !62} +!58 = !{%struct.RayPayload poison} +!59 = !{%struct.RayPayload poison} +!60 = !{%struct.BuiltInTriangleIntersectionAttributes poison} +!61 = !{%struct.BuiltInTriangleIntersectionAttributes2 
poison} !62 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes2 poison} -!63 = !{!"function", !"void", i64 poison, !64} +!63 = !{i8 poison} !64 = !{i32 0, i8 poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetContinuationStackAddr( ; LOWERRAYTRACINGPIPELINE-SAME: ) #[[ATTR0:[0-9]+]] { @@ -501,6 +499,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META35:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -520,31 +519,41 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], 
i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], i64 poison), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount [[META33]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 
poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [16 x i32] poison, [10 x i32] [[TMP39]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } @await(ptr [[TMP40]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP41]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[TMP42]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP43]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP44]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP45]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP41]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] @@ -566,24 +575,29 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META40:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META39:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META40:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP39]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP41]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
i32, ptr [[TMP7]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP42]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP13]], ptr [[TMP2]], align 4 @@ -608,24 +622,28 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP27]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP30]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP32]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP32]], ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP34]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP34]], ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr [[TMP40]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP38]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP38]], [33 x i32] poison, [10 x i32] [[TMP45]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META42:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META41:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META42:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -636,32 +654,38 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRSALLOCA:%.*]] = alloca 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP22]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP23]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[ORIGHITATTRS]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP11]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP44]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0, i32 0 @@ -702,156 +726,180 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = fcmp fast ogt float [[TMP34]], 1.000000e+00 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP34]], -1.000000e+00 ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP35]], label [[TMP38:%.*]], label [[TMP73:%.*]] -; LOWERRAYTRACINGPIPELINE: 38: +; LOWERRAYTRACINGPIPELINE: 42: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP29]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP56:%.*]] -; LOWERRAYTRACINGPIPELINE: 39: +; LOWERRAYTRACINGPIPELINE: 43: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP40]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) @PAYLOAD, align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP41]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr [[TMP50]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP48]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP50]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP52]], ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[TMP48]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP47]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP66]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP81]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP53]], ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP55]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP55]], [8 x i32] poison, [10 x i32] [[TMP63]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 56: +; LOWERRAYTRACINGPIPELINE: 64: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP57]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP58]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[TMP60]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[TMP58]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP82]], ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr [[TMP60]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP87]], ptr [[TMP85]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[TMP60]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP66]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP67]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP69]], ptr [[TMP68]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP88]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP97]], ptr [[TMP64]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP105]], ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = getelementptr 
inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP78]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP80]], ptr [[TMP111]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP70]], ptr [[TMP71]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP72]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP72]], [8 x i32] poison, [10 x i32] [[TMP84]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 73: +; LOWERRAYTRACINGPIPELINE: 85: ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP37]], label [[TMP74:%.*]], label [[TMP109:%.*]] -; LOWERRAYTRACINGPIPELINE: 74: +; LOWERRAYTRACINGPIPELINE: 86: ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP36]], label [[TMP75:%.*]], label [[TMP92:%.*]] -; LOWERRAYTRACINGPIPELINE: 75: +; LOWERRAYTRACINGPIPELINE: 87: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP76]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[TMP77]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP80]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP82]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = load i32, ptr [[TMP77]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP113]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = getelementptr 
inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr inbounds i32, ptr [[TMP77]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = load i32, ptr [[TMP119]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP127]], ptr [[TMP79]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[TMP119]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = load i32, ptr [[TMP95]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP129]], ptr [[TMP128]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr [[TMP79]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP84]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP85]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP86]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP88]], ptr [[TMP87]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr [[TMP119]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP98]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP99]], ptr [[TMP83]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP101]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP102]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP104]], ptr [[TMP103]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP89]], ptr [[TMP90]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP91]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP91]], [8 x i32] poison, [10 x i32] [[TMP132]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 92: +; LOWERRAYTRACINGPIPELINE: 109: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_IgnoreHit(ptr [[TMP93]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP94]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP95]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr [[TMP94]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP96]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP97]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr [[TMP96]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP98]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP99]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP134:%.*]] = load i32, ptr [[TMP94]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP134]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr [[TMP94]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP114]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP115]], ptr [[TMP96]], 
align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = getelementptr inbounds i32, ptr [[TMP96]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = load i32, ptr [[TMP117]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP118]], ptr [[TMP140]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr [[TMP96]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load i32, ptr [[TMP100]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP101]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP102]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP103]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP105]], ptr [[TMP104]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = getelementptr inbounds i32, ptr [[TMP114]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP121]], ptr [[TMP100]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP148:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP148]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP149:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP125:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP149]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP126]], ptr [[TMP125]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP106]], ptr [[TMP107]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP108]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP108]], [8 x i32] poison, [10 x i32] [[TMP130]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 109: +; LOWERRAYTRACINGPIPELINE: 131: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP29]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP12]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP110]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP111]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP112:%.*]] = getelementptr inbounds i32, ptr [[TMP110]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = load i32, ptr [[TMP112]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP113]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr [[TMP112]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP114]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP115]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = load i32, ptr [[TMP110]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP133]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP112:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = getelementptr inbounds i32, ptr [[TMP110]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = load i32, ptr [[TMP135]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP136]], ptr [[TMP112]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP137:%.*]] = getelementptr inbounds i32, ptr [[TMP112]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = load i32, ptr [[TMP138]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP139]], ptr [[TMP137]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP116:%.*]] = getelementptr inbounds i32, ptr [[TMP112]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = load i32, ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP117]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP118]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP119]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP121]], ptr [[TMP120]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = getelementptr inbounds i32, ptr [[TMP135]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = load i32, ptr [[TMP141]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP142]], ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP143]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP145:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP146:%.*]] = load i32, ptr [[TMP144]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP146]], ptr [[TMP145]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP122]], ptr [[TMP123]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP124]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP124]], [8 x i32] poison, [10 x i32] [[TMP150]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META44:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META44:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 
; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -869,13 +917,18 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: callAHit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] [[AWAIT_1:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]], [32 x i32] poison, [30 x i32] [[TMP9]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } @await.1(ptr [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP24]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[TMP26]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP24]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: isEnd.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 @@ -890,23 +943,27 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I1]], label [[TMP19:%.*]], label [[TMP21:%.*]] -; LOWERRAYTRACINGPIPELINE: 19: +; LOWERRAYTRACINGPIPELINE: 23: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP20]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP20]], [8 x i32] poison, [30 x i32] [[TMP25]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 21: +; LOWERRAYTRACINGPIPELINE: 26: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], [8 x i32] poison, [30 x i32] [[TMP28]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.AnyHitTraversalData @MyIntersectionShader2( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43]] !continuation.registercount [[META32]] !continuation [[META45:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43]] !continuation.registercount [[META32]] !continuation [[META45:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -924,13 +981,18 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: callAHit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_ANYHITTRAVERSALDATA]] [[AWAIT_2:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP8]], [32 x i32] poison, [30 x i32] [[TMP9]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } @await.2(ptr [[TMP23]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP24]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[TMP26]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP24]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; 
LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: isEnd.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 @@ -945,53 +1007,64 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I1]], label [[TMP19:%.*]], label [[TMP21:%.*]] -; LOWERRAYTRACINGPIPELINE: 19: +; LOWERRAYTRACINGPIPELINE: 23: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP20]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP20]], [8 x i32] poison, [30 x i32] [[TMP25]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 21: +; LOWERRAYTRACINGPIPELINE: 26: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], [8 x i32] poison, [30 x i32] [[TMP28]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META47:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META47:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP23]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP25]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP26]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> , ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], 
ptr [[TMP2]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load 
[[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP21]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], [33 x i32] poison, [10 x i32] [[TMP29]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; @@ -1077,7 +1150,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyRayGen( ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META35:![0-9]+]] !continuation [[META36:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -1097,8 +1170,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 @@ -1110,16 +1183,15 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa33i32a10i32s(i32 4, i32 8, i32 5, [36 x i32] poison, [10 x i32] [[TMP21]]), !continuation.returnedRegistercount [[META33:![0-9]+]], !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP22]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[TMP23]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP22]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[TMP23]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP27]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 @@ -1131,6 +1203,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP35]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP22]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP24]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] @@ -1155,15 +1228,15 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 
0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 @@ -1198,8 +1271,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP28]], ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP31]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP31]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP34]], ptr [[TMP32]], align 4 @@ -1213,7 +1286,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP40]], ptr [[TMP38]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP41]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP42]], [33 x i32] poison, [10 x i32] [[TMP43]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -1230,16 +1303,16 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], 
align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 @@ -1254,7 +1327,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP22]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP23]], ptr [[TMP9]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, 
i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP25]], ptr [[ORIGHITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 @@ -1309,8 +1382,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP41]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP43]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP43]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP46]], ptr [[TMP44]], align 4 @@ -1332,7 +1405,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP57]], ptr [[TMP58]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP59]], [8 x i32] poison, [10 x i32] [[TMP60]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 61: @@ -1340,8 +1413,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP62]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP64]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP64]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP67]], ptr [[TMP65]], align 4 @@ -1363,7 +1436,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP78]], ptr [[TMP79]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP80:%.*]] = load 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP81:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP81:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP80]], [8 x i32] poison, [10 x i32] [[TMP81]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 82: @@ -1375,8 +1448,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP85]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP87]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP87]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP89:%.*]] = getelementptr inbounds i32, ptr [[TMP86]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP90]], ptr [[TMP88]], align 4 @@ -1388,7 +1461,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[TMP89]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP95]], align 4 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP96]], ptr [[TMP94]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP98:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP98]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 @@ -1399,7 +1472,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP102]], ptr [[TMP103]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP104:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP105:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP104]], [8 x i32] poison, [10 x i32] [[TMP105]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 106: @@ -1407,8 +1480,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP107]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP109]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP109]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr [[TMP108]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP112:%.*]] = load i32, ptr [[TMP111]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP112]], ptr [[TMP110]], align 4 @@ -1420,7 +1493,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP111]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP118:%.*]] = load i32, ptr [[TMP117]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP118]], ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP119:%.*]] = 
getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP120:%.*]] = load i32, ptr [[ORIGHITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP120]], ptr [[TMP5]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP121:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 @@ -1431,7 +1504,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP124]], ptr [[TMP125]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP126:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP127:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP127:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP126]], [8 x i32] poison, [10 x i32] [[TMP127]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 128: @@ -1439,8 +1512,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP10]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP130:%.*]] = load i32, ptr [[TMP129]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP130]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP131:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP130]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP131:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP132:%.*]] = getelementptr inbounds i32, ptr [[TMP129]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP133:%.*]] = load i32, ptr [[TMP132]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP133]], ptr [[TMP131]], align 4 @@ -1462,7 +1535,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP144]], ptr [[TMP145]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP146:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP147:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP147:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP146]], [8 x i32] poison, [10 x i32] [[TMP147]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -1473,8 +1546,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 @@ -1492,17 +1565,17 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } (...) @lgc.cps.await__sl_s_struct.AnyHitTraversalDatasa8i32a30i32s(i32 3, i32 16, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], [32 x i32] poison, [30 x i32] [[TMP8]]), !continuation.returnedRegistercount [[META32:![0-9]+]], !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 @@ -1519,13 +1592,13 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP21:%.*]], label [[TMP24:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: 21: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], [8 x i32] poison, [30 x i32] [[TMP23]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 24: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP25]], [8 x i32] poison, [30 x i32] [[TMP26]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -1536,8 +1609,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 @@ -1555,17 +1628,17 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]], ptr 
[[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } (...) @lgc.cps.await__sl_s_struct.AnyHitTraversalDatasa8i32a30i32s(i32 3, i32 16, i32 5, float [[RES_I2]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[TMP7]], [32 x i32] poison, [30 x i32] [[TMP8]]), !continuation.returnedRegistercount [[META32]], !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: isEnd.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 @@ -1582,13 +1655,13 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP21:%.*]], label [[TMP24:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: 21: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP22]], [8 x i32] poison, [30 x i32] [[TMP23]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 24: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP25]], [8 x i32] poison, [30 x i32] [[TMP26]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -1596,14 +1669,14 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyMissShader( ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META43]] !lgc.cps [[META41]] !continuation [[META48:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 @@ -1620,8 +1693,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 @@ -1635,1575 +1708,11 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], [33 x i32] poison, [10 x i32] [[TMP27]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; -; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( -; REGISTERBUFFER-CPS-SAME: ) #[[ATTR0:[0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: ret i32 0 -; -; -; REGISTERBUFFER-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( -; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { -; REGISTERBUFFER-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 -; REGISTERBUFFER-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] -; -; -; REGISTERBUFFER-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( -; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { -; REGISTERBUFFER-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: ret void -; -; -; REGISTERBUFFER-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( -; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) { -; REGISTERBUFFER-CPS-NEXT: ret i32 5 -; -; -; REGISTERBUFFER-CPS-LABEL: define i1 @_cont_IsEndSearch( -; REGISTERBUFFER-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { -; REGISTERBUFFER-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: ret i1 [[ISEND]] -; -; -; REGISTERBUFFER-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( -; REGISTERBUFFER-CPS-SAME: ptr [[DATA:%.*]]) { -; 
REGISTERBUFFER-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 -; REGISTERBUFFER-CPS-NEXT: ret <3 x i32> [[VAL_2]] -; -; -; REGISTERBUFFER-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( -; REGISTERBUFFER-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2:%.*]] = insertelement 
<3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; REGISTERBUFFER-CPS-NEXT: ret <3 x float> [[VAL_2]] -; -; -; REGISTERBUFFER-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( -; REGISTERBUFFER-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 -; REGISTERBUFFER-CPS-NEXT: ret <3 x float> [[VAL_2]] -; -; -; REGISTERBUFFER-CPS-LABEL: define float @_cont_RayTCurrent( -; REGISTERBUFFER-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { -; REGISTERBUFFER-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 -; REGISTERBUFFER-CPS-NEXT: ret float [[RES]] -; -; -; REGISTERBUFFER-CPS-LABEL: define void @MyRayGen( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META35:![0-9]+]] !continuation [[META36:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: 
-; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) -; REGISTERBUFFER-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 -; REGISTERBUFFER-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @MyRayGen.resume.0) -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP6]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP8]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 
-; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 8, {} poison, i64 [[TMP6]], i32 5, [36 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.returnedRegistercount [[META33:![0-9]+]], !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyRayGen.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [33 x i32], [10 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META35]] !continuation [[META36]] { -; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, align 8 -; REGISTERBUFFER-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] 
= extractvalue [10 x i32] [[TMP5]], 9 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP12]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_3_I5:%.*]] = load i32, 
ptr [[RESPTR_3_I4]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP13]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) -; REGISTERBUFFER-CPS-NEXT: 
[[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 -; REGISTERBUFFER-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP15]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP16]], float [[TMP17]], float [[TMP18]], float [[TMP19]], i8 15) -; REGISTERBUFFER-CPS-NEXT: ret void -; -; -; REGISTERBUFFER-CPS-LABEL: define void @MyClosestHitShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37:![0-9]+]] !lgc.cps [[META38:![0-9]+]] !continuation [[META39:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 -; 
REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_012_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = 
bitcast float [[DOTSROA_012_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_012_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_012_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; REGISTERBUFFER-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = fsub fast float 1.000000e+00, [[TMP9]] -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = fsub fast float [[TMP10]], [[TMP11]] -; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i64 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float 1.000000e+00, i64 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP21]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT10:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP17]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP18]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP19]], 8 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP20]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define void @MyAnyHitShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META38]] !lgc.cps [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 -; REGISTERBUFFER-CPS-NEXT: 
[[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_0_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: store float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: 
[[SYSTEM_DATA_FCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: store float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_4_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: store i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_5_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: store float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float 
[[TMP1]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP4]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT387:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP5]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0389_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT387]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0389_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0389_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT387]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0389_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[HIT_ATTRS_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], 0 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr 
[[RES_I1_FCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I1_FCA_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0411_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0411_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0411_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0411_0_VEC_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0411_4_VEC_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0411_8_VEC_EXTRACT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] 
[[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: 
[[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_0_INSERT]], <3 x float> [[RES_I10_FCA_1_LOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I10_FCA_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_1_INSERT]], float [[RES_I10_FCA_2_LOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I10_FCA_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_2_INSERT]], i32 [[RES_I10_FCA_3_LOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 -; REGISTERBUFFER-CPS-NEXT: 
[[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP11:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] -; REGISTERBUFFER-CPS-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[EXTRACT1]] -; REGISTERBUFFER-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], 0.000000e+00 -; REGISTERBUFFER-CPS-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], 1.000000e+00 -; REGISTERBUFFER-CPS-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP12]], -1.000000e+00 -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP13]], label [[TMP16:%.*]], label [[TMP39:%.*]] -; REGISTERBUFFER-CPS: 16: -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP14]], label [[TMP17:%.*]], label [[TMP28:%.*]] -; REGISTERBUFFER-CPS: 17: -; REGISTERBUFFER-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP18]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x 
float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0392_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP24]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0392_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0392_0_VEC_INSERT]], float [[TMP26]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT391:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0392_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT391]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP27]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT]], ptr [[DOTFCA_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP]], align 4 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = 
insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 28: -; REGISTERBUFFER-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP29]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT25]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP34:%.*]] = bitcast float 
[[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0396_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP35]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0396_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0396_0_VEC_INSERT]], float [[TMP37]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT395:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0396_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT223:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT395]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP224:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP38]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT223]], ptr [[DOTFCA_0_GEP224]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP225:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD226:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP225]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT227:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD226]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP228:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD229:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP228]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT230:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT227]], <3 x float> [[DOTFCA_0_1_0_LOAD229]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP231:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD232:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP231]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT233:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT230]], <3 x float> [[DOTFCA_0_1_1_LOAD232]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP234:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD235:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP234]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT236:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT233]], float [[DOTFCA_0_1_2_LOAD235]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP237:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD238:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP237]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT239:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT236]], i32 [[DOTFCA_0_1_3_LOAD238]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP240:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD241:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP240]], align 4 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_2_INSERT242:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT239]], <3 x float> [[DOTFCA_0_2_LOAD241]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP243:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD244:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP243]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT245:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT242]], <3 x float> [[DOTFCA_0_3_LOAD244]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP246:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD247:%.*]] = load float, ptr [[DOTFCA_0_4_GEP246]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT248:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT245]], float [[DOTFCA_0_4_LOAD247]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP249:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD250:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP249]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT251:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT248]], i64 [[DOTFCA_0_5_LOAD250]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP252:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD253:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP252]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT254:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT251]], <3 x float> [[DOTFCA_1_0_LOAD253]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP255:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD256:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP255]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT257:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT254]], <3 x float> [[DOTFCA_1_1_LOAD256]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP258:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD259:%.*]] = load float, ptr [[DOTFCA_1_2_GEP258]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT260:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT257]], float [[DOTFCA_1_2_LOAD259]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP261:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD262:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP261]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT263:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT260]], i32 [[DOTFCA_1_3_LOAD262]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT61:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP30]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT64:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT61]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT67:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT64]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT70:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT67]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT73:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT70]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT76:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT73]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_6_INSERT79:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT76]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT82:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT79]], i32 [[TMP31]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT85:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT82]], i32 [[TMP32]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT88:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT85]], i32 [[TMP33]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT263]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT88]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 39: -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP15]], label [[TMP40:%.*]], label [[TMP59:%.*]] -; REGISTERBUFFER-CPS: 40: -; REGISTERBUFFER-CPS-NEXT: br i1 [[TMP14]], label [[TMP41:%.*]], label [[TMP50:%.*]] -; REGISTERBUFFER-CPS: 41: -; REGISTERBUFFER-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP42]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT27:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT27]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP46:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP6]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0400_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP47]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP7]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0400_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0400_0_VEC_INSERT]], float [[TMP48]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT399:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0400_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT264:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT399]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP265:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP49]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT264]], ptr [[DOTFCA_0_GEP265]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP266:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD267:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP266]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT268:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD267]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP269:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD270:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP269]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT271:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT268]], <3 x float> [[DOTFCA_0_1_0_LOAD270]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP272:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD273:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP272]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT274:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT271]], <3 x float> [[DOTFCA_0_1_1_LOAD273]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP275:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD276:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP275]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT277:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT274]], float [[DOTFCA_0_1_2_LOAD276]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP278:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD279:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP278]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT280:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT277]], i32 [[DOTFCA_0_1_3_LOAD279]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP281:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD282:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP281]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT283:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT280]], <3 x float> [[DOTFCA_0_2_LOAD282]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP284:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD285:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP284]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT286:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT283]], <3 x float> [[DOTFCA_0_3_LOAD285]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP287:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD288:%.*]] = load float, ptr [[DOTFCA_0_4_GEP287]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT286]], float [[DOTFCA_0_4_LOAD288]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP290:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD291:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP290]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT289]], i64 [[DOTFCA_0_5_LOAD291]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP293:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD294:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP293]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT292]], <3 x float> [[DOTFCA_1_0_LOAD294]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP296:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD297:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP296]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT295]], <3 x float> [[DOTFCA_1_1_LOAD297]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP299:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD300:%.*]] = load float, ptr [[DOTFCA_1_2_GEP299]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT298]], float [[DOTFCA_1_2_LOAD300]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP302:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD303:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP302]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT301]], i32 [[DOTFCA_1_3_LOAD303]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT91:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP43]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT94:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT91]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT97:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT94]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT100:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT97]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT103:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT100]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT106:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT103]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT109:%.*]] = insertvalue [10 x i32] 
[[DOTFCA_5_INSERT106]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT112:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT109]], i32 [[TMP44]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT115:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT112]], i32 [[TMP45]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT118:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT115]], i32 [[TMP46]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT304]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT118]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 50: -; REGISTERBUFFER-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP51]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT29:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT29]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT56:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT56]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP6]] to 
float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0404_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP56]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP7]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0404_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0404_0_VEC_INSERT]], float [[TMP57]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT403:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0404_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT305:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT403]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP306:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP58]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT305]], ptr [[DOTFCA_0_GEP306]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP307:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD308:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP307]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT309:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD308]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP310:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD311:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP310]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT312:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT309]], <3 x float> [[DOTFCA_0_1_0_LOAD311]], 0, 1, 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP313:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD314:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP313]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT315:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT312]], <3 x float> [[DOTFCA_0_1_1_LOAD314]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP316:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD317:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP316]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT318:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT315]], float [[DOTFCA_0_1_2_LOAD317]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP319:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD320:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP319]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT321:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT318]], i32 [[DOTFCA_0_1_3_LOAD320]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP322:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD323:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP322]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT324:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT321]], <3 x float> [[DOTFCA_0_2_LOAD323]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP325:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_0_3_LOAD326:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP325]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT327:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT324]], <3 x float> [[DOTFCA_0_3_LOAD326]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP328:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD329:%.*]] = load float, ptr [[DOTFCA_0_4_GEP328]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT330:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT327]], float [[DOTFCA_0_4_LOAD329]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP331:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD332:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP331]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT333:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT330]], i64 [[DOTFCA_0_5_LOAD332]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP334:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD335:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP334]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT336:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT333]], <3 x float> [[DOTFCA_1_0_LOAD335]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP337:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD338:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP337]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT339:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT336]], <3 x float> [[DOTFCA_1_1_LOAD338]], 1, 1 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP340:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD341:%.*]] = load float, ptr [[DOTFCA_1_2_GEP340]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT342:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT339]], float [[DOTFCA_1_2_LOAD341]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP343:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD344:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP343]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT345:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT342]], i32 [[DOTFCA_1_3_LOAD344]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT121:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP52]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT124:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT121]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT127:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT124]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT130:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT127]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT133:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT130]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT136:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT133]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT139:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT136]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT142:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT139]], i32 [[TMP53]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT145:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT142]], 
i32 [[TMP54]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT148:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT145]], i32 [[TMP55]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT345]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT148]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 59: -; REGISTERBUFFER-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT31:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT40:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP61:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT40]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT48:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP62:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT48]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT58:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT58]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP64]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0408_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP65]], i32 0 -; REGISTERBUFFER-CPS-NEXT: 
[[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT17]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP66]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0408_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0408_0_VEC_INSERT]], float [[TMP67]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT407:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0408_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT346:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT407]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_GEP347:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP68]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT346]], ptr [[DOTFCA_0_GEP347]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP348:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD349:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP348]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT350:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD349]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_GEP351:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_LOAD352:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP351]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT353:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT350]], <3 x float> [[DOTFCA_0_1_0_LOAD352]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_GEP354:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_LOAD355:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP354]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT356:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT353]], <3 x float> [[DOTFCA_0_1_1_LOAD355]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_GEP357:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_LOAD358:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP357]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT359:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT356]], float [[DOTFCA_0_1_2_LOAD358]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_GEP360:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_LOAD361:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP360]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT362:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT359]], i32 [[DOTFCA_0_1_3_LOAD361]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_GEP363:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_LOAD364:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP363]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT365:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT362]], <3 x float> [[DOTFCA_0_2_LOAD364]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_GEP366:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_LOAD367:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP366]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT368:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT365]], <3 x float> [[DOTFCA_0_3_LOAD367]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_GEP369:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_LOAD370:%.*]] = load float, ptr [[DOTFCA_0_4_GEP369]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT371:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT368]], float [[DOTFCA_0_4_LOAD370]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_GEP372:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_LOAD373:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP372]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT374:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT371]], i64 [[DOTFCA_0_5_LOAD373]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_GEP375:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_LOAD376:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP375]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT377:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT374]], <3 x float> [[DOTFCA_1_0_LOAD376]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_GEP378:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_LOAD379:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP378]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT380:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT377]], <3 x float> [[DOTFCA_1_1_LOAD379]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_GEP381:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_LOAD382:%.*]] = load float, ptr [[DOTFCA_1_2_GEP381]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT383:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT380]], float [[DOTFCA_1_2_LOAD382]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_GEP384:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_LOAD385:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP384]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT386:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT383]], i32 [[DOTFCA_1_3_LOAD385]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT151:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP60]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT154:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT151]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT157:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT154]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT160:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT157]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT163:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT160]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT166:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT163]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT169:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT166]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT172:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT169]], i32 [[TMP61]], 7 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT175:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT172]], i32 [[TMP62]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT178:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT175]], i32 [[TMP63]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT386]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT178]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define void @MyIntersectionShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42:![0-9]+]] !continuation [[META43:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 -; REGISTERBUFFER-CPS-NEXT: 
[[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 -; 
REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 -; 
REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP0]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP1]], 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: 
[[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; REGISTERBUFFER-CPS: callAHit.i: -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: 
[[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 
[[PAYLOAD_FCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 16, {} poison, i64 [[TMP2]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount [[META32:![0-9]+]], !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: isEnd.i: -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP6]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <3 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] -; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = 
call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; REGISTERBUFFER-CPS: 8: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] 
= insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 9: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x 
i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42]] !continuation [[META43]] { -; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP5]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP7:%.*]], label [[TMP8:%.*]] -; REGISTERBUFFER-CPS: 7: -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 
[[DOTFCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 
[[DOTFCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 8: -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 
[[DOTFCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define void @MyIntersectionShader2( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42]] !continuation [[META44:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURN_ADDR_SPILL_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: 
[[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 -; REGISTERBUFFER-CPS-NEXT: 
[[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] 
= extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP0]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP1]], 3 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] -; REGISTERBUFFER-CPS: callAHit.i: -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 
x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader2.resume.0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 16, {} poison, i64 [[TMP2]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount [[META32]], !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: isEnd.i: -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP6]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <3 x i32> -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] -; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = 
call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; REGISTERBUFFER-CPS: 8: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] 
= insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 9: -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x 
i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( -; REGISTERBUFFER-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42]] !continuation [[META44]] { -; REGISTERBUFFER-CPS-NEXT: entryresume.0: -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP5]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; REGISTERBUFFER-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() -; REGISTERBUFFER-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP7:%.*]], label [[TMP8:%.*]] -; REGISTERBUFFER-CPS: 7: -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR1]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; 
REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 
[[DOTFCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 
[[DOTFCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD2]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; REGISTERBUFFER-CPS: 8: -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 -; REGISTERBUFFER-CPS-NEXT: [[RETURN_ADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURN_ADDR_RELOAD_ADDR]], align 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 
[[DOTFCA_8_EXTRACT]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 -; REGISTERBUFFER-CPS-NEXT: 
[[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 8) -; REGISTERBUFFER-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURN_ADDR_RELOAD]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; -; REGISTERBUFFER-CPS-LABEL: define void @MyMissShader( -; REGISTERBUFFER-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !lgc.cps [[META38]] !continuation [[META45:![0-9]+]] { -; REGISTERBUFFER-CPS-NEXT: AllocaSpillBB: -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 -; REGISTERBUFFER-CPS-NEXT: 
[[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 -; REGISTERBUFFER-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 -; REGISTERBUFFER-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 -; REGISTERBUFFER-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; REGISTERBUFFER-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; REGISTERBUFFER-CPS-NEXT: [[TMP5:%.*]] = bitcast 
float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; REGISTERBUFFER-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 -; REGISTERBUFFER-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP4]], 0 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 -; REGISTERBUFFER-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 -; REGISTERBUFFER-CPS-NEXT: call void @lgc.cps.free(i32 0) -; REGISTERBUFFER-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] -; REGISTERBUFFER-CPS-NEXT: unreachable -; -; ; POSTPROCESS-LABEL: define i32 @_cont_GetContinuationStackAddr( ; POSTPROCESS-SAME: ) #[[ATTR0:[0-9]+]] { ; POSTPROCESS-NEXT: ret i32 0 @@ -3284,81 +1793,96 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; POSTPROCESS-LABEL: define void @MyRayGen( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META22]] !continuation [[META35:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation [[META35:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POSTPROCESS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] 
[[TMP1]]) -; POSTPROCESS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POSTPROCESS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) -; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP2]]) +; POSTPROCESS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POSTPROCESS-NEXT: [[TMP4:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP3]]) +; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 ; POSTPROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POSTPROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; POSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyRayGen.resume.0) ; POSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP6]], 5 ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x 
float> zeroinitializer, i32 0 -; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 -; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP11]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], i64 poison), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount [[META33]] +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP11]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP7]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP8]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP9]], 9 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 4, i32 [[TMP10]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [16 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define dso_local void @MyRayGen.resume.0( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META33]] !continuation [[META35]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [33 x i32], [10 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation [[META35]] { ; POSTPROCESS-NEXT: entryresume.0: -; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[TMP19:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP9]], ptr [[TMP19]], align 4 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP1]], 2 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = extractvalue [10 x i32] [[TMP16]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP16]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP16]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP16]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP16]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP16]], 5 +; 
POSTPROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP16]], 6 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = extractvalue [10 x i32] [[TMP16]], 7 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = extractvalue [10 x i32] [[TMP16]], 8 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = extractvalue [10 x i32] [[TMP16]], 9 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP10]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP17]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POSTPROCESS-NEXT: [[TMP9:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", 
align 4 -; POSTPROCESS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; POSTPROCESS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1 +; POSTPROCESS-NEXT: [[TMP18:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POSTPROCESS-NEXT: [[RES_1_I1:%.*]] = load i32, ptr [[TMP19]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP19]], i32 0, i32 0, i32 1 ; POSTPROCESS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 -; POSTPROCESS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP19]], i32 0, i32 0, i32 2 ; POSTPROCESS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 ; POSTPROCESS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 ; POSTPROCESS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 ; POSTPROCESS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 ; POSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 -; POSTPROCESS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; POSTPROCESS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1 +; POSTPROCESS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP19]], align 4 +; POSTPROCESS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP19]], i32 0, i32 0, i32 1 ; POSTPROCESS-NEXT: [[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 -; POSTPROCESS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; 
POSTPROCESS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP19]], i32 0, i32 0, i32 2 ; POSTPROCESS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 ; POSTPROCESS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 ; POSTPROCESS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 ; POSTPROCESS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 ; POSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 -; POSTPROCESS-NEXT: [[TMP16:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) -; POSTPROCESS-NEXT: [[TMP11:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP16]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; POSTPROCESS-NEXT: [[TMP20:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP18]]) +; POSTPROCESS-NEXT: [[TMP11:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP20]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) ; POSTPROCESS-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 ; POSTPROCESS-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 ; POSTPROCESS-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 @@ -3370,73 +1894,95 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; POSTPROCESS-LABEL: define void @MyClosestHitShader( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], 
[[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META36:![0-9]+]] !continuation [[META37:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; POSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_EXTRACT]], ptr [[DOTFCA_0_0_GEP]], align 4 ; POSTPROCESS-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 -; POSTPROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP5]], i32 1 -; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP7]], i32 2 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP9]], i32 3 -; POSTPROCESS-NEXT: [[TMP10:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP10]], 0 -; POSTPROCESS-NEXT: [[DOTSROA_07_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_07_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; POSTPROCESS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = 
insertelement <2 x float> undef, float [[TMP12]], i32 0 -; POSTPROCESS-NEXT: [[DOTSROA_07_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_07_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float -; POSTPROCESS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP14]], i32 1 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP3]], i32 1 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP4]], i32 2 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP5]], i32 3 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP6]], 0 +; POSTPROCESS-NEXT: [[DOTSROA_012_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_012_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; POSTPROCESS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float 
[[TMP8]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_012_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_012_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; POSTPROCESS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP10]], i32 1 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 -; POSTPROCESS-NEXT: [[TMP16:%.*]] = fsub fast float 1.000000e+00, [[TMP15]] -; POSTPROCESS-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 -; POSTPROCESS-NEXT: [[TMP18:%.*]] = fsub fast float [[TMP16]], [[TMP17]] -; POSTPROCESS-NEXT: [[TMP19:%.*]] = insertelement <4 x float> undef, float [[TMP18]], i64 0 -; POSTPROCESS-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP15]], i64 1 -; POSTPROCESS-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP17]], i64 2 -; POSTPROCESS-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float 1.000000e+00, i64 3 -; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 0 -; POSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 1 -; POSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 2 -; POSTPROCESS-NEXT: [[TMP25:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to 
i32 -; POSTPROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 3 -; POSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = fsub fast float 1.000000e+00, [[TMP11]] +; POSTPROCESS-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; POSTPROCESS-NEXT: [[TMP14:%.*]] = fsub fast float [[TMP12]], [[TMP13]] +; POSTPROCESS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> undef, float [[TMP14]], i64 0 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP11]], i64 1 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float [[TMP13]], i64 2 +; POSTPROCESS-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float 1.000000e+00, i64 3 +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP18]], i32 0 +; POSTPROCESS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP18]], i32 1 +; POSTPROCESS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP18]], i32 2 +; POSTPROCESS-NEXT: [[TMP21:%.*]] = bitcast float 
[[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP18]], i32 3 +; POSTPROCESS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP23]], i32 0, i32 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 ; POSTPROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP28]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP28]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define void @MyAnyHitShader( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation [[META39:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] 
[[PAYLOAD]], 6 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 @@ -3477,28 +2023,24 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 ; POSTPROCESS-NEXT: store i32 [[DOTFCA_1_3_EXTRACT]], ptr [[DOTFCA_1_3_GEP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 -; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; POSTPROCESS-NEXT: 
[[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 -; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POSTPROCESS-NEXT: [[TMP12:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP11]]) -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT237:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP12]], 0 -; POSTPROCESS-NEXT: [[DOTSROA_0239_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT237]], i32 0 -; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0239_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: [[DOTSROA_0239_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT237]], i32 1 -; POSTPROCESS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0239_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP5]], i32 2 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 
[[PAYLOAD_FCA_9_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP6]], i32 3 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP7]]) +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT401:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]], 0 +; POSTPROCESS-NEXT: [[DOTSROA_0403_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT401]], i32 0 +; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0403_0_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[DOTSROA_0403_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT401]], i32 1 +; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0403_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; POSTPROCESS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 ; POSTPROCESS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 @@ -3515,14 +2057,14 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: 
[[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 ; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 ; POSTPROCESS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 -; POSTPROCESS-NEXT: [[DOTSROA_0260_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 -; POSTPROCESS-NEXT: [[DOTSROA_0260_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 -; POSTPROCESS-NEXT: [[DOTSROA_0260_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 -; POSTPROCESS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0260_0_VEC_EXTRACT]], i32 0 -; POSTPROCESS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0260_4_VEC_EXTRACT]], i32 1 -; POSTPROCESS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0260_8_VEC_EXTRACT]], i32 2 +; POSTPROCESS-NEXT: [[DOTSROA_0425_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_0425_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[DOTSROA_0425_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; POSTPROCESS-NEXT: [[VAL_0_I7:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0425_0_VEC_EXTRACT]], i32 0 +; POSTPROCESS-NEXT: [[VAL_1_I8:%.*]] = insertelement <3 x float> [[VAL_0_I7]], float [[DOTSROA_0425_4_VEC_EXTRACT]], i32 1 +; POSTPROCESS-NEXT: [[VAL_2_I9:%.*]] = insertelement <3 x float> [[VAL_1_I8]], float [[DOTSROA_0425_8_VEC_EXTRACT]], i32 2 ; POSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I9]], i8 0 -; POSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr 
inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; POSTPROCESS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 ; POSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 @@ -3546,7 +2088,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 ; POSTPROCESS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 ; POSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 -; POSTPROCESS-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-NEXT: [[RES_I10_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; POSTPROCESS-NEXT: [[RES_I10_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I10_FCA_0_GEP]], align 4 ; POSTPROCESS-NEXT: [[RES_I10_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I10_FCA_0_LOAD]], 0 @@ -3563,36 +2105,32 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 1 ; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue 
[[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 2 ; POSTPROCESS-NEXT: [[RES_I10_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I10_FCA_3_INSERT]], 3 -; POSTPROCESS-NEXT: [[TMP18:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] -; POSTPROCESS-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP18]], [[EXTRACT1]] -; POSTPROCESS-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP19]], 0.000000e+00 -; POSTPROCESS-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP19]], 1.000000e+00 -; POSTPROCESS-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP19]], -1.000000e+00 -; POSTPROCESS-NEXT: br i1 [[TMP20]], label [[TMP23:%.*]], label [[TMP48:%.*]] -; POSTPROCESS: 23: -; POSTPROCESS-NEXT: br i1 [[TMP21]], label [[TMP24:%.*]], label [[TMP36:%.*]] -; POSTPROCESS: 24: -; POSTPROCESS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP25]]) +; POSTPROCESS-NEXT: [[TMP14:%.*]] = fmul fast float [[RES_I10_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; POSTPROCESS-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP14]], [[EXTRACT1]] +; POSTPROCESS-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP15]], 0.000000e+00 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], 1.000000e+00 +; POSTPROCESS-NEXT: [[TMP18:%.*]] = fcmp fast ogt float [[TMP15]], -1.000000e+00 +; POSTPROCESS-NEXT: br i1 [[TMP16]], label [[TMP19:%.*]], label [[TMP44:%.*]] +; POSTPROCESS: 19: +; POSTPROCESS-NEXT: br i1 [[TMP17]], label [[TMP20:%.*]], label [[TMP32:%.*]] +; POSTPROCESS: 20: +; POSTPROCESS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP21]]) ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; 
POSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; POSTPROCESS-NEXT: [[TMP27:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; POSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; POSTPROCESS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP25:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; POSTPROCESS-NEXT: [[TMP30:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT15]] to i32 ; POSTPROCESS-NEXT: [[TMP31:%.*]] = bitcast i32 [[TMP30]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0241_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP31]], i32 0 ; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19:%.*]] = 
extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POSTPROCESS-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 -; POSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +; POSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT19]] to i32 +; POSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP28]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0241_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0241_0_VEC_INSERT]], float [[TMP33]], i32 1 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0241_4_VEC_INSERT]], 0 ; POSTPROCESS-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 @@ -3602,67 +2140,73 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP60:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP60]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP61]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP225:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP225]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], 
<3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP62:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP62]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_GEP226:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP226]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP63]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_GEP227:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP227]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP64:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP64]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_GEP228:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP228]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP65:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP65]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_GEP229:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP229]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP66:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP66]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_GEP230:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP230]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP67:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP67]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_GEP231:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP231]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP68]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_GEP232:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP232]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP69:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP69]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_GEP233:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP233]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP70:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP70]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_GEP234:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP234]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 -; 
POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP71:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 -; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP71]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_GEP235:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP235]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP72:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 -; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP72]], align 4 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP236:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP236]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 -; POSTPROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP35]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP22]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP23]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP24]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP25]], 9 +; POSTPROCESS-NEXT: [[TMP38:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP38]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-NEXT: unreachable -; POSTPROCESS: 36: -; POSTPROCESS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP37]]) +; POSTPROCESS: 32: +; POSTPROCESS-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP39]]) ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; POSTPROCESS-NEXT: [[TMP38:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT25]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP38]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT25]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT34:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; POSTPROCESS-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP35:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT34]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT42:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; POSTPROCESS-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP36:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT42]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT52:%.*]] = extractelement <4 x 
float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; POSTPROCESS-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP37:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT52]] to i32 ; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; POSTPROCESS-NEXT: [[TMP42:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP42]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0245_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP43]], i32 0 ; POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POSTPROCESS-NEXT: [[TMP44:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP44]] to float +; POSTPROCESS-NEXT: [[TMP40:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POSTPROCESS-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP40]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0245_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0245_0_VEC_INSERT]], float [[TMP45]], i32 1 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT244:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0245_4_VEC_INSERT]], 0 ; POSTPROCESS-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 @@ -3708,31 +2252,37 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP111:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD112:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP111]], align 4 ; 
POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT113:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT110]], i32 [[DOTFCA_1_3_LOAD112]], 1, 3 -; POSTPROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP47]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT113]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT61:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP41]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT64:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT61]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT67:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT64]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT70:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT67]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT73:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT70]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT76:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT73]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT79:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT76]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT82:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT79]], i32 [[TMP35]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT85:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT82]], i32 [[TMP36]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT88:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT85]], i32 [[TMP37]], 9 +; POSTPROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP52]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT113]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT88]]) ; POSTPROCESS-NEXT: unreachable -; POSTPROCESS: 48: -; POSTPROCESS-NEXT: br i1 [[TMP22]], label [[TMP49:%.*]], label [[TMP70:%.*]] -; POSTPROCESS: 49: -; POSTPROCESS-NEXT: br i1 [[TMP21]], label [[TMP50:%.*]], label [[TMP60:%.*]] -; POSTPROCESS: 50: -; POSTPROCESS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP51]]) +; POSTPROCESS: 44: +; POSTPROCESS-NEXT: br i1 [[TMP18]], label [[TMP53:%.*]], label [[TMP71:%.*]] +; POSTPROCESS: 45: +; POSTPROCESS-NEXT: br i1 [[TMP17]], label [[TMP54:%.*]], label [[TMP62:%.*]] +; POSTPROCESS: 46: +; POSTPROCESS-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP47]]) ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT27:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; POSTPROCESS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT27]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP52]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP48:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT27]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT36:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; POSTPROCESS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP49:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT36]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; 
POSTPROCESS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP54]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP50:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT54:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; POSTPROCESS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP55]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP13]] to float +; POSTPROCESS-NEXT: [[TMP51:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT54]] to i32 +; POSTPROCESS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP9]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0249_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP56]], i32 0 -; POSTPROCESS-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP14]] to float +; POSTPROCESS-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP10]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0249_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0249_0_VEC_INSERT]], float [[TMP57]], i32 1 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT248:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0249_4_VEC_INSERT]], 0 ; POSTPROCESS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 @@ -3778,27 +2328,33 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP152:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD153:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP152]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT154:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT151]], i32 [[DOTFCA_1_3_LOAD153]], 1, 3 -; POSTPROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP59]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT154]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT91:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP48]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT94:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT91]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT97:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT94]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT100:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT97]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT103:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT100]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT106:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT103]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT109:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT106]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT112:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT109]], i32 [[TMP49]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT115:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT112]], i32 [[TMP50]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT118:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT115]], i32 [[TMP51]], 9 +; POSTPROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP55]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT154]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT118]]) ; POSTPROCESS-NEXT: unreachable -; POSTPROCESS: 60: -; POSTPROCESS-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POSTPROCESS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP61]]) +; POSTPROCESS: 56: +; POSTPROCESS-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; POSTPROCESS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP63]]) ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT29:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; POSTPROCESS-NEXT: [[TMP62:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT29]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP62]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP64:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT29]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT38:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; POSTPROCESS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP63]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP59:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT38]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; POSTPROCESS-NEXT: [[TMP64:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT56:%.*]] = extractelement <4 x float> 
[[DOTSROA_0_12_VEC_INSERT]], i32 3 -; POSTPROCESS-NEXT: [[TMP65:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT56]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP66:%.*]] = bitcast i32 [[TMP13]] to float +; POSTPROCESS-NEXT: [[TMP61:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT56]] to i32 +; POSTPROCESS-NEXT: [[TMP66:%.*]] = bitcast i32 [[TMP9]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0253_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP66]], i32 0 -; POSTPROCESS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP14]] to float +; POSTPROCESS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP10]] to float ; POSTPROCESS-NEXT: [[DOTSROA_0253_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0253_0_VEC_INSERT]], float [[TMP67]], i32 1 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT252:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0253_4_VEC_INSERT]], 0 ; POSTPROCESS-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 @@ -3844,23 +2400,29 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP193:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD194:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP193]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT195:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT192]], i32 [[DOTFCA_1_3_LOAD194]], 1, 3 -; POSTPROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP69]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT195]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT121:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP64]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT124:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT121]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT127:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT124]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT130:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT127]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT133:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT130]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT136:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT133]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT139:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT136]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT142:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT139]], i32 [[TMP59]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT145:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT142]], i32 [[TMP60]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT148:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT145]], i32 [[TMP61]], 9 +; POSTPROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP65]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT195]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT148]]) ; POSTPROCESS-NEXT: unreachable -; POSTPROCESS: 70: +; POSTPROCESS: 66: ; POSTPROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT31:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; POSTPROCESS-NEXT: [[TMP71:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP71]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP72:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT40:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; POSTPROCESS-NEXT: [[TMP72:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT40]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP72]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP73:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT40]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT48:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; POSTPROCESS-NEXT: [[TMP73:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT48]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP69:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT48]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT58:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 -; POSTPROCESS-NEXT: [[TMP74:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT58]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP74]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; POSTPROCESS-NEXT: [[TMP70:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT58]] to i32 ; 
POSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; POSTPROCESS-NEXT: [[TMP75:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT13]] to i32 ; POSTPROCESS-NEXT: [[TMP76:%.*]] = bitcast i32 [[TMP75]] to float @@ -3913,13 +2475,23 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_3_GEP234:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_LOAD235:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP234]], align 4 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT236:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT233]], i32 [[DOTFCA_1_3_LOAD235]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT151:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP72]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT154:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT151]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT157:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT154]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT160:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT157]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT163:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT160]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT166:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT163]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT169:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT166]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT172:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT169]], i32 [[TMP73]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT175:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT172]], i32 [[TMP69]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT178:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT175]], i32 
[[TMP70]], 9 ; POSTPROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP80]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT236]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP80]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT236]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT178]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define void @MyIntersectionShader( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META41:![0-9]+]] !continuation.stacksize [[META42:![0-9]+]] !continuation.state [[META42]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] !continuation.stacksize [[META42:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -3929,6 +2501,36 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; 
POSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POSTPROCESS-NEXT: 
[[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 @@ -3972,19 +2574,49 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 ; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: 
[[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 
x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader.resume.0) ; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_3_INSERT]], float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_3_INSERT]], float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]) ; POSTPROCESS-NEXT: unreachable ; POSTPROCESS: isEnd.i: ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 ; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_0379_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 ; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP12]], i32 1 +; POSTPROCESS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0379_0_VEC_INSERT]], float [[TMP12]], i32 1 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 ; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> @@ -3993,24 +2625,54 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP14:%.*]], label [[TMP18:%.*]] ; POSTPROCESS: 14: -; 
POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT315:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT318:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT315]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT321:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT318]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT324:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT321]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT327:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT324]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT330:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT327]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT333:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT330]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT336:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT333]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT339:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT336]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT342:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT339]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT345:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT342]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT348:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_1_1_INSERT345]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT351:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT348]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], 
i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP17]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP17]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT351]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-NEXT: unreachable ; POSTPROCESS: 18: ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 @@ -4026,38 +2688,100 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 
[[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP21]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP21]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META41]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation [[META41]] { ; POSTPROCESS-NEXT: entryresume.0: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP15]], -8 -; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 0, 0, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 5 -; 
POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 3 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 
x i32] [[TMP16]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 29 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 2 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 3 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP3:%.*]], label [[TMP9:%.*]] -; POSTPROCESS: 4: +; POSTPROCESS: 6: ; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 @@ -4074,13 +2798,43 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: 
[[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-NEXT: unreachable -; POSTPROCESS: 10: +; POSTPROCESS: 12: ; POSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 ; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 @@ -4097,16 +2851,46 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; 
POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define void @MyIntersectionShader2( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META42]] !continuation.state [[META42]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META42]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -4116,6 +2900,36 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 
+; POSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POSTPROCESS-NEXT: 
[[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 1, 1 @@ -4159,19 +2973,49 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 ; POSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: 
[[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader2.resume.0) ; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_3_INSERT]], float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 3, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_3_INSERT]], float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]) ; POSTPROCESS-NEXT: unreachable ; POSTPROCESS: isEnd.i: ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 ; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 +; POSTPROCESS-NEXT: [[DOTSROA_0379_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 ; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], float [[TMP12]], i32 1 +; POSTPROCESS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0379_0_VEC_INSERT]], float [[TMP12]], i32 1 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 ; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 ; POSTPROCESS-NEXT: [[TMP13:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT]] to <2 x i32> @@ -4180,24 +3024,54 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP14:%.*]], label [[TMP18:%.*]] ; POSTPROCESS: 14: 
-; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT44:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT47:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT44]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT50:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT47]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT53:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT50]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT56:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT53]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT59:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT56]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT62:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT59]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT65:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT62]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT68:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT65]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 -; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT71:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT68]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT315:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT318:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT315]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_INSERT321:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT318]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_INSERT324:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT321]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_INSERT327:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT324]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT330:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT327]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_3_INSERT333:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT330]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT336:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT333]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT339:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT336]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT342:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT339]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT345:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT342]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT348:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_1_1_INSERT345]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT351:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT348]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], 
i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP17]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP17]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT351]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-NEXT: unreachable ; POSTPROCESS: 18: ; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 @@ -4213,38 +3087,100 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 
[[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP21]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP21]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation.registercount [[META32]] !continuation [[META43]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META40]] !continuation [[META43]] { ; POSTPROCESS-NEXT: entryresume.0: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP15]], -8 -; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 0, 0, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 1, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 2 -; POSTPROCESS-NEXT: [[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 3 -; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 0, 5 -; 
POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 0 -; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 1 -; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 2 -; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP1]], 1, 3 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 
x i32] [[TMP16]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 29 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT16:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 0, 0, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT18:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_1_EXTRACT20:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_2_EXTRACT22:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_1_3_EXTRACT24:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_2_EXTRACT26:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 2 +; POSTPROCESS-NEXT: 
[[DOTFCA_0_3_EXTRACT28:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_4_EXTRACT30:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 4 +; POSTPROCESS-NEXT: [[DOTFCA_0_5_EXTRACT32:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 0, 5 +; POSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT34:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT36:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 1 +; POSTPROCESS-NEXT: [[DOTFCA_1_2_EXTRACT38:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 2 +; POSTPROCESS-NEXT: [[DOTFCA_1_3_EXTRACT40:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP17]], 1, 3 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() ; POSTPROCESS-NEXT: br i1 [[ISEND_I1]], label [[TMP3:%.*]], label [[TMP9:%.*]] -; POSTPROCESS: 4: +; POSTPROCESS: 6: ; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 @@ -4261,13 +3197,43 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT74:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT71]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT77:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT74]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT80:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT77]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: 
[[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT80]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-NEXT: unreachable -; POSTPROCESS: 10: +; POSTPROCESS: 12: ; POSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 ; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 @@ -4284,51 +3250,1650 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT36]], 1, 1 ; POSTPROCESS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT38]], 1, 2 ; POSTPROCESS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT40]], 1, 3 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; 
POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define void @MyMissShader( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] !continuation.state [[META22]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation [[META45:![0-9]+]] { ; POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; POSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 -; POSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr 
addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; POSTPROCESS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; POSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP2]], i32 1 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP3]], i32 2 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> 
[[DOTSROA_0_8_VEC_INSERT]], float [[TMP4]], i32 3 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; POSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; POSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; POSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; POSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; POSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; POSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; POSTPROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP5]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], 
i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP6]], 7 +; POSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP7]], 8 +; POSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP12]], 9 ; POSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP13]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-NEXT: unreachable ; ; +; CLEANUP-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( +; CLEANUP-CPS-SAME: ) #[[ATTR0:[0-9]+]] { +; CLEANUP-CPS-NEXT: ret i32 0 +; +; +; CLEANUP-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; CLEANUP-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; CLEANUP-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; CLEANUP-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) { +; CLEANUP-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; CLEANUP-CPS-NEXT: ret void +; +; +; CLEANUP-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) { +; CLEANUP-CPS-NEXT: ret i32 5 +; +; +; CLEANUP-CPS-LABEL: define i1 @_cont_IsEndSearch( +; CLEANUP-CPS-SAME: ptr [[TMP0:%.*]]) #[[ATTR0]] { +; CLEANUP-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: ret i1 [[ISEND]] +; +; +; CLEANUP-CPS-LABEL: define <3 x i32> @_cont_DispatchRaysIndex3( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) { +; CLEANUP-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_1:%.*]] = load i32, ptr [[RESPTR_1]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_2:%.*]] = load i32, ptr [[RESPTR_2]], align 4 +; 
CLEANUP-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[DATA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_3:%.*]] = load i32, ptr [[RESPTR_3]], align 4 +; CLEANUP-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x i32> [[VAL_0]], i32 [[RES_2]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x i32> [[VAL_1]], i32 [[RES_3]], i32 2 +; CLEANUP-CPS-NEXT: ret <3 x i32> [[VAL_2]] +; +; +; CLEANUP-CPS-LABEL: define <3 x float> @_cont_ObjectRayOrigin3( +; CLEANUP-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; CLEANUP-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; CLEANUP-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; CLEANUP-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; CLEANUP-CPS-LABEL: define <3 x float> @_cont_ObjectRayDirection3( +; CLEANUP-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; CLEANUP-CPS-NEXT: [[RESPTR_1:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[RES_1:%.*]] = load float, ptr [[RESPTR_1]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_2:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 
0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[RES_2:%.*]] = load float, ptr [[RESPTR_2]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_3:%.*]] = getelementptr [[STRUCT_HITDATA]], ptr [[HITDATA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[RES_3:%.*]] = load float, ptr [[RESPTR_3]], align 4 +; CLEANUP-CPS-NEXT: [[VAL_0:%.*]] = insertelement <3 x float> undef, float [[RES_1]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1:%.*]] = insertelement <3 x float> [[VAL_0]], float [[RES_2]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2:%.*]] = insertelement <3 x float> [[VAL_1]], float [[RES_3]], i32 2 +; CLEANUP-CPS-NEXT: ret <3 x float> [[VAL_2]] +; +; +; CLEANUP-CPS-LABEL: define float @_cont_RayTCurrent( +; CLEANUP-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; CLEANUP-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; CLEANUP-CPS-NEXT: ret float [[RES]] +; +; +; CLEANUP-CPS-LABEL: define void @MyRayGen( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META35:![0-9]+]] !continuation [[META36:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call 
[[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 +; CLEANUP-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyRayGen.resume.0) +; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP6]], 5 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], 
i32 undef, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP8]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 8, {} poison, i64 [[TMP6]], i32 5, [36 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.returnedRegistercount [[META33:![0-9]+]], !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @MyRayGen.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [33 x i32], [10 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META35]] !continuation [[META36]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, align 8 +; CLEANUP-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP9]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP10]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_1_I1:%.*]] = load 
i32, ptr [[TMP12]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_2_I2:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_2_I3:%.*]] = load i32, ptr [[RESPTR_2_I2]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_3_I4:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP12]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_3_I5:%.*]] = load i32, ptr [[RESPTR_3_I4]], align 4 +; CLEANUP-CPS-NEXT: [[VAL_0_I6:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I1]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1_I7:%.*]] = insertelement <3 x i32> [[VAL_0_I6]], i32 [[RES_2_I3]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2_I8:%.*]] = insertelement <3 x i32> [[VAL_1_I7]], i32 [[RES_3_I5]], i32 2 +; CLEANUP-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[VAL_2_I8]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_1_I:%.*]] = load i32, ptr [[TMP13]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_2_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_2_I:%.*]] = load i32, ptr [[RESPTR_2_I]], align 4 +; CLEANUP-CPS-NEXT: [[RESPTR_3_I:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP13]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_3_I:%.*]] = load i32, ptr [[RESPTR_3_I]], align 4 +; CLEANUP-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x i32> undef, i32 [[RES_1_I]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x i32> [[VAL_0_I]], i32 [[RES_2_I]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x i32> [[VAL_1_I]], i32 [[RES_3_I]], i32 2 +; CLEANUP-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[VAL_2_I]], i8 1 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] 
[[TMP11]]) +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP14]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; CLEANUP-CPS-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; CLEANUP-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; CLEANUP-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; CLEANUP-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP15]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP16]], float [[TMP17]], float [[TMP18]], float [[TMP19]], i8 15) +; CLEANUP-CPS-NEXT: ret void +; +; +; CLEANUP-CPS-LABEL: define void @MyClosestHitShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META37:![0-9]+]] !lgc.cps [[META38:![0-9]+]] !continuation [[META39:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: 
[[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP4]], 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_012_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], 
i32 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_012_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; CLEANUP-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_012_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_012_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; CLEANUP-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = fsub fast float 1.000000e+00, [[TMP9]] +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = fsub fast float [[TMP10]], [[TMP11]] +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i64 0 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 1 +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 2 +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float 1.000000e+00, i64 3 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 2 +; CLEANUP-CPS-NEXT: 
[[TMP19:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP21]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT10:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP17]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP18]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP19]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP20]], 9 +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyAnyHitShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META38]] !lgc.cps [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_0_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: store float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_GEP:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-CPS-NEXT: store float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_4_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-CPS-NEXT: store i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_5_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_GEP:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: store float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP4]]) +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT388:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_0390_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT388]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0390_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0390_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT388]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0390_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[HIT_ATTRS_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA:%.*]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I1_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I1_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_0_INSERT]], <3 x float> [[RES_I1_FCA_1_LOAD]], 1 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_2_LOAD:%.*]] = load float, ptr 
[[RES_I1_FCA_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_1_INSERT]], float [[RES_I1_FCA_2_LOAD]], 2 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I1_FCA_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_2_INSERT]], i32 [[RES_I1_FCA_3_LOAD]], 3 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 2 +; CLEANUP-CPS-NEXT: [[RES_I1_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I1_FCA_3_INSERT]], 3 +; CLEANUP-CPS-NEXT: [[DOTSROA_0412_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_0412_4_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTSROA_0412_8_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I1_FCA_3_INSERT_FCA_0_EXTRACT]], i32 2 +; CLEANUP-CPS-NEXT: [[VAL_0_I8:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_0412_0_VEC_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1_I9:%.*]] = insertelement <3 x float> [[VAL_0_I8]], float [[DOTSROA_0412_4_VEC_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2_I10:%.*]] = insertelement <3 x float> [[VAL_1_I9]], float [[DOTSROA_0412_8_VEC_EXTRACT]], i32 2 +; CLEANUP-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[VAL_2_I10]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 
0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[RES_I_FCA_1_LOAD]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I_FCA_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[RES_I_FCA_2_LOAD]], 2 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[RES_I_FCA_3_LOAD]], 3 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; CLEANUP-CPS-NEXT: [[DOTSROA_1_12_VEC_EXTRACT:%.*]] = 
extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_1_16_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTSROA_1_20_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT]], i32 2 +; CLEANUP-CPS-NEXT: [[VAL_0_I:%.*]] = insertelement <3 x float> undef, float [[DOTSROA_1_12_VEC_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[VAL_1_I:%.*]] = insertelement <3 x float> [[VAL_0_I]], float [[DOTSROA_1_16_VEC_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[VAL_2_I:%.*]] = insertelement <3 x float> [[VAL_1_I]], float [[DOTSROA_1_20_VEC_EXTRACT]], i32 2 +; CLEANUP-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[VAL_2_I]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_0_LOAD:%.*]] = load <3 x float>, ptr [[RES_I11_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, <3 x float> [[RES_I11_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_1_LOAD:%.*]] = load <3 x float>, ptr [[RES_I11_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_0_INSERT]], <3 x float> [[RES_I11_FCA_1_LOAD]], 1 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_2_LOAD:%.*]] = load float, ptr [[RES_I11_FCA_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_1_INSERT]], 
float [[RES_I11_FCA_2_LOAD]], 2 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_LOAD:%.*]] = load i32, ptr [[RES_I11_FCA_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_2_INSERT]], i32 [[RES_I11_FCA_3_LOAD]], 3 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_3_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_3_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_3_INSERT]], 2 +; CLEANUP-CPS-NEXT: [[RES_I11_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I11_FCA_3_INSERT]], 3 +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = fmul fast float [[RES_I11_FCA_3_INSERT_FCA_2_EXTRACT]], [[EXTRACT]] +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[EXTRACT1]] +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP12]], 0.000000e+00 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], 1.000000e+00 +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = fcmp fast ogt float [[TMP12]], -1.000000e+00 +; CLEANUP-CPS-NEXT: br i1 [[TMP13]], label [[TMP16:%.*]], label [[TMP39:%.*]] +; CLEANUP-CPS: 16: +; CLEANUP-CPS-NEXT: br i1 [[TMP14]], label [[TMP17:%.*]], label [[TMP28:%.*]] +; CLEANUP-CPS: 17: +; CLEANUP-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP18]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: 
[[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0393_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP24]], i32 0 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0393_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0393_0_VEC_INSERT]], float [[TMP26]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT392:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0393_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT392]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr 
[[TMP27]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT]], ptr [[DOTFCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_LOAD]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_LOAD:%.*]] = load 
i32, ptr [[DOTFCA_0_1_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_LOAD]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load <3 x float>, ptr 
[[DOTFCA_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_LOAD]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_LOAD]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_LOAD:%.*]] = load float, ptr [[DOTFCA_1_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_LOAD]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_LOAD]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue 
[10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 28: +; CLEANUP-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP29]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT26:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT26]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT35:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT35]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT44:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT44]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT53:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT53]] to i32 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT14:%.*]] = extractelement <2 x float> 
[[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT14]] to i32 +; CLEANUP-CPS-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0397_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP35]], i32 0 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT18:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP36:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT18]] to i32 +; CLEANUP-CPS-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0397_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0397_0_VEC_INSERT]], float [[TMP37]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT396:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0397_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT224:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT396]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP225:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP38]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT224]], ptr [[DOTFCA_0_GEP225]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP226:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD227:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP226]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT228:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD227]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP229:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD230:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP229]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT231:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT228]], <3 x float> [[DOTFCA_0_1_0_LOAD230]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_GEP232:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_LOAD233:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP232]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT234:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT231]], <3 x float> [[DOTFCA_0_1_1_LOAD233]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_GEP235:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_LOAD236:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP235]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT237:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT234]], float [[DOTFCA_0_1_2_LOAD236]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_GEP238:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_LOAD239:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP238]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT240:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT237]], i32 [[DOTFCA_0_1_3_LOAD239]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_GEP241:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_LOAD242:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP241]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT243:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT240]], 
<3 x float> [[DOTFCA_0_2_LOAD242]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_GEP244:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_LOAD245:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP244]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT246:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT243]], <3 x float> [[DOTFCA_0_3_LOAD245]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_GEP247:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_LOAD248:%.*]] = load float, ptr [[DOTFCA_0_4_GEP247]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT249:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT246]], float [[DOTFCA_0_4_LOAD248]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_GEP250:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_LOAD251:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP250]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT252:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT249]], i64 [[DOTFCA_0_5_LOAD251]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP253:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD254:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP253]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT255:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT252]], <3 x float> [[DOTFCA_1_0_LOAD254]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP256:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD257:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP256]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT258:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT255]], <3 x float> [[DOTFCA_1_1_LOAD257]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_GEP259:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_LOAD260:%.*]] = load float, ptr [[DOTFCA_1_2_GEP259]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT261:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT258]], float [[DOTFCA_1_2_LOAD260]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_GEP262:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_LOAD263:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP262]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT264:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT261]], i32 [[DOTFCA_1_3_LOAD263]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT62:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP30]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT65:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT62]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT68:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT65]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT71:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT68]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT74:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT71]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT77:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT74]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT80:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT77]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT83:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT80]], i32 [[TMP31]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT86:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT83]], i32 [[TMP32]], 8 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT89:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT86]], i32 [[TMP33]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT264]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT89]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 39: +; CLEANUP-CPS-NEXT: br i1 [[TMP15]], label [[TMP40:%.*]], label [[TMP59:%.*]] +; CLEANUP-CPS: 40: +; CLEANUP-CPS-NEXT: br i1 [[TMP14]], label [[TMP41:%.*]], label [[TMP50:%.*]] +; CLEANUP-CPS: 41: +; CLEANUP-CPS-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP42]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT28:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP43:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT28]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT37:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP44:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT37]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT46:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP45:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT46]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT55:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP46:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT55]] to i32 +; CLEANUP-CPS-NEXT: [[TMP47:%.*]] = bitcast i32 [[TMP6]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0401_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP47]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP48:%.*]] = bitcast i32 [[TMP7]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0401_4_VEC_INSERT:%.*]] = insertelement <2 x float> 
[[DOTSROA_0401_0_VEC_INSERT]], float [[TMP48]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT400:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0401_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT265:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT400]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP266:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP49]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT265]], ptr [[DOTFCA_0_GEP266]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP267:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD268:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP267]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT269:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD268]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP270:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD271:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP270]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT272:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT269]], <3 x float> [[DOTFCA_0_1_0_LOAD271]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_GEP273:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_LOAD274:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP273]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT275:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT272]], <3 x float> [[DOTFCA_0_1_1_LOAD274]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_GEP276:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_LOAD277:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP276]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT278:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT275]], float [[DOTFCA_0_1_2_LOAD277]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_GEP279:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_LOAD280:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP279]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT281:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT278]], i32 [[DOTFCA_0_1_3_LOAD280]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_GEP282:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_LOAD283:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP282]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT284:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT281]], <3 x float> [[DOTFCA_0_2_LOAD283]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_GEP285:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_LOAD286:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP285]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT287:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT284]], <3 x float> [[DOTFCA_0_3_LOAD286]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_GEP288:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_LOAD289:%.*]] = load float, ptr 
[[DOTFCA_0_4_GEP288]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT290:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT287]], float [[DOTFCA_0_4_LOAD289]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_GEP291:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_LOAD292:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP291]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT293:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT290]], i64 [[DOTFCA_0_5_LOAD292]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP294:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD295:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP294]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT296:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT293]], <3 x float> [[DOTFCA_1_0_LOAD295]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP297:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD298:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP297]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT299:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT296]], <3 x float> [[DOTFCA_1_1_LOAD298]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_GEP300:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_LOAD301:%.*]] = load float, ptr [[DOTFCA_1_2_GEP300]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT302:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT299]], float [[DOTFCA_1_2_LOAD301]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_GEP303:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_1_3_LOAD304:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP303]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT305:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT302]], i32 [[DOTFCA_1_3_LOAD304]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT92:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP43]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT95:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT92]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT98:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT95]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT101:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT98]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT104:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT101]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT107:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT104]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT110:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT107]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT113:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT110]], i32 [[TMP44]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT116:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT113]], i32 [[TMP45]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT119:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT116]], i32 [[TMP46]], 9 +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT305]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT119]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 50: +; CLEANUP-CPS-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP51]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT30:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP52:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT30]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT39:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP53:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT39]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT48:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP54:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT48]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT57:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP55:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT57]] to i32 +; CLEANUP-CPS-NEXT: [[TMP56:%.*]] = bitcast i32 [[TMP6]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0405_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP56]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP57:%.*]] = bitcast i32 [[TMP7]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0405_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0405_0_VEC_INSERT]], float [[TMP57]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT404:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0405_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 
0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT306:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT404]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP307:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP58]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT306]], ptr [[DOTFCA_0_GEP307]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP308:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD309:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP308]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD309]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP311:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD312:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP311]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT310]], <3 x float> [[DOTFCA_0_1_0_LOAD312]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_GEP314:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_LOAD315:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP314]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT313]], <3 x float> [[DOTFCA_0_1_1_LOAD315]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_GEP317:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_LOAD318:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP317]], align 4 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_0_1_2_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT316]], float [[DOTFCA_0_1_2_LOAD318]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_GEP320:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_LOAD321:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP320]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT319]], i32 [[DOTFCA_0_1_3_LOAD321]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_GEP323:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_LOAD324:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP323]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT322]], <3 x float> [[DOTFCA_0_2_LOAD324]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_GEP326:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_LOAD327:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP326]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT328:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT325]], <3 x float> [[DOTFCA_0_3_LOAD327]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_GEP329:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_LOAD330:%.*]] = load float, ptr [[DOTFCA_0_4_GEP329]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT331:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT328]], float [[DOTFCA_0_4_LOAD330]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_GEP332:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_0_5_LOAD333:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP332]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT334:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT331]], i64 [[DOTFCA_0_5_LOAD333]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP335:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD336:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP335]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT337:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT334]], <3 x float> [[DOTFCA_1_0_LOAD336]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP338:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD339:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP338]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT340:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT337]], <3 x float> [[DOTFCA_1_1_LOAD339]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_GEP341:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_LOAD342:%.*]] = load float, ptr [[DOTFCA_1_2_GEP341]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT343:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT340]], float [[DOTFCA_1_2_LOAD342]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_GEP344:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_LOAD345:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP344]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT346:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT343]], i32 [[DOTFCA_1_3_LOAD345]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT122:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP52]], 0 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_1_INSERT125:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT122]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT128:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT125]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT131:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT128]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT134:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT131]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT137:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT134]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT140:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT137]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT143:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT140]], i32 [[TMP53]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT146:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT143]], i32 [[TMP54]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT149:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT146]], i32 [[TMP55]], 9 +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT346]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT149]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 59: +; CLEANUP-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT32:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP60:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT32]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT41:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP61:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT41]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT50:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP62:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT50]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT59:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT59]] to i32 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT16:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP64:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT16]] to i32 +; CLEANUP-CPS-NEXT: [[TMP65:%.*]] = bitcast i32 [[TMP64]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0409_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP65]], i32 0 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT20:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP66:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT20]] to i32 +; CLEANUP-CPS-NEXT: [[TMP67:%.*]] = bitcast i32 [[TMP66]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0409_4_VEC_INSERT:%.*]] = insertelement <2 x float> 
[[DOTSROA_0409_0_VEC_INSERT]], float [[TMP67]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT408:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0409_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT347:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT408]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP348:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP68]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT347]], ptr [[DOTFCA_0_GEP348]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_GEP349:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_LOAD350:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP349]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT351:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD350]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP352:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD353:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_0_GEP352]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT354:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT351]], <3 x float> [[DOTFCA_0_1_0_LOAD353]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_GEP355:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_LOAD356:%.*]] = load <3 x float>, ptr [[DOTFCA_0_1_1_GEP355]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT357:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT354]], <3 x float> [[DOTFCA_0_1_1_LOAD356]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_GEP358:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_LOAD359:%.*]] = load float, ptr [[DOTFCA_0_1_2_GEP358]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT360:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT357]], float [[DOTFCA_0_1_2_LOAD359]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_GEP361:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_LOAD362:%.*]] = load i32, ptr [[DOTFCA_0_1_3_GEP361]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT363:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT360]], i32 [[DOTFCA_0_1_3_LOAD362]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_GEP364:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_LOAD365:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP364]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT366:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT363]], <3 x float> [[DOTFCA_0_2_LOAD365]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_GEP367:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_LOAD368:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP367]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT369:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT366]], <3 x float> [[DOTFCA_0_3_LOAD368]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_GEP370:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_LOAD371:%.*]] = load float, ptr 
[[DOTFCA_0_4_GEP370]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT372:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT369]], float [[DOTFCA_0_4_LOAD371]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_GEP373:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_LOAD374:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP373]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT375:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT372]], i64 [[DOTFCA_0_5_LOAD374]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP376:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD377:%.*]] = load <3 x float>, ptr [[DOTFCA_1_0_GEP376]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT378:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT375]], <3 x float> [[DOTFCA_1_0_LOAD377]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP379:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD380:%.*]] = load <3 x float>, ptr [[DOTFCA_1_1_GEP379]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT381:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT378]], <3 x float> [[DOTFCA_1_1_LOAD380]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_GEP382:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_LOAD383:%.*]] = load float, ptr [[DOTFCA_1_2_GEP382]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT384:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT381]], float [[DOTFCA_1_2_LOAD383]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_GEP385:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 3 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_1_3_LOAD386:%.*]] = load i32, ptr [[DOTFCA_1_3_GEP385]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT387:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT384]], i32 [[DOTFCA_1_3_LOAD386]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT152:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP60]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT155:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT152]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT158:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT155]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT161:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT158]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT164:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT161]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT167:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT164]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT170:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT167]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT173:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT170]], i32 [[TMP61]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT176:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT173]], i32 [[TMP62]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT179:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT176]], i32 [[TMP63]], 9 +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT387]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT179]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyIntersectionShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42:![0-9]+]] !continuation [[META43:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-CPS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; 
CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[SYSTEM_DATA]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP0]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> 
[[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP1]], 3 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; CLEANUP-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] +; CLEANUP-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; CLEANUP-CPS: callAHit.i: +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; 
CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = 
insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 
[[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 3, i32 16, {} poison, i64 [[TMP2]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount [[META32:![0-9]+]], !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: isEnd.i: +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP6]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT352]], 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <3 x i32> +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] +; CLEANUP-CPS-NEXT: [[ISEND_I1:%.*]] = 
call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; CLEANUP-CPS: 8: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> 
[[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 9: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] 
[[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 
[[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42]] !continuation [[META43]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP5]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 3 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; CLEANUP-CPS: 7: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds 
[[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR1]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x 
i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR_RELOAD2]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 8: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], 
float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR_RELOAD]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyIntersectionShader2( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42]] !continuation [[META44:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-CPS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: 
[[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[PAYLOAD]], 25 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 0, 5 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 2 +; CLEANUP-CPS-NEXT: 
[[SYSTEM_DATA_FCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[SYSTEM_DATA]], 1, 3 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] to <3 x float> +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, <3 x float> [[TMP0]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], float [[SYSTEM_DATA_ALLOCA_SROA_13_24_VEC_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[SYSTEM_DATA_ALLOCA_SROA_13_28_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_2_INSERT]], i32 [[TMP1]], 3 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 2 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_3_INSERT]], 3 +; CLEANUP-CPS-NEXT: [[ISNOHIT_I:%.*]] = fcmp fast uge float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], [[SYSTEM_DATA_FCA_0_4_EXTRACT]] +; CLEANUP-CPS-NEXT: br i1 [[ISNOHIT_I]], label [[ISEND_I:%.*]], label [[CALLAHIT_I:%.*]] +; CLEANUP-CPS: callAHit.i: +; CLEANUP-CPS-NEXT: 
[[TRAV_DATA_I_FCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; 
CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT350:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2:%.*]] poison, <2 x float> undef, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 
[[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 
[[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader2.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 3, i32 16, {} poison, i64 [[TMP2]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount [[META32]], !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: isEnd.i: +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0353_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP4]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0353_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0353_0_VEC_INSERT]], float [[TMP6]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT352:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0353_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT286:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] 
[[DOTFCA_0_INSERT352]], 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast <2 x float> [[DOTFCA_0_EXTRACT286]] to <2 x i32> +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <3 x i32> +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND:%.*]] = select <3 x i1> , <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VEC_EXPAND]], <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_0_EXTRACT]] +; CLEANUP-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; CLEANUP-CPS: 8: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 
23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 9: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[SYSTEM_DATA_FCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 
[[SYSTEM_DATA_FCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[SYSTEM_DATA_FCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[SYSTEM_DATA_FCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[SYSTEM_DATA_FCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[SYSTEM_DATA_FCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @MyIntersectionShader2.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_ANYHITTRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META35]] !lgc.cps [[META42]] !continuation [[META44]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_ANYHITTRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 2 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_0_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP6]], 1, 3 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[ISEND_I1:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I1]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; CLEANUP-CPS: 7: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR1]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT289:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT292:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT289]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT295:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT292]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_0_1_2_INSERT298:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT295]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT301:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT298]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT304:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT301]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT307:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT304]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT310:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT307]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT313:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT310]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT316:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT313]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT319:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT316]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT322:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT319]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT325:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT322]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue 
[30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR_RELOAD2]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 8: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER2_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], 0, 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <3 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], <3 x float> [[DOTFCA_0_1_1_EXTRACT]], 0, 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], float [[DOTFCA_0_1_2_EXTRACT]], 0, 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_2_INSERT]], i32 [[DOTFCA_0_1_3_EXTRACT]], 0, 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_3_INSERT]], <3 x float> [[DOTFCA_0_2_EXTRACT]], 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_EXTRACT]], 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_EXTRACT]], 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_EXTRACT]], 0, 5 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], <3 x float> [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], <3 x float> [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], float [[DOTFCA_1_2_EXTRACT]], 1, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_2_INSERT]], i32 [[DOTFCA_1_3_EXTRACT]], 1, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 
[[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR_RELOAD]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyMissShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META40]] !lgc.cps [[META38]] !continuation [[META45:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP4]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 
[[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; ; POSTPROCESS-CPS-LABEL: define i32 @_cont_GetContinuationStackAddr( ; POSTPROCESS-CPS-SAME: ) #[[ATTR0:[0-9]+]] { ; POSTPROCESS-CPS-NEXT: ret i32 0 @@ -4444,7 +5009,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 ; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 4, i32 [[TMP13]], i64 [[TMP8]], i32 5, [36 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.returnedRegistercount [[META33:![0-9]+]], !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 4, i32 [[TMP13]], i64 [[TMP8]], i32 5, [36 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -4466,16 +5031,16 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 0 -; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 0 -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 -; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float -; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 -; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 +; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = 
insertelement <4 x float> poison, float [[TMP6]], i32 0 +; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 +; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float +; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP9]], i32 3 +; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] } [[TMP3]], 0 +; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP10]], 0 ; POSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; POSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [33 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 @@ -4576,12 +5141,9 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP18]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP19]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP20]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP23]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP24:%.*]] = zext i32 [[RETURNADDR]] to 
i64 ; POSTPROCESS-CPS-NEXT: [[TMP25:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP24]], i32 [[TMP25]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP24]], i32 [[TMP25]], i64 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -4803,14 +5365,11 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP29]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP30:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP30]], i32 [[TMP31]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP30]], i32 [[TMP31]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 32: +; POSTPROCESS-CPS: 30: ; POSTPROCESS-CPS-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP33]]) ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 @@ -4883,18 +5442,15 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT82:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT79]], i32 [[TMP35]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT85:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT82]], i32 [[TMP36]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT88:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT85]], i32 [[TMP37]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP43:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP44:%.*]] = add i32 [[TMP43]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP44]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP45:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP46:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP45]], i32 [[TMP46]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT263]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT88]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP45]], i32 [[TMP46]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT263]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT88]]) ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 47: +; POSTPROCESS-CPS: 43: ; POSTPROCESS-CPS-NEXT: br i1 [[TMP15]], label [[TMP48:%.*]], label [[TMP75:%.*]] -; POSTPROCESS-CPS: 48: +; POSTPROCESS-CPS: 44: ; POSTPROCESS-CPS-NEXT: br i1 [[TMP14]], label [[TMP49:%.*]], label [[TMP62:%.*]] -; POSTPROCESS-CPS: 49: +; POSTPROCESS-CPS: 45: ; POSTPROCESS-CPS-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP50]]) ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT27:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 @@ -4963,14 +5519,11 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT112:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT109]], i32 [[TMP52]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT115:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT112]], i32 [[TMP53]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT118:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT115]], i32 [[TMP54]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP58:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP59:%.*]] = add i32 [[TMP58]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP59]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP60:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP61:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP60]], i32 [[TMP61]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT304]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT118]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP60]], i32 [[TMP61]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT304]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT118]]) ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 62: +; POSTPROCESS-CPS: 56: ; POSTPROCESS-CPS-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; POSTPROCESS-CPS-NEXT: call void @_cont_IgnoreHit(ptr [[TMP63]]) ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT29:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 @@ -5039,14 +5592,11 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT142:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT139]], i32 [[TMP65]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT145:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT142]], i32 [[TMP66]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT148:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT145]], i32 [[TMP67]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP71:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP72]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP73:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP74:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP73]], i32 [[TMP74]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT345]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT148]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP73]], i32 [[TMP74]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT345]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT148]]) ; POSTPROCESS-CPS-NEXT: unreachable -; POSTPROCESS-CPS: 75: +; POSTPROCESS-CPS: 67: ; POSTPROCESS-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT31:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 ; POSTPROCESS-CPS-NEXT: [[TMP76:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT31]] to i32 @@ -5118,12 +5668,9 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT172:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT169]], i32 [[TMP77]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT175:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT172]], i32 [[TMP78]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT178:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT175]], i32 [[TMP79]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP85:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP86:%.*]] = add i32 [[TMP85]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP86]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP87:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP88:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP87]], i32 [[TMP88]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT386]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT178]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP87]], i32 [[TMP88]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT386]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT178]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -5243,7 +5790,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader.resume.0) ; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount [[META32:![0-9]+]], !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]) ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: isEnd.i: ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 @@ -5310,7 +5857,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP16]], i32 [[TMP17]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP16]], i32 [[TMP17]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: 18: ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 @@ -5361,7 +5908,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP21]], i32 [[TMP22]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP21]], i32 [[TMP22]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -5472,7 +6019,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD2]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP13]], i32 [[TMP14]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: 15: ; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) @@ -5526,7 +6073,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP20]], i32 [[TMP21]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -5646,7 +6193,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader2.resume.0) ; POSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.returnedRegistercount [[META32]], !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], i32 5, float [[RES_I_FCA_3_INSERT_FCA_2_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES2]] [[DOTFCA_0_INSERT350]], [32 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]) ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: isEnd.i: ; POSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 @@ -5713,7 +6260,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP16]], i32 [[TMP17]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP16]], i32 [[TMP17]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: 18: ; POSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_ALLOCA_SROA_0_0_VECBLEND]], 0, 0, 0, 0 @@ -5764,7 +6311,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP20]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP21]], i32 [[TMP22]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP21]], i32 [[TMP22]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -5875,7 +6422,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD2]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP13]], i32 [[TMP14]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT325]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]) ; POSTPROCESS-CPS-NEXT: unreachable ; POSTPROCESS-CPS: 15: ; POSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) @@ -5929,7 +6476,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP20]], i32 [[TMP21]], i64 poison, i32 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_3_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; ; @@ -5977,11 +6524,8 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; POSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 ; POSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 -; POSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 0 -; POSTPROCESS-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 ; POSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = zext i32 [[RETURNADDR]] to i64 ; POSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP10]], i32 [[TMP11]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; POSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP10]], i32 [[TMP11]], i64 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [33 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; POSTPROCESS-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/paq-hit-attribute-size.ll b/llvmraytracing/test/dx/paq-hit-attribute-size.ll index 37da0d7d2b..745733c833 100644 --- a/llvmraytracing/test/dx/paq-hit-attribute-size.ll +++ b/llvmraytracing/test/dx/paq-hit-attribute-size.ll @@ -1,17 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; Test payload serialization layouts in presence of different max hit attribute ; size metadata. ; ; Default run checking serialization layouts and their usage: -; RUN: grep -v 'NOT-1' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-1 -; RUN: grep -v 'NOT-2' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-2 -; RUN: grep -v 'NOT-4' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-4 -; RUN: grep -v 'NOT-8' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S 
--lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-8 - -; Check that hit attributes violating the max size (here: 2 Dwords, set by removing lines containing NOT-2) are detected and crash: -; RUN: grep -v 'NOT-INVALID' %s | not --crash opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix INVALID +; RUN: grep -v 'NOT-1' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-1 +; RUN: grep -v 'NOT-2' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-2 +; RUN: grep -v 'NOT-4' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-4 +; RUN: grep -v 'NOT-8' %s | opt -debug-only=lower-raytracing-pipeline --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-MAX-8 + +; Check that hit attributes violating the max size (here: 2 Dwords, set by removing lines containing NOT-2) are detected and crash. +; Note: The padding computation will fail before the actual hit attribute check in copyHitAttributes, because we are using more-than-expected storage +; for the hit attributes. So, we only check for an assertion to occur. 
+; RUN: grep -v 'NOT-INVALID' %s | not --crash opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix INVALID ; REQUIRES: assertions -; INVALID: Hit attributes are too large! +; INVALID: Assertion target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -43,28 +46,780 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; CHECK-MAX-4-DAG: %struct.MyPayload.attr_max_4_i32s.layout_0_caller_out = type { [6 x i32] } ; CHECK-MAX-8-DAG: %struct.MyPayload.attr_max_8_i32s.layout_0_caller_out = type { [10 x i32] } -; CHECK-LABEL: define {{.*}} @AnyHit1DWords( -define void @AnyHit1DWords(%struct.MyPayload* %payload, %struct.Attributes1DWords* %attrs) !types !60 { +define void @AnyHit1DWords(%struct.MyPayload* %payload, %struct.Attributes1DWords* %attrs) !pointeetys !60 { +; CHECK-MAX-1-LABEL: define %struct.AnyHitSystemData @AnyHit1DWords( +; CHECK-MAX-1-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES1DWORDS:%.*]] [[TMP1:%.*]], [1 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META18:![0-9]+]] !continuation.registercount [[META15:![0-9]+]] !continuation [[META19:![0-9]+]] { +; CHECK-MAX-1-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-1-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-1-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-1-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; CHECK-MAX-1-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; 
CHECK-MAX-1-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-1-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES1DWORDS]], align 8 +; CHECK-MAX-1-NEXT: store [4 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-1-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-1-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; CHECK-MAX-1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; CHECK-MAX-1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; CHECK-MAX-1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; CHECK-MAX-1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 3 +; CHECK-MAX-1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-1-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] 
[[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-1-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP20]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-1-NEXT: store [[STRUCT_ATTRIBUTES1DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-1-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-1-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP22]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; CHECK-MAX-1-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 1 +; CHECK-MAX-1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP25]], ptr [[TMP23]], align 4 +; CHECK-MAX-1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; CHECK-MAX-1-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 2 +; CHECK-MAX-1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP28]], ptr [[TMP26]], align 4 +; CHECK-MAX-1-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-1-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 3 +; CHECK-MAX-1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP31]], ptr [[TMP29]], align 4 +; CHECK-MAX-1-NEXT: [[TMP32:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-1-NEXT: store i32 [[TMP32]], ptr [[TMP3]], align 4 +; 
CHECK-MAX-1-NEXT: [[TMP33:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-1-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP34]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP33]]) +; CHECK-MAX-1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-1-NEXT: [[TMP36:%.*]] = load [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-1-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP35]], [2 x i32] poison, [4 x i32] [[TMP36]]), !continuation.registercount [[META15]] +; CHECK-MAX-1-NEXT: unreachable +; +; CHECK-MAX-2-LABEL: define %struct.AnyHitSystemData @AnyHit1DWords( +; CHECK-MAX-2-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES1DWORDS:%.*]] [[TMP1:%.*]], [1 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META20:![0-9]+]] !continuation.registercount [[META18:![0-9]+]] !continuation [[META21:![0-9]+]] { +; CHECK-MAX-2-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-2-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-2-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-2-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; CHECK-MAX-2-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-2-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-2-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES1DWORDS]], align 8 +; CHECK-MAX-2-NEXT: store [4 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr 
[[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-2-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; CHECK-MAX-2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-2-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-2-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP20]], ptr [[ORIGHITATTRS]], align 4 +; 
CHECK-MAX-2-NEXT: store [[STRUCT_ATTRIBUTES1DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-2-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-2-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP22]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP25]], ptr [[TMP23]], align 4 +; CHECK-MAX-2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP28]], ptr [[TMP26]], align 4 +; CHECK-MAX-2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP31]], ptr [[TMP29]], align 4 +; CHECK-MAX-2-NEXT: [[TMP32:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP32]], ptr [[TMP3]], align 4 +; CHECK-MAX-2-NEXT: [[TMP33:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-2-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP34]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] 
[[TMP33]]) +; CHECK-MAX-2-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: [[TMP36:%.*]] = load [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP35]], [2 x i32] poison, [4 x i32] [[TMP36]]), !continuation.registercount [[META18]] +; CHECK-MAX-2-NEXT: unreachable +; +; CHECK-MAX-4-LABEL: define %struct.AnyHitSystemData @AnyHit1DWords( +; CHECK-MAX-4-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES1DWORDS:%.*]] [[TMP1:%.*]], [3 x i32] [[PADDING:%.*]], [6 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META21:![0-9]+]] !continuation.registercount [[META19:![0-9]+]] !continuation [[META22:![0-9]+]] { +; CHECK-MAX-4-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-4-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-4-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-4-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [6 x i32], align 4 +; CHECK-MAX-4-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-4-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-4-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES1DWORDS]], align 8 +; CHECK-MAX-4-NEXT: store [6 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-4-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: 
store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-4-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-4-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_4_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-4-NEXT: store [[STRUCT_ATTRIBUTES1DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-4-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-4-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-4-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP23]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP26]], ptr [[TMP24]], align 4 +; CHECK-MAX-4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; CHECK-MAX-4-NEXT: [[TMP33:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP33]], ptr [[TMP3]], align 4 +; CHECK-MAX-4-NEXT: [[TMP34:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-4-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP35]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP34]]) +; CHECK-MAX-4-NEXT: [[TMP36:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP37:%.*]] = load [6 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP36]], [4 x i32] poison, [6 x i32] [[TMP37]]), !continuation.registercount [[META19]] +; CHECK-MAX-4-NEXT: unreachable +; +; CHECK-MAX-8-LABEL: define %struct.AnyHitSystemData @AnyHit1DWords( +; CHECK-MAX-8-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES1DWORDS:%.*]] [[TMP1:%.*]], [7 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.registercount [[META20:![0-9]+]] !continuation [[META23:![0-9]+]] { +; CHECK-MAX-8-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-8-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-8-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 +; CHECK-MAX-8-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-8-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES1DWORDS]], align 8 +; CHECK-MAX-8-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; 
CHECK-MAX-8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-8-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-8-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ATTRIBUTES1DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-8-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP23]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 
+; CHECK-MAX-8-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP26]], ptr [[TMP24]], align 4 +; CHECK-MAX-8-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; CHECK-MAX-8-NEXT: [[TMP33:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP33]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP34:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP35]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP34]]) +; CHECK-MAX-8-NEXT: [[TMP36:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP37:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP36]], [8 x i32] poison, [10 x i32] [[TMP37]]), !continuation.registercount [[META20]] +; CHECK-MAX-8-NEXT: unreachable +; ret void } -; CHECK-LABEL: define {{.*}} @AnyHit2DWords( -define void @AnyHit2DWords(%struct.MyPayload* %payload, %struct.Attributes2DWords* %attrs) !types !23 { +define void @AnyHit2DWords(%struct.MyPayload* %payload, %struct.Attributes2DWords* %attrs) !pointeetys !23 { +; CHECK-MAX-1-LABEL: define void @AnyHit2DWords( +; CHECK-MAX-1-SAME: ptr [[PAYLOAD:%.*]], ptr [[ATTRS:%.*]]) { +; CHECK-MAX-1-NEXT: ret void +; +; CHECK-MAX-2-LABEL: define %struct.AnyHitSystemData @AnyHit2DWords( +; CHECK-MAX-2-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES2DWORDS:%.*]] [[TMP1:%.*]], {} [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META20]] !continuation.registercount [[META18]] !continuation [[META22:![0-9]+]] { +; CHECK-MAX-2-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-2-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-2-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-2-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [4 x i32], align 4 +; CHECK-MAX-2-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-2-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-2-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES2DWORDS]], align 8 +; CHECK-MAX-2-NEXT: store [4 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-2-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-2-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; CHECK-MAX-2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-2-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-2-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP20]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[TMP22]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 +; CHECK-MAX-2-NEXT: store [[STRUCT_ATTRIBUTES2DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-2-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-2-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP25]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP28]], ptr [[TMP26]], align 4 +; CHECK-MAX-2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 2 +; CHECK-MAX-2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP31]], ptr [[TMP29]], align 4 +; CHECK-MAX-2-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 3 +; CHECK-MAX-2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP34]], ptr [[TMP32]], align 4 +; CHECK-MAX-2-NEXT: [[TMP35:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-2-NEXT: store i32 [[TMP35]], ptr [[TMP3]], align 4 +; CHECK-MAX-2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; CHECK-MAX-2-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP36]], align 4 +; CHECK-MAX-2-NEXT: store i32 
[[TMP38]], ptr [[TMP37]], align 4 +; CHECK-MAX-2-NEXT: [[TMP39:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-2-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP40]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP39]]) +; CHECK-MAX-2-NEXT: [[TMP41:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: [[TMP42:%.*]] = load [4 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-2-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP41]], [2 x i32] poison, [4 x i32] [[TMP42]]), !continuation.registercount [[META18]] +; CHECK-MAX-2-NEXT: unreachable +; +; CHECK-MAX-4-LABEL: define %struct.AnyHitSystemData @AnyHit2DWords( +; CHECK-MAX-4-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES2DWORDS:%.*]] [[TMP1:%.*]], [2 x i32] [[PADDING:%.*]], [6 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META21]] !continuation.registercount [[META19]] !continuation [[META23:![0-9]+]] { +; CHECK-MAX-4-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-4-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-4-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-4-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [6 x i32], align 4 +; CHECK-MAX-4-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-4-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-4-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES2DWORDS]], align 8 +; CHECK-MAX-4-NEXT: store [6 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] 
[[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-4-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-4-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-4-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_4_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; 
CHECK-MAX-4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 +; CHECK-MAX-4-NEXT: store [[STRUCT_ATTRIBUTES2DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-4-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-4-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP26]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; CHECK-MAX-4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP35]], ptr [[TMP33]], align 4 +; CHECK-MAX-4-NEXT: [[TMP36:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store 
i32 [[TMP36]], ptr [[TMP3]], align 4 +; CHECK-MAX-4-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP37]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4 +; CHECK-MAX-4-NEXT: [[TMP40:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-4-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-4-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP41]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP40]]) +; CHECK-MAX-4-NEXT: [[TMP42:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP43:%.*]] = load [6 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP42]], [4 x i32] poison, [6 x i32] [[TMP43]]), !continuation.registercount [[META19]] +; CHECK-MAX-4-NEXT: unreachable +; +; CHECK-MAX-8-LABEL: define %struct.AnyHitSystemData @AnyHit2DWords( +; CHECK-MAX-8-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES2DWORDS:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META20]] !continuation [[META24:![0-9]+]] { +; CHECK-MAX-8-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-8-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-8-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 +; CHECK-MAX-8-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 
8 +; CHECK-MAX-8-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-8-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES2DWORDS]], align 8 +; CHECK-MAX-8-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-8-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] 
[[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-8-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ATTRIBUTES2DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-8-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP26]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; 
CHECK-MAX-8-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP35]], ptr [[TMP33]], align 4 +; CHECK-MAX-8-NEXT: [[TMP36:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP36]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP37]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4 +; CHECK-MAX-8-NEXT: [[TMP40:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP41]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP40]]) +; CHECK-MAX-8-NEXT: [[TMP42:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP43:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP42]], [8 x i32] poison, [10 x i32] [[TMP43]]), !continuation.registercount [[META20]] +; CHECK-MAX-8-NEXT: unreachable +; ret void } -; CHECK-LABEL: define {{.*}} @AnyHit4DWords( -define void @AnyHit4DWords(%struct.MyPayload* %payload, %struct.Attributes4DWords* %attrs) !types !28 { +define void @AnyHit4DWords(%struct.MyPayload* %payload, %struct.Attributes4DWords* %attrs) !pointeetys !28 { +; CHECK-MAX-1-LABEL: define void @AnyHit4DWords( +; CHECK-MAX-1-SAME: ptr [[PAYLOAD:%.*]], ptr [[ATTRS:%.*]]) { +; CHECK-MAX-1-NEXT: ret void +; +; CHECK-MAX-2-LABEL: define void @AnyHit4DWords( +; CHECK-MAX-2-SAME: ptr [[PAYLOAD:%.*]], ptr [[ATTRS:%.*]]) { +; CHECK-MAX-2-NEXT: ret void +; +; CHECK-MAX-4-LABEL: define %struct.AnyHitSystemData @AnyHit4DWords( +; CHECK-MAX-4-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES4DWORDS:%.*]] [[TMP1:%.*]], {} [[PADDING:%.*]], [6 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META21]] !continuation.registercount [[META19]] !continuation [[META24:![0-9]+]] { +; CHECK-MAX-4-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-4-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-4-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-4-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [6 x i32], align 4 +; CHECK-MAX-4-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-4-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-4-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES4DWORDS]], align 8 +; CHECK-MAX-4-NEXT: store [6 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-4-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-4-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-4-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_4_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP21]], 
ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 +; CHECK-MAX-4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +; CHECK-MAX-4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-4-NEXT: store [[STRUCT_ATTRIBUTES4DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-4-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-4-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP31]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP34]], ptr [[TMP32]], align 4 +; CHECK-MAX-4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP37]], 
ptr [[TMP35]], align 4 +; CHECK-MAX-4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP40]], ptr [[TMP38]], align 4 +; CHECK-MAX-4-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_4_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-4-NEXT: [[TMP42:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP42]], ptr [[TMP3]], align 4 +; CHECK-MAX-4-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP43]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP45]], ptr [[TMP44]], align 4 +; CHECK-MAX-4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 2 +; CHECK-MAX-4-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP47]], ptr [[TMP41]], align 4 +; CHECK-MAX-4-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 3 +; CHECK-MAX-4-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 +; CHECK-MAX-4-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP48]], align 4 +; CHECK-MAX-4-NEXT: store i32 [[TMP50]], ptr [[TMP49]], align 4 +; CHECK-MAX-4-NEXT: [[TMP51:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-4-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-4-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP52]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP51]]) +; CHECK-MAX-4-NEXT: [[TMP53:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: [[TMP54:%.*]] = load [6 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-4-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP53]], [4 x i32] poison, [6 x i32] [[TMP54]]), !continuation.registercount [[META19]] +; CHECK-MAX-4-NEXT: unreachable +; +; CHECK-MAX-8-LABEL: define %struct.AnyHitSystemData @AnyHit4DWords( +; CHECK-MAX-8-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES4DWORDS:%.*]] [[TMP1:%.*]], [4 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META20]] !continuation [[META25:![0-9]+]] { +; CHECK-MAX-8-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-8-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-8-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 +; CHECK-MAX-8-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-8-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES4DWORDS]], align 8 +; CHECK-MAX-8-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-8-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-8-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-8-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 +; CHECK-MAX-8-NEXT: [[TMP25:%.*]] = 
getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +; CHECK-MAX-8-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 3 +; CHECK-MAX-8-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ATTRIBUTES4DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-8-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP31]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP34]], ptr [[TMP32]], align 4 +; CHECK-MAX-8-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP37]], ptr [[TMP35]], align 4 +; CHECK-MAX-8-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP40]], ptr [[TMP38]], align 4 +; CHECK-MAX-8-NEXT: [[TMP41:%.*]] = getelementptr 
inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT_PAYLOAD_ATTR_2_I32S:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-8-NEXT: [[TMP42:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP42]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP43]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP45]], ptr [[TMP44]], align 4 +; CHECK-MAX-8-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP47]], ptr [[TMP41]], align 4 +; CHECK-MAX-8-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 3 +; CHECK-MAX-8-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP48]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP50]], ptr [[TMP49]], align 4 +; CHECK-MAX-8-NEXT: [[TMP51:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP52]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP51]]) +; CHECK-MAX-8-NEXT: [[TMP53:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP54:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP53]], [8 x i32] poison, [10 x i32] [[TMP54]]), !continuation.registercount [[META20]] +; CHECK-MAX-8-NEXT: unreachable +; ret void } -; CHECK-LABEL: define {{.*}} @AnyHit8DWords( -define void @AnyHit8DWords(%struct.MyPayload* %payload, %struct.Attributes8DWords* %attrs) !types !63 { +define void @AnyHit8DWords(%struct.MyPayload* %payload, %struct.Attributes8DWords* %attrs) !pointeetys !63 { +; CHECK-MAX-1-LABEL: define void @AnyHit8DWords( +; CHECK-MAX-1-SAME: ptr [[PAYLOAD:%.*]], ptr [[ATTRS:%.*]]) { +; CHECK-MAX-1-NEXT: ret void +; +; CHECK-MAX-2-LABEL: define void @AnyHit8DWords( +; CHECK-MAX-2-SAME: ptr [[PAYLOAD:%.*]], ptr [[ATTRS:%.*]]) { +; CHECK-MAX-2-NEXT: ret void +; +; CHECK-MAX-4-LABEL: define void @AnyHit8DWords( +; CHECK-MAX-4-SAME: ptr [[PAYLOAD:%.*]], ptr [[ATTRS:%.*]]) { +; CHECK-MAX-4-NEXT: ret void +; +; CHECK-MAX-8-LABEL: define %struct.AnyHitSystemData @AnyHit8DWords( +; CHECK-MAX-8-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITSYSTEMDATA:%.*]] [[TMP0:%.*]], [[STRUCT_ATTRIBUTES8DWORDS:%.*]] [[TMP1:%.*]], {} [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META20]] !continuation [[META26:![0-9]+]] { +; CHECK-MAX-8-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; CHECK-MAX-8-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITSYSTEMDATA]], align 8 +; CHECK-MAX-8-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 +; CHECK-MAX-8-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_MYPAYLOAD:%.*]], align 8 +; CHECK-MAX-8-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 +; CHECK-MAX-8-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_ATTRIBUTES8DWORDS]], align 8 +; CHECK-MAX-8-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; 
CHECK-MAX-8-NEXT: store [[STRUCT_ANYHITSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP8:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +; CHECK-MAX-8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; CHECK-MAX-8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 +; CHECK-MAX-8-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4 +; CHECK-MAX-8-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: [[TMP19:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; CHECK-MAX-8-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP19]], ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 +; CHECK-MAX-8-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 +; CHECK-MAX-8-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +; CHECK-MAX-8-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 3 +; CHECK-MAX-8-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +; CHECK-MAX-8-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 4 +; CHECK-MAX-8-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP32]], ptr [[TMP30]], align 4 +; CHECK-MAX-8-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 5 +; CHECK-MAX-8-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 3 +; CHECK-MAX-8-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP35]], ptr [[TMP33]], align 4 +; CHECK-MAX-8-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 6 +; CHECK-MAX-8-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 4 +; CHECK-MAX-8-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP38]], ptr [[TMP36]], align 4 +; 
CHECK-MAX-8-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 5 +; CHECK-MAX-8-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP41]], ptr [[TMP39]], align 4 +; CHECK-MAX-8-NEXT: store [[STRUCT_ATTRIBUTES8DWORDS]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CHECK-MAX-8-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CHECK-MAX-8-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP5]], i32 0 +; CHECK-MAX-8-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP43]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; CHECK-MAX-8-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP42]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP46]], ptr [[TMP44]], align 4 +; CHECK-MAX-8-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP49]], ptr [[TMP47]], align 4 +; CHECK-MAX-8-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP52]], ptr [[TMP50]], align 4 +; CHECK-MAX-8-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_2_ANYHIT_OUT_ACCEPT:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; CHECK-MAX-8-NEXT: [[TMP54:%.*]] = load i32, ptr 
[[HITATTRSALLOCA]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP54]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP55]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP57]], ptr [[TMP56]], align 4 +; CHECK-MAX-8-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP59]], ptr [[TMP53]], align 4 +; CHECK-MAX-8-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 3 +; CHECK-MAX-8-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP53]], i32 1 +; CHECK-MAX-8-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP60]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP62]], ptr [[TMP61]], align 4 +; CHECK-MAX-8-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 4 +; CHECK-MAX-8-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[TMP53]], i32 2 +; CHECK-MAX-8-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP63]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP65]], ptr [[TMP64]], align 4 +; CHECK-MAX-8-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 5 +; CHECK-MAX-8-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[TMP53]], i32 3 +; CHECK-MAX-8-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP66]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP68]], ptr [[TMP67]], align 4 +; CHECK-MAX-8-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 6 +; CHECK-MAX-8-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[TMP53]], i32 4 +; CHECK-MAX-8-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP69]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP71]], ptr [[TMP70]], align 4 +; CHECK-MAX-8-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 7 +; 
CHECK-MAX-8-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[TMP53]], i32 5 +; CHECK-MAX-8-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP72]], align 4 +; CHECK-MAX-8-NEXT: store i32 [[TMP74]], ptr [[TMP73]], align 4 +; CHECK-MAX-8-NEXT: [[TMP75:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 +; CHECK-MAX-8-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CHECK-MAX-8-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP76]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP75]]) +; CHECK-MAX-8-NEXT: [[TMP77:%.*]] = load [[STRUCT_ANYHITSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: [[TMP78:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-MAX-8-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_ANYHITSYSTEMDATA]] [[TMP77]], [8 x i32] poison, [10 x i32] [[TMP78]]), !continuation.registercount [[META20]] +; CHECK-MAX-8-NEXT: unreachable +; ret void } ; Function Attrs: nounwind -declare !types !30 void @dx.op.traceRay.struct.MyPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MyPayload*) #0 +declare !pointeetys !30 void @dx.op.traceRay.struct.MyPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MyPayload*) #0 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 @@ -78,9 +833,6 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
; Function Attrs: nounwind memory(read) declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #3 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #3 @@ -91,34 +843,38 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #3 ; Function Attrs: alwaysinline -declare !types !31 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #3 +declare !pointeetys !31 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #3 ; Function Attrs: alwaysinline -declare !types !33 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #3 +declare !pointeetys !33 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #3 ; Function Attrs: alwaysinline -declare !types !34 i1 @_cont_IsEndSearch(%struct.TraversalData*) #3 +declare !pointeetys !34 i1 @_cont_IsEndSearch(%struct.TraversalData*) #3 ; Function Attrs: nounwind memory(read) -declare !types !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !36 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !38 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !38 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !40 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 +declare !pointeetys !40 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 -declare !types !42 i1 
@_cont_ReportHit(%struct.AnyHitSystemData*, float, i32) +declare !pointeetys !42 i1 @_cont_ReportHit(%struct.AnyHitSystemData*, float, i32) -declare !types !44 void @_cont_AcceptHit(%struct.AnyHitSystemData*) +declare !pointeetys !44 void @_cont_AcceptHit(%struct.AnyHitSystemData*) ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !types !45 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !pointeetys !45 { +; CHECK-LABEL: define i32 @_cont_GetLocalRootIndex( +; CHECK-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i32 5 +; ret i32 5 } ; Function Attrs: alwaysinline -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #3 !types !46 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #3 !pointeetys !46 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -129,10 +885,10 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !47 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !47 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !47 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !47 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 attributes #0 = { 
nounwind } attributes #1 = { nounwind memory(none) } @@ -183,35 +939,58 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !16 = !{void (%struct.MyPayload*, %struct.Attributes4DWords*)* @AnyHit4DWords, !"AnyHit4DWords", null, null, !12} !17 = !{void (%struct.MyPayload*, %struct.Attributes4DWords*)* @AnyHit8DWords, !"AnyHit8DWords", null, null, !12} !22 = !{i32 8} -!23 = !{!"function", !"void", !24, !25} +!23 = !{null, %struct.MyPayload poison, %struct.Attributes2DWords poison} !24 = !{i32 0, %struct.MyPayload poison} !25 = !{i32 0, %struct.Attributes2DWords poison} !26 = !{i32 16} !27 = !{i32 32} -!28 = !{!"function", !"void", !24, !29} +!28 = !{null, %struct.MyPayload poison, %struct.Attributes4DWords poison} !29 = !{i32 0, %struct.Attributes4DWords poison} -!30 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !24} -!31 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !32} +!30 = !{%struct.MyPayload poison} +!31 = !{%struct.SystemData poison} !32 = !{i32 0, %struct.SystemData poison} -!33 = !{!"function", !"void", !32, %struct.BuiltInTriangleIntersectionAttributes poison} -!34 = !{!"function", i1 poison, !35} +!33 = !{%struct.SystemData poison} +!34 = !{%struct.TraversalData poison} !35 = !{i32 0, %struct.TraversalData poison} -!36 = !{!"function", i32 poison, !32, !37} +!36 = !{null, %struct.SystemData poison, %struct.HitData poison} !37 = !{i32 0, %struct.HitData poison} -!38 = !{!"function", !"void", !39} +!38 = !{%struct.DispatchSystemData poison} !39 = !{i32 0, %struct.DispatchSystemData poison} -!40 = !{!"function", !"void", !41} +!40 = !{%struct.AnyHitTraversalData poison} !41 = !{i32 0, %struct.AnyHitTraversalData poison} -!42 = !{!"function", i1 poison, !43, float poison, i32 poison} +!42 = !{%struct.AnyHitSystemData 
poison} !43 = !{i32 0, %struct.AnyHitSystemData poison} -!44 = !{!"function", !"void", !43} -!45 = !{!"function", i32 poison, !39} -!46 = !{!"function", !"void", !39, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!47 = !{!"function", !"void", i64 poison, !48} +!44 = !{%struct.AnyHitSystemData poison} +!45 = !{%struct.DispatchSystemData poison} +!46 = !{%struct.DispatchSystemData poison} +!47 = !{i8 poison} !48 = !{i32 0, i8 poison} !49 = !{i32 4} -!60 = !{!"function", !"void", !61, !62} +!60 = !{null, %struct.MyPayload poison, %struct.Attributes1DWords poison} !61 = !{i32 0, %struct.MyPayload poison} !62 = !{i32 0, %struct.Attributes1DWords poison} -!63 = !{!"function", !"void", !24, !64} +!63 = !{null, %struct.MyPayload poison, %struct.Attributes8DWords poison} !64 = !{i32 0, %struct.Attributes8DWords poison} +;. +; CHECK-MAX-1: [[META15]] = !{i32 4} +; CHECK-MAX-1: [[META18]] = !{i32 2} +; CHECK-MAX-1: [[META19]] = !{ptr @AnyHit1DWords} +;. +; CHECK-MAX-2: [[META18]] = !{i32 4} +; CHECK-MAX-2: [[META20]] = !{i32 2} +; CHECK-MAX-2: [[META21]] = !{ptr @AnyHit1DWords} +; CHECK-MAX-2: [[META22]] = !{ptr @AnyHit2DWords} +;. +; CHECK-MAX-4: [[META19]] = !{i32 6} +; CHECK-MAX-4: [[META21]] = !{i32 2} +; CHECK-MAX-4: [[META22]] = !{ptr @AnyHit1DWords} +; CHECK-MAX-4: [[META23]] = !{ptr @AnyHit2DWords} +; CHECK-MAX-4: [[META24]] = !{ptr @AnyHit4DWords} +;. +; CHECK-MAX-8: [[META20]] = !{i32 10} +; CHECK-MAX-8: [[META22]] = !{i32 2} +; CHECK-MAX-8: [[META23]] = !{ptr @AnyHit1DWords} +; CHECK-MAX-8: [[META24]] = !{ptr @AnyHit2DWords} +; CHECK-MAX-8: [[META25]] = !{ptr @AnyHit4DWords} +; CHECK-MAX-8: [[META26]] = !{ptr @AnyHit8DWords} +;. 
diff --git a/llvmraytracing/test/dx/payload-caller-in-paq.ll b/llvmraytracing/test/dx/payload-caller-in-paq.ll index affcf301ac..303a0dd4ce 100644 --- a/llvmraytracing/test/dx/payload-caller-in-paq.ll +++ b/llvmraytracing/test/dx/payload-caller-in-paq.ll @@ -27,7 +27,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 @"\01?gOutput@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } @@ -36,6 +36,7 @@ define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-LABEL: define void @RayGen( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META27:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [7 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A", align 4 @@ -55,40 +56,47 @@ define void @RayGen() #0 { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 1 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META32:![0-9]+]], !continuation.wait.await [[META13]], !continuation.returnedRegistercount [[META25:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP12]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [1 x i32] [[TMP12]]), !continuation.registercount [[META32:![0-9]+]], !continuation.wait.await [[META13]], !continuation.returnedRegistercount [[META25:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [12 x i32], [3 x i32] } @await(ptr [[TMP17]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [12 x i32], [3 x i32] } [[TMP20]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [3 x i32] [[TMP15]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_MYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP26]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP25]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [12 x i32], [3 x i32] } [[TMP20]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP13]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP6]], align 8, !tbaa [[TBAA28]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !tbaa [[TBAA33:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = sitofp i32 [[TMP26]] to float -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8, !tbaa [[TBAA35:![0-9]+]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = fptrunc double [[TMP29]] to float -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP31]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP32]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP39]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) -; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP34]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP24]], float [[TMP27]], float [[TMP30]], float 0.000000e+00, i8 15) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP6]], align 8, !tbaa [[TBAA28]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = 
getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4, !tbaa [[TBAA33:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = sitofp i32 [[TMP44]] to float +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_MYPAYLOAD]], ptr [[TMP4]], i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP46]], align 8, !tbaa [[TBAA35:![0-9]+]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = fptrunc double [[TMP47]] to float +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP49]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP50]], i8 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP52]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP42]], float [[TMP45]], float [[TMP48]], float 0.000000e+00, i8 15) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR0]] ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; @@ -119,7 +127,7 @@ define void @RayGen() #0 { } ; Function Attrs: nounwind -declare !types !32 void @dx.op.traceRay.struct.MyPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MyPayload*) #0 +declare !pointeetys !32 void @dx.op.traceRay.struct.MyPayload(i32, 
%dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.MyPayload*) #0 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0 @@ -133,9 +141,6 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. ; Function Attrs: nounwind memory(read) declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #3 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdWaitAwaitTraversal(i64, i64, %struct.TraversalData) #3 @@ -146,36 +151,38 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #3 ; Function Attrs: alwaysinline -declare !types !34 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #3 +declare !pointeetys !34 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #3 ; Function Attrs: nounwind memory(read) -declare !types !36 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #2 +declare !pointeetys !36 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #2 ; Function Attrs: alwaysinline -declare !types !38 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #3 +declare !pointeetys !38 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #3 ; Function Attrs: alwaysinline -declare !types !39 i1 @_cont_IsEndSearch(%struct.TraversalData*) #3 +declare !pointeetys !39 i1 @_cont_IsEndSearch(%struct.TraversalData*) #3 ; Function Attrs: nounwind memory(read) -declare !types !41 i32 
@_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !41 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 + +declare !pointeetys !50 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: nounwind memory(none) -declare !types !43 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !43 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !44 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 +declare !pointeetys !44 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #3 ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !types !46 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !pointeetys !46 { ret i32 5 } ; Function Attrs: alwaysinline -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #3 !types !47 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #3 !pointeetys !47 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -188,10 +195,10 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !48 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) 
#4 +declare !pointeetys !48 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !48 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !48 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 attributes #0 = { nounwind } attributes #1 = { nounwind memory(none) } @@ -240,21 +247,22 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !29 = !{!"int", !26, i64 0} !30 = !{!31, !31, i64 0} !31 = !{!"double", !26, i64 0} -!32 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !33} +!32 = !{%struct.MyPayload poison} !33 = !{i32 0, %struct.MyPayload poison} -!34 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !35} +!34 = !{%struct.SystemData poison} !35 = !{i32 0, %struct.SystemData poison} -!36 = !{!"function", <3 x i32> poison, !37} +!36 = !{%struct.DispatchSystemData poison} !37 = !{i32 0, %struct.DispatchSystemData poison} -!38 = !{!"function", !"void", !35, %struct.BuiltInTriangleIntersectionAttributes poison} -!39 = !{!"function", i1 poison, !40} +!38 = !{%struct.SystemData poison} +!39 = !{%struct.TraversalData poison} !40 = !{i32 0, %struct.TraversalData poison} -!41 = !{!"function", i32 poison, !35, !42} +!41 = !{null, %struct.SystemData poison, %struct.HitData poison} !42 = !{i32 0, %struct.HitData poison} -!43 = !{!"function", !"void", !37} -!44 = !{!"function", !"void", !45} +!43 = !{%struct.DispatchSystemData poison} +!44 = !{%struct.AnyHitTraversalData poison} !45 = !{i32 0, %struct.AnyHitTraversalData poison} -!46 = !{!"function", i32 poison, !37} -!47 = !{!"function", !"void", !37, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float 
poison, float poison, float poison, float poison, float poison, float poison, float poison} -!48 = !{!"function", !"void", i64 poison, !49} +!46 = !{%struct.DispatchSystemData poison} +!47 = !{%struct.DispatchSystemData poison} +!48 = !{i8 poison} !49 = !{i32 0, i8 poison} +!50 = !{%struct.AnyHitTraversalData poison} diff --git a/llvmraytracing/test/dx/payload-caller-in-paq.ll.hlsl b/llvmraytracing/test/dx/payload-caller-in-paq.ll.hlsl index ea08c13a60..55c61edaf0 100644 --- a/llvmraytracing/test/dx/payload-caller-in-paq.ll.hlsl +++ b/llvmraytracing/test/dx/payload-caller-in-paq.ll.hlsl @@ -18,6 +18,5 @@ RWTexture2D gOutput : register(u0); TraceRay(myAccelerationStructure, 0, 0, 0, 0, 0, myRay, payload); - gOutput[DispatchRaysIndex().xy] = - float4(payload.v1, payload.v2, payload.v3, 0.); + gOutput[DispatchRaysIndex().xy] = float4(payload.v1, payload.v2, payload.v3, 0.); } diff --git a/llvmraytracing/test/dx/payload-save-registers.ll b/llvmraytracing/test/dx/payload-save-registers.ll index c307c2ed18..e1429dc68b 100644 --- a/llvmraytracing/test/dx/payload-save-registers.ll +++ b/llvmraytracing/test/dx/payload-save-registers.ll @@ -24,71 +24,98 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: %struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } %struct.RaytracingAccelerationStructure = type { i32 } +declare !pointeetys !48 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) + @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 ; Function Attrs: nounwind -define void @Miss(%struct.OuterPayload* noalias nocapture %outerPayload) #0 !types !23 { +define void @Miss(%struct.OuterPayload* noalias nocapture %outerPayload) #0 !pointeetys !23 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Miss( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] 
!lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META23:![0-9]+]] !continuation [[META26:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [10 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META23:![0-9]+]] !continuation [[META26:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [37 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_OUTERPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP9]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP69]], ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr [[TMP26]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store 
i32 [[TMP29]], ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP72]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP29]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP30]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) 
@PAYLOAD, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP76]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP85]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP92]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP141]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = load i32, ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A", align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = alloca [[STRUCT_INNERPAYLOAD:%.*]], align 4 @@ -98,87 +125,115 @@ define void @Miss(%struct.OuterPayload* noalias nocapture %outerPayload) #0 !typ ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load float, ptr [[TMP48]], align 4, !tbaa [[TBAA27:![0-9]+]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_INNERPAYLOAD]], ptr [[TMP46]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP51]], ptr [[TMP50]], align 4, !tbaa [[TBAA27]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP45]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = 
call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP55]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP45]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP73]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP52]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_INNERPAYLOAD]], ptr [[TMP46]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP57]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = call ptr inttoptr (i64 4 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 poison), !continuation.registercount [[META31:![0-9]+]], !continuation.returnedRegistercount [[META31]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr 
[[TMP56]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP57]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load [1 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], [10 x i32] poison, [1 x i32] [[TMP79]]), !continuation.registercount [[META31:![0-9]+]], !continuation.returnedRegistercount [[META31]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [10 x i32], [1 x i32] } @await(ptr [[TMP80]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [10 x i32], [1 x i32] } [[TMP81]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [1 x i32] [[TMP61]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_INNERPAYLOAD]] poison, ptr [[TMP46]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_INNERPAYLOAD]], ptr [[TMP46]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP97]], ptr [[TMP59]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP84]], ptr [[TMP59]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [10 x i32], [1 x i32] } [[TMP81]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP58]], ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load float, ptr [[TMP50]], align 4, 
!tbaa [[TBAA27]] -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP60]], ptr [[TMP48]], align 4, !tbaa [[TBAA27]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = load float, ptr [[TMP50]], align 4, !tbaa [[TBAA27]] +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP86]], ptr [[TMP48]], align 4, !tbaa [[TBAA27]] ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP47]]) #[[ATTR0]] -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP77]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP83]], ptr [[TMP87]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP90]], ptr [[TMP88]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr [[TMP89]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP96]], ptr [[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP60]], ptr [[TMP91]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP142]], ptr [[TMP42]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP64]], ptr [[TMP93]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP66]], ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = load ptr addrspace(32), ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP62]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP64]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP73]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP66]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP75]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP68]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP77]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP70]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP79]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP72]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP81]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP74]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP83]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP76]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 
[[TMP85]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP78]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP87]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP80]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP89]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP82]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP91]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP84]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP93]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP86]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP98]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = load i32, ptr [[TMP88]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP100]], ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load i32, ptr [[TMP90]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP101]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP98]], ptr [[TMP97]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = load i32, ptr [[TMP100]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP101]], ptr [[TMP99]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP103]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP104]], ptr [[TMP102]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP106]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP107]], ptr [[TMP105]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP109:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP110:%.*]] = load i32, ptr [[TMP109]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP110]], ptr [[TMP108]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP112:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = load i32, ptr [[TMP112]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP113]], ptr [[TMP111]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP116:%.*]] = load i32, ptr [[TMP115]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP116]], ptr [[TMP114]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = load i32, ptr [[TMP118]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP119]], ptr [[TMP117]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = load i32, ptr [[TMP121]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP122]], ptr [[TMP120]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP125:%.*]] = load i32, ptr [[TMP124]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP125]], ptr [[TMP123]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = getelementptr 
inbounds i32, ptr [[TMP97]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = load i32, ptr [[TMP127]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP128]], ptr [[TMP126]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = load i32, ptr [[TMP130]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP131]], ptr [[TMP129]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP134:%.*]] = load i32, ptr [[TMP133]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP134]], ptr [[TMP132]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP137:%.*]] = load i32, ptr [[TMP136]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP137]], ptr [[TMP135]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = getelementptr inbounds i32, ptr [[TMP97]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = load i32, ptr [[TMP139]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP140]], ptr [[TMP138]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[TMP94]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP95]]), !continuation.registercount [[META23]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP95]], [10 x i32] poison, [30 x i32] [[TMP143]]), !continuation.registercount [[META23]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?myAccelerationStructure@@3URaytracingAccelerationStructure@@A", align 4 @@ -199,564 +254,687 @@ define void @Miss(%struct.OuterPayload* noalias nocapture %outerPayload) #0 !typ } ; Function Attrs: nounwind -define void @Callable(%struct.OuterPayload* noalias %outerPayload) #0 !types !23 { +define void @Callable(%struct.OuterPayload* noalias %outerPayload) #0 !pointeetys !23 { ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @Callable( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR0]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META33:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [10 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR0]] !lgc.rt.shaderstage [[META32:![0-9]+]] !continuation.registercount [[META23]] !continuation [[META33:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_OUTERPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_OUTERPAYLOAD]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] 
[[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4 
+; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP27]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP14]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP19]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[TMP39]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP16]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP51]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP31]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 
10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP24]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP63]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP66]], ptr [[TMP26]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP28]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP30]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP72]], ptr [[TMP30]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP35]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP75]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 15 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr [[TMP34]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 16 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = getelementptr inbounds i32, ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], i32 16 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP81]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP53]], ptr [[TMP36]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 17 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP55]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP84]], ptr [[TMP38]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 18 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP87]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP40]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 19 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr [[TMP42]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 19 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP61]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP90]], ptr 
[[TMP42]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 20 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr [[TMP44]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 20 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP44]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 21 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP51]], ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr [[TMP46]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 22 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP53]], ptr [[TMP48]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP71]], ptr [[TMP48]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 23 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP50]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr [[TMP50]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 24 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP77]], ptr [[TMP52]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 25 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP59]], ptr [[TMP54]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP80]], ptr [[TMP54]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 26 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[TMP56]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP83]], ptr [[TMP56]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 27 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP58]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP86]], ptr [[TMP58]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 28 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr [[TMP60]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP89]], ptr [[TMP60]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 29 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = load i32, ptr 
[[TMP91]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP92]], ptr [[TMP62]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load float, ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP70]], ptr [[TMP68]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = load float, ptr [[TMP72]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP73]], ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load float, ptr [[TMP75]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP76]], ptr [[TMP74]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load float, ptr [[TMP78]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP79]], ptr [[TMP77]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 
4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP82:%.*]] = load float, ptr [[TMP81]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP82]], ptr [[TMP80]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP85:%.*]] = load float, ptr [[TMP84]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP85]], ptr [[TMP83]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP88]], ptr [[TMP86]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP91:%.*]] = load float, ptr [[TMP90]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP91]], ptr [[TMP89]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP94:%.*]] = load float, ptr [[TMP93]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP94]], ptr [[TMP92]], align 8 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP97:%.*]] = load float, ptr [[TMP96]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP97]], ptr [[TMP95]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = load float, ptr [[TMP99]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP100]], ptr [[TMP98]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = load float, ptr [[TMP102]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP103]], ptr [[TMP101]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = load float, ptr [[TMP105]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP106]], ptr [[TMP104]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 13 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP109:%.*]] = load float, ptr [[TMP108]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP109]], ptr [[TMP107]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP112:%.*]] = load float, ptr [[TMP111]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP112]], ptr [[TMP110]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = load float, ptr [[TMP114]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP115]], ptr [[TMP113]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = load float, ptr [[TMP117]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP118]], ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = load float, ptr [[TMP120]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP121]], ptr [[TMP119]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr 
[[TMP2]], i32 0, i32 1, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = load float, ptr [[TMP123]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP124]], ptr [[TMP122]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = load float, ptr [[TMP126]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP127]], ptr [[TMP125]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = load float, ptr [[TMP129]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP130]], ptr [[TMP128]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = load float, ptr [[TMP132]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP133]], ptr [[TMP131]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = load float, ptr [[TMP135]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP136]], ptr [[TMP134]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = load float, ptr [[TMP138]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP139]], ptr [[TMP137]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = load float, ptr [[TMP141]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP142]], ptr [[TMP140]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP145:%.*]] = load float, ptr [[TMP144]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP145]], ptr [[TMP143]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP148:%.*]] = load float, ptr [[TMP147]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP148]], ptr [[TMP146]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = 
getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP151:%.*]] = load float, ptr [[TMP150]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP151]], ptr [[TMP149]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP154:%.*]] = load float, ptr [[TMP153]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP154]], ptr [[TMP152]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP157:%.*]] = load float, ptr [[TMP156]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP157]], ptr [[TMP155]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP102:%.*]] = load float, ptr [[TMP101]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP102]], ptr [[TMP100]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP98:%.*]] = load float, ptr [[TMP104]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP98]], ptr [[TMP103]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP108:%.*]] = load float, ptr [[TMP107]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP108]], ptr [[TMP106]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP105:%.*]] = load float, ptr [[TMP110]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP105]], ptr [[TMP109]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP114:%.*]] = load float, ptr [[TMP113]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP114]], ptr [[TMP112]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP111:%.*]] = load float, ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP111]], ptr [[TMP115]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 6 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP120:%.*]] = load float, ptr [[TMP119]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP120]], ptr [[TMP118]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP117:%.*]] = load float, ptr [[TMP122]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP117]], ptr [[TMP121]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP126:%.*]] = load float, ptr [[TMP125]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP126]], ptr [[TMP124]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP123:%.*]] = load float, ptr [[TMP128]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP123]], ptr [[TMP127]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP132:%.*]] = load float, ptr [[TMP131]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP132]], ptr [[TMP130]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr 
[[TMP2]], i32 0, i32 0, i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP129:%.*]] = load float, ptr [[TMP134]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP129]], ptr [[TMP133]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP138:%.*]] = load float, ptr [[TMP137]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP138]], ptr [[TMP136]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP135:%.*]] = load float, ptr [[TMP140]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP135]], ptr [[TMP139]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP142:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 0, i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 0, i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP144:%.*]] = load float, ptr [[TMP143]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP144]], ptr [[TMP142]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP141:%.*]] = load float, ptr [[TMP146]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP141]], ptr [[TMP145]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP147:%.*]] = load float, ptr [[TMP149]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP147]], ptr [[TMP148]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP150:%.*]] = load float, ptr [[TMP152]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP150]], ptr [[TMP151]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP153:%.*]] = load float, ptr [[TMP155]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP153]], ptr [[TMP154]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP156:%.*]] = load float, ptr [[TMP158]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP156]], ptr [[TMP157]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP160:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP161:%.*]] = getelementptr 
inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP159:%.*]] = load float, ptr [[TMP161]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP159]], ptr [[TMP160]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP162:%.*]] = load float, ptr [[TMP164]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP162]], ptr [[TMP163]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP165:%.*]] = load float, ptr [[TMP167]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP165]], ptr [[TMP166]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP169:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP168:%.*]] = load float, ptr [[TMP170]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP168]], ptr [[TMP169]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP172:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP173:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP171:%.*]] = load float, ptr [[TMP173]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP171]], ptr [[TMP172]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP175:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP176:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP174:%.*]] = load float, ptr [[TMP176]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP174]], ptr [[TMP175]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP178:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP179:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP177:%.*]] = load float, ptr [[TMP179]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP177]], ptr [[TMP178]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP181:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP182:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP180:%.*]] = load float, ptr [[TMP182]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP180]], ptr [[TMP181]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP184:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP185:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP276:%.*]] = load float, ptr [[TMP185]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP276]], ptr [[TMP184]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP187:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0, i32 1, i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP188:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0, i32 1, i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP278:%.*]] = load float, ptr [[TMP188]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP278]], ptr [[TMP187]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP161:%.*]] = load i32, ptr [[TMP158]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP161]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP159:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP163:%.*]] = load i32, ptr [[TMP159]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP163]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP184:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP165:%.*]] = load i32, ptr [[TMP184]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP165]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP160:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP167:%.*]] = load i32, ptr [[TMP160]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP167]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP162:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP169:%.*]] = load i32, ptr [[TMP162]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP169]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP164:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 5 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP171:%.*]] = load i32, ptr [[TMP164]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP171]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP166:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP173:%.*]] = load i32, ptr [[TMP166]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP173]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP168:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP175:%.*]] = load i32, ptr [[TMP168]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP175]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP170:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP177:%.*]] = load i32, ptr [[TMP170]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP177]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP172:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP179:%.*]] = load i32, ptr [[TMP172]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP179]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP174:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP181:%.*]] = load i32, ptr [[TMP174]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP181]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP176:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 11 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP183:%.*]] = load i32, ptr [[TMP176]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP183]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP178:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP185:%.*]] = load i32, ptr [[TMP178]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP185]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP180:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP187:%.*]] = load i32, ptr [[TMP180]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP187]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP182:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP189:%.*]] = load i32, ptr [[TMP182]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP189]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP191:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 15 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP193:%.*]] = load i32, ptr [[TMP191]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP193]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP186:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 16 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP195:%.*]] = load i32, ptr [[TMP186]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP195]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP188:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 17 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP197:%.*]] = load i32, ptr [[TMP188]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP197]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP190:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 18 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP199:%.*]] = load i32, ptr [[TMP190]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP199]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP192:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 19 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP201:%.*]] = load i32, ptr [[TMP192]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP201]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP194:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 20 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP203:%.*]] = load i32, ptr [[TMP194]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP203]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP196:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 21 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP205:%.*]] = load i32, ptr [[TMP196]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP205]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP198:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 22 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP207:%.*]] = load i32, ptr [[TMP198]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP207]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP200:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 23 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP209:%.*]] = load i32, ptr [[TMP200]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP209]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP202:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 24 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP211:%.*]] = load i32, ptr [[TMP202]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP211]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP204:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 25 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP213:%.*]] = load i32, ptr [[TMP204]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP213]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP206:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 26 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP215:%.*]] = load i32, ptr [[TMP206]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP215]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP208:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 27 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP217:%.*]] = load i32, ptr [[TMP208]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP217]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP210:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 28 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP219:%.*]] = load i32, ptr [[TMP210]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP219]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP212:%.*]] = getelementptr inbounds i32, ptr [[TMP158]], i32 29 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP221:%.*]] = load i32, ptr [[TMP212]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP221]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP214:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]]), !continuation.registercount [[META23]], !continuation.returnedRegistercount [[META23]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP223:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT_1:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP214]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP183:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP281:%.*]] = load i32, ptr [[TMP183]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP281]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP284:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP186:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP287:%.*]] = load i32, ptr [[TMP186]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP287]], ptr [[TMP284]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP290:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP189:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP190:%.*]] = load i32, ptr [[TMP189]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP190]], ptr [[TMP290]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP191:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP192:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP193:%.*]] = load i32, ptr [[TMP192]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP193]], ptr [[TMP191]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP194:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP195:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP196:%.*]] = load i32, ptr [[TMP195]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP196]], ptr [[TMP194]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP197:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP198:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP199:%.*]] = load i32, ptr [[TMP198]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP199]], ptr [[TMP197]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP200:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP201:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP202:%.*]] = load i32, ptr [[TMP201]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP202]], ptr [[TMP200]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP203:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP204:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP205:%.*]] = load i32, ptr [[TMP204]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP205]], ptr [[TMP203]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP206:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP207:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP208:%.*]] = load i32, ptr [[TMP207]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP208]], ptr [[TMP206]], align 
4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP209:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP210:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP211:%.*]] = load i32, ptr [[TMP210]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP211]], ptr [[TMP209]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP212:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP213:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP214:%.*]] = load i32, ptr [[TMP213]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP214]], ptr [[TMP212]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP215:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP216:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP217:%.*]] = load i32, ptr [[TMP216]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP217]], ptr [[TMP215]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP293:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP219:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP296:%.*]] = load i32, ptr [[TMP219]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP296]], ptr [[TMP293]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP221:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP299:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP302:%.*]] = load i32, ptr [[TMP299]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP302]], ptr [[TMP221]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP305:%.*]] = getelementptr 
inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP308:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP311:%.*]] = load i32, ptr [[TMP308]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP311]], ptr [[TMP305]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP227:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP314:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP229:%.*]] = load i32, ptr [[TMP314]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP229]], ptr [[TMP227]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP317:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 16 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP231:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 16 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP320:%.*]] = load i32, ptr [[TMP231]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP320]], ptr [[TMP317]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP233:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP323:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP235:%.*]] = load i32, ptr [[TMP323]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP235]], ptr [[TMP233]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP326:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP237:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP329:%.*]] = load i32, ptr [[TMP237]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP329]], ptr [[TMP326]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP239:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 19 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP332:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 19 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP241:%.*]] = load i32, ptr [[TMP332]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP241]], ptr [[TMP239]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP335:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 20 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP243:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 20 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP338:%.*]] = load i32, ptr [[TMP243]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP338]], ptr [[TMP335]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP245:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP341:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP247:%.*]] = load i32, ptr [[TMP341]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP247]], ptr [[TMP245]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP344:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP249:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP347:%.*]] = load i32, ptr [[TMP249]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP347]], ptr [[TMP344]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP251:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP350:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP253:%.*]] = load i32, ptr [[TMP350]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP253]], ptr [[TMP251]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP353:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP255:%.*]] = getelementptr 
inbounds i32, ptr [[TMP183]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP356:%.*]] = load i32, ptr [[TMP255]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP356]], ptr [[TMP353]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP257:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP359:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP259:%.*]] = load i32, ptr [[TMP359]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP259]], ptr [[TMP257]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP362:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP261:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP365:%.*]] = load i32, ptr [[TMP261]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP365]], ptr [[TMP362]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP263:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP396:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP265:%.*]] = load i32, ptr [[TMP396]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP265]], ptr [[TMP263]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP485:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP267:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP487:%.*]] = load i32, ptr [[TMP267]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP487]], ptr [[TMP485]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP269:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP270:%.*]] = getelementptr inbounds i32, ptr [[TMP183]], i32 29 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP488:%.*]] = load i32, ptr [[TMP270]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP488]], ptr [[TMP269]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP272:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP489:%.*]] = call ptr inttoptr (i64 2 to ptr)([[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I]], [11 x i32] poison, [30 x i32] [[TMP272]]), !continuation.registercount [[META23]], !continuation.returnedRegistercount [[META23]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP274:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [10 x i32], [30 x i32] } @await.1(ptr [[TMP489]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP490:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [10 x i32], [30 x i32] } [[TMP274]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[TMP490]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_OUTERPAYLOAD]] poison, ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP224:%.*]] = getelementptr inbounds [[STRUCT_OUTERPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP227:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP227]], ptr [[TMP224]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP277:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP277]], ptr [[TMP224]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP218:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP229:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP229]], ptr [[TMP218]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP279:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP280:%.*]] = load i32, ptr 
[[TMP279]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP280]], ptr [[TMP218]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP220:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP231:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP231]], ptr [[TMP220]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP282:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP283:%.*]] = load i32, ptr [[TMP282]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP283]], ptr [[TMP220]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP222:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP233:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP233]], ptr [[TMP222]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP285:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP286:%.*]] = load i32, ptr [[TMP285]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP286]], ptr [[TMP222]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP225:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP235:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP235]], ptr [[TMP225]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP288:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP289:%.*]] = load i32, ptr [[TMP288]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP289]], ptr [[TMP225]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP226:%.*]] = 
getelementptr inbounds i32, ptr [[TMP224]], i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP237:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP237]], ptr [[TMP226]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP291:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP292:%.*]] = load i32, ptr [[TMP291]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP292]], ptr [[TMP226]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP228:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 6 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP239:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP239]], ptr [[TMP228]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP294:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP295:%.*]] = load i32, ptr [[TMP294]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP295]], ptr [[TMP228]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP230:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP241:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP241]], ptr [[TMP230]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP297:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP298:%.*]] = load i32, ptr [[TMP297]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP298]], ptr [[TMP230]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP232:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP243:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP243]], ptr [[TMP232]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP300:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP301:%.*]] = load i32, ptr [[TMP300]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP301]], ptr [[TMP232]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP234:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP245:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP245]], ptr [[TMP234]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP303:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP304:%.*]] = load i32, ptr [[TMP303]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP304]], ptr [[TMP234]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP236:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP247:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP247]], ptr [[TMP236]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP306:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP307:%.*]] = load i32, ptr [[TMP306]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP307]], ptr [[TMP236]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP238:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP249:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP249]], ptr [[TMP238]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP309:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP310:%.*]] = load i32, ptr [[TMP309]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP310]], ptr [[TMP238]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP240:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 12 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP251:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP251]], ptr [[TMP240]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP312:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP313:%.*]] = load i32, ptr [[TMP312]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP313]], ptr [[TMP240]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP242:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP253:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP253]], ptr [[TMP242]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP315:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP316:%.*]] = load i32, ptr [[TMP315]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP316]], ptr [[TMP242]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP244:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP255:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP255]], ptr [[TMP244]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP491:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 14 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP319:%.*]] = load i32, ptr [[TMP491]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP319]], ptr [[TMP244]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP246:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 15 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP259:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP259]], ptr [[TMP246]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP321:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP322:%.*]] = load i32, ptr [[TMP321]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP322]], ptr [[TMP246]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP248:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 16 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP261:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP261]], ptr [[TMP248]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP324:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 16 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP325:%.*]] = load i32, ptr [[TMP324]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP325]], ptr [[TMP248]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP250:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 17 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP263:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP263]], ptr [[TMP250]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP327:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP328:%.*]] = load i32, ptr [[TMP327]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP328]], ptr 
[[TMP250]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP252:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 18 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP265:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP265]], ptr [[TMP252]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP330:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP331:%.*]] = load i32, ptr [[TMP330]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP331]], ptr [[TMP252]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP254:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 19 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP267:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP267]], ptr [[TMP254]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP333:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 19 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP334:%.*]] = load i32, ptr [[TMP333]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP334]], ptr [[TMP254]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP256:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 20 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP269:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP269]], ptr [[TMP256]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP336:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 20 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP337:%.*]] = load i32, ptr [[TMP336]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP337]], ptr [[TMP256]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP258:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 21 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP270:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP270]], ptr [[TMP258]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP339:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP340:%.*]] = load i32, ptr [[TMP339]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP340]], ptr [[TMP258]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP260:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 22 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP272:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP272]], ptr [[TMP260]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP342:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP343:%.*]] = load i32, ptr [[TMP342]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP343]], ptr [[TMP260]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP262:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 23 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP274:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP274]], ptr [[TMP262]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP345:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP346:%.*]] = load i32, ptr [[TMP345]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP346]], ptr [[TMP262]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP264:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 24 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP276:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 
-; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP276]], ptr [[TMP264]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP348:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP349:%.*]] = load i32, ptr [[TMP348]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP349]], ptr [[TMP264]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP266:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 25 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP278:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP278]], ptr [[TMP266]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP351:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP352:%.*]] = load i32, ptr [[TMP351]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP352]], ptr [[TMP266]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP268:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 26 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP280:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP280]], ptr [[TMP268]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP354:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP355:%.*]] = load i32, ptr [[TMP354]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP355]], ptr [[TMP268]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP271:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 27 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP282:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP282]], ptr [[TMP271]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP357:%.*]] = getelementptr 
inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP358:%.*]] = load i32, ptr [[TMP357]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP358]], ptr [[TMP271]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP273:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 28 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP284:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP284]], ptr [[TMP273]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP360:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP361:%.*]] = load i32, ptr [[TMP360]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP361]], ptr [[TMP273]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP275:%.*]] = getelementptr inbounds i32, ptr [[TMP224]], i32 29 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP286:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP286]], ptr [[TMP275]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP363:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP364:%.*]] = load i32, ptr [[TMP363]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP364]], ptr [[TMP275]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP223:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [10 x i32], [30 x i32] } [[TMP274]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP223]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP288:%.*]] = load float, ptr [[TMP68]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store 
float [[TMP288]], ptr [[TMP69]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP277:%.*]] = load float, ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP277]], ptr [[TMP72]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP290:%.*]] = load float, ptr [[TMP74]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP290]], ptr [[TMP75]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP279:%.*]] = load float, ptr [[TMP77]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP279]], ptr [[TMP78]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP292:%.*]] = load float, ptr [[TMP80]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP292]], ptr [[TMP81]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP281:%.*]] = load float, ptr [[TMP83]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP281]], ptr [[TMP84]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP294:%.*]] = load float, ptr [[TMP86]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP294]], ptr [[TMP87]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP283:%.*]] = load float, ptr [[TMP89]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP283]], ptr [[TMP90]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP296:%.*]] = load float, ptr [[TMP92]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP296]], ptr [[TMP93]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP285:%.*]] = load float, ptr [[TMP95]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP285]], ptr [[TMP96]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP298:%.*]] = load float, ptr [[TMP98]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP298]], ptr [[TMP99]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP287:%.*]] = load float, ptr [[TMP101]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP287]], ptr [[TMP102]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP300:%.*]] = load float, ptr [[TMP104]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float 
[[TMP300]], ptr [[TMP105]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP289:%.*]] = load float, ptr [[TMP107]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP289]], ptr [[TMP108]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP302:%.*]] = load float, ptr [[TMP110]], align 8 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP302]], ptr [[TMP111]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP291:%.*]] = load float, ptr [[TMP113]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP291]], ptr [[TMP114]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP306:%.*]] = load float, ptr [[TMP116]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP306]], ptr [[TMP117]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP293:%.*]] = load float, ptr [[TMP119]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP293]], ptr [[TMP120]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP307:%.*]] = load float, ptr [[TMP122]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP307]], ptr [[TMP123]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP295:%.*]] = load float, ptr [[TMP125]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP295]], ptr [[TMP126]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP309:%.*]] = load float, ptr [[TMP128]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP309]], ptr [[TMP129]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP297:%.*]] = load float, ptr [[TMP131]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP297]], ptr [[TMP132]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP311:%.*]] = load float, ptr [[TMP134]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP311]], ptr [[TMP135]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP299:%.*]] = load float, ptr [[TMP137]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP299]], ptr [[TMP138]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP313:%.*]] = load float, ptr [[TMP140]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store 
float [[TMP313]], ptr [[TMP141]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP301:%.*]] = load float, ptr [[TMP143]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP301]], ptr [[TMP144]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP315:%.*]] = load float, ptr [[TMP146]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP315]], ptr [[TMP147]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP303:%.*]] = load float, ptr [[TMP149]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP303]], ptr [[TMP150]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP304:%.*]] = load float, ptr [[TMP152]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP304]], ptr [[TMP153]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP305:%.*]] = load float, ptr [[TMP155]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP305]], ptr [[TMP156]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP366:%.*]] = load float, ptr [[TMP100]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP366]], ptr [[TMP101]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP367:%.*]] = load float, ptr [[TMP103]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP367]], ptr [[TMP104]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP368:%.*]] = load float, ptr [[TMP106]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP368]], ptr [[TMP107]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP369:%.*]] = load float, ptr [[TMP109]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP369]], ptr [[TMP110]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP370:%.*]] = load float, ptr [[TMP112]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP370]], ptr [[TMP113]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP371:%.*]] = load float, ptr [[TMP115]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP371]], ptr [[TMP116]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP372:%.*]] = load float, ptr [[TMP118]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: 
store float [[TMP372]], ptr [[TMP119]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP373:%.*]] = load float, ptr [[TMP121]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP373]], ptr [[TMP122]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP374:%.*]] = load float, ptr [[TMP124]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP374]], ptr [[TMP125]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP375:%.*]] = load float, ptr [[TMP127]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP375]], ptr [[TMP128]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP376:%.*]] = load float, ptr [[TMP130]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP376]], ptr [[TMP131]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP377:%.*]] = load float, ptr [[TMP133]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP377]], ptr [[TMP134]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP378:%.*]] = load float, ptr [[TMP136]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP378]], ptr [[TMP137]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP379:%.*]] = load float, ptr [[TMP139]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP379]], ptr [[TMP140]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP380:%.*]] = load float, ptr [[TMP142]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP380]], ptr [[TMP143]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP381:%.*]] = load float, ptr [[TMP145]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP381]], ptr [[TMP146]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP492:%.*]] = load float, ptr [[TMP148]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP492]], ptr [[TMP149]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP383:%.*]] = load float, ptr [[TMP151]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP383]], ptr [[TMP152]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP384:%.*]] = load float, ptr [[TMP154]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP384]], ptr [[TMP155]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP385:%.*]] = load float, ptr [[TMP157]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP385]], ptr [[TMP158]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP386:%.*]] = load float, ptr [[TMP160]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP386]], ptr [[TMP161]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP387:%.*]] = load float, ptr [[TMP163]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP387]], ptr [[TMP164]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP388:%.*]] = load float, ptr [[TMP166]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP388]], ptr [[TMP167]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP389:%.*]] = load float, ptr [[TMP169]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP389]], ptr [[TMP170]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP390:%.*]] = load float, ptr [[TMP172]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP390]], ptr [[TMP173]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP391:%.*]] = load float, ptr [[TMP175]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP391]], ptr [[TMP176]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP392:%.*]] = load float, ptr [[TMP178]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP392]], ptr [[TMP179]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP393:%.*]] = load float, ptr [[TMP181]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP393]], ptr [[TMP182]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP394:%.*]] = load float, ptr [[TMP184]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP394]], ptr [[TMP185]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP395:%.*]] = load float, ptr [[TMP187]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store float [[TMP395]], ptr [[TMP188]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP318:%.*]] = getelementptr inbounds 
[[STRUCT_OUTERPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP321:%.*]] = load i32, ptr [[TMP318]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP321]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP308:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP323:%.*]] = load i32, ptr [[TMP308]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP323]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP310:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP325:%.*]] = load i32, ptr [[TMP310]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP325]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP312:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 3 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP327:%.*]] = load i32, ptr [[TMP312]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP327]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP314:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP329:%.*]] = load i32, ptr [[TMP314]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP329]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP316:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 5 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP331:%.*]] = load i32, ptr [[TMP316]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP331]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP319:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 6 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP333:%.*]] = load i32, ptr [[TMP319]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP333]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 6), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP320:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 7 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP335:%.*]] = load i32, ptr [[TMP320]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP335]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP322:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 8 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP337:%.*]] = load i32, ptr [[TMP322]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP337]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP324:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 9 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP339:%.*]] = load i32, ptr [[TMP324]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP339]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP326:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 10 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP341:%.*]] = load i32, ptr [[TMP326]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP341]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP328:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 11 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP343:%.*]] = load i32, ptr [[TMP328]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP343]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP330:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 12 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP345:%.*]] = load i32, ptr [[TMP330]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP345]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP332:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 13 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP347:%.*]] = load i32, ptr [[TMP332]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP347]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP334:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 14 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP349:%.*]] = load i32, ptr [[TMP334]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP349]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP336:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 15 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP353:%.*]] = load i32, ptr [[TMP336]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP353]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP338:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 16 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP355:%.*]] = load i32, ptr [[TMP338]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP355]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP340:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 17 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP357:%.*]] = load i32, ptr [[TMP340]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP357]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP342:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 18 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP359:%.*]] = load i32, ptr [[TMP342]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP359]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP344:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 19 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP361:%.*]] = load i32, ptr [[TMP344]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP361]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP346:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 20 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP363:%.*]] = load i32, ptr [[TMP346]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP363]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP348:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 21 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP365:%.*]] = load i32, ptr [[TMP348]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP365]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP350:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 22 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP367:%.*]] = load i32, ptr [[TMP350]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP367]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP352:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 23 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP369:%.*]] = load i32, ptr [[TMP352]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP369]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP354:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 24 -; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP371:%.*]] = load i32, ptr [[TMP354]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP371]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP356:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 25 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP373:%.*]] = load i32, ptr [[TMP356]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP373]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP358:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 26 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP375:%.*]] = load i32, ptr [[TMP358]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP375]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP360:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 27 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP377:%.*]] = load i32, ptr [[TMP360]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP377]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP362:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 28 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP379:%.*]] = load i32, ptr [[TMP362]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP379]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP364:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 29 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP381:%.*]] = load i32, ptr [[TMP364]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP381]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP397:%.*]] = load i32, ptr [[TMP318]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP397]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP398:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP399:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP400:%.*]] = load i32, ptr [[TMP399]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP400]], ptr [[TMP398]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP401:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP402:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP403:%.*]] = load i32, ptr [[TMP402]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP403]], ptr [[TMP401]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP404:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP405:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP406:%.*]] = load i32, ptr [[TMP405]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP406]], ptr [[TMP404]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP407:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP408:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP409:%.*]] = load i32, ptr [[TMP408]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP409]], ptr [[TMP407]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP410:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP411:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP412:%.*]] = load i32, ptr [[TMP411]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP412]], 
ptr [[TMP410]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP413:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP414:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP415:%.*]] = load i32, ptr [[TMP414]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP415]], ptr [[TMP413]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP416:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP417:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP418:%.*]] = load i32, ptr [[TMP417]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP418]], ptr [[TMP416]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP419:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP420:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP421:%.*]] = load i32, ptr [[TMP420]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP421]], ptr [[TMP419]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP422:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP423:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 9 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP424:%.*]] = load i32, ptr [[TMP423]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP424]], ptr [[TMP422]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP425:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP426:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 10 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP427:%.*]] = load i32, ptr [[TMP426]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP427]], ptr [[TMP425]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP428:%.*]] 
= getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP429:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 11 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP430:%.*]] = load i32, ptr [[TMP429]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP430]], ptr [[TMP428]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP431:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP432:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 12 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP433:%.*]] = load i32, ptr [[TMP432]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP433]], ptr [[TMP431]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP434:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP435:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 13 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP436:%.*]] = load i32, ptr [[TMP435]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP436]], ptr [[TMP434]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP437:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP438:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 14 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP439:%.*]] = load i32, ptr [[TMP438]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP439]], ptr [[TMP437]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP440:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP441:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 15 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP442:%.*]] = load i32, ptr [[TMP441]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP442]], ptr [[TMP440]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP443:%.*]] = getelementptr inbounds i32, ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], i32 16 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP444:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 16 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP445:%.*]] = load i32, ptr [[TMP444]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP445]], ptr [[TMP443]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP446:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP447:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 17 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP448:%.*]] = load i32, ptr [[TMP447]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP448]], ptr [[TMP446]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP449:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP450:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 18 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP451:%.*]] = load i32, ptr [[TMP450]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP451]], ptr [[TMP449]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP452:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 19 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP453:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 19 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP454:%.*]] = load i32, ptr [[TMP453]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP454]], ptr [[TMP452]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP455:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 20 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP456:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 20 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP457:%.*]] = load i32, ptr [[TMP456]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP457]], ptr [[TMP455]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP458:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 21 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP459:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 21 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP460:%.*]] = load i32, ptr [[TMP459]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP460]], ptr [[TMP458]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP461:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP462:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 22 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP463:%.*]] = load i32, ptr [[TMP462]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP463]], ptr [[TMP461]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP464:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP465:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 23 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP466:%.*]] = load i32, ptr [[TMP465]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP466]], ptr [[TMP464]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP467:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP468:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 24 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP469:%.*]] = load i32, ptr [[TMP468]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP469]], ptr [[TMP467]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP470:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP471:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 25 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP472:%.*]] = load i32, ptr [[TMP471]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP472]], ptr [[TMP470]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP473:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP474:%.*]] = getelementptr 
inbounds i32, ptr [[TMP318]], i32 26 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP475:%.*]] = load i32, ptr [[TMP474]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP475]], ptr [[TMP473]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP476:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP477:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 27 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP478:%.*]] = load i32, ptr [[TMP477]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP478]], ptr [[TMP476]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP479:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP480:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 28 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP481:%.*]] = load i32, ptr [[TMP480]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP481]], ptr [[TMP479]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP482:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP483:%.*]] = getelementptr inbounds i32, ptr [[TMP318]], i32 29 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP484:%.*]] = load i32, ptr [[TMP483]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP484]], ptr [[TMP482]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP382:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP382]]), !continuation.registercount [[META23]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP486:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP382]], [10 x i32] poison, [30 x i32] [[TMP486]]), !continuation.registercount [[META23]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; %1 = alloca %struct.OuterPayload, align 8 @@ -945,10 +1123,10 @@ define void @Callable(%struct.OuterPayload* noalias %outerPayload) #0 !types !23 } ; Function Attrs: nounwind -declare !types !29 void @dx.op.traceRay.struct.InnerPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.InnerPayload*) #0 +declare !pointeetys !29 void @dx.op.traceRay.struct.InnerPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.InnerPayload*) #0 ; Function Attrs: nounwind -declare !types !31 void @dx.op.callShader.struct.OuterPayload(i32, i32, %struct.OuterPayload*) #0 +declare !pointeetys !31 void @dx.op.callShader.struct.OuterPayload(i32, i32, %struct.OuterPayload*) #0 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 @@ -956,9 +1134,6 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
; Function Attrs: nounwind memory(read) declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #3 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #3 @@ -969,25 +1144,25 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #3 ; Function Attrs: alwaysinline -declare !types !32 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #3 +declare !pointeetys !32 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #3 ; Function Attrs: alwaysinline -declare !types !34 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #3 +declare !pointeetys !34 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #3 ; Function Attrs: alwaysinline -declare !types !35 i1 @_cont_IsEndSearch(%struct.TraversalData*) #3 +declare !pointeetys !35 i1 @_cont_IsEndSearch(%struct.TraversalData*) #3 ; Function Attrs: nounwind memory(read) -declare !types !37 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 +declare !pointeetys !37 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !39 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !39 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !41 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 +declare !pointeetys !41 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 ; Function Attrs: alwaysinline 
-define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !types !43 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !pointeetys !43 { ; LOWERRAYTRACINGPIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR3:[0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: ret i32 5 @@ -996,7 +1171,7 @@ define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #3 !types } ; Function Attrs: alwaysinline -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #3 !types !44 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #3 !pointeetys !44 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -1007,7 +1182,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: alwaysinline -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #3 !types !45 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #3 !pointeetys !45 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -1016,10 +1191,10 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #3 !typ } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare 
!types !46 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !46 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !46 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 +declare !pointeetys !46 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #4 attributes #0 = { nounwind } attributes #1 = { nounwind memory(none) } @@ -1059,28 +1234,29 @@ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !20 = !{i32 0} !21 = !{void (%struct.OuterPayload*)* @Callable, !"Callable", null, null, !22} !22 = !{i32 8, i32 12, i32 6, i32 120, i32 5, !20} -!23 = !{!"function", !"void", !24} +!23 = !{%struct.OuterPayload poison} !24 = !{i32 0, %struct.OuterPayload poison} !25 = !{!26, !26, i64 0} !26 = !{!"float", !27, i64 0} !27 = !{!"omnipotent char", !28, i64 0} !28 = !{!"Simple C/C++ TBAA"} -!29 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !30} +!29 = !{%struct.InnerPayload poison} !30 = !{i32 0, %struct.InnerPayload poison} -!31 = !{!"function", !"void", i32 poison, i32 poison, !24} -!32 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !33} +!31 = !{%struct.OuterPayload poison} +!32 = !{%struct.SystemData poison} !33 = !{i32 0, %struct.SystemData poison} -!34 = !{!"function", !"void", !33, %struct.BuiltInTriangleIntersectionAttributes poison} -!35 = !{!"function", i1 poison, !36} +!34 = !{%struct.SystemData poison} +!35 = !{%struct.TraversalData poison} !36 = !{i32 0, %struct.TraversalData poison} -!37 = !{!"function", i32 poison, !33, !38} +!37 = !{null, %struct.SystemData poison, %struct.HitData poison} !38 = !{i32 0, %struct.HitData poison} -!39 = !{!"function", !"void", !40} +!39 = 
!{%struct.DispatchSystemData poison} !40 = !{i32 0, %struct.DispatchSystemData poison} -!41 = !{!"function", !"void", !42} +!41 = !{%struct.AnyHitTraversalData poison} !42 = !{i32 0, %struct.AnyHitTraversalData poison} -!43 = !{!"function", i32 poison, !40} -!44 = !{!"function", !"void", !40, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!45 = !{!"function", !"void", !40, i32 poison} -!46 = !{!"function", !"void", i64 poison, !47} +!43 = !{%struct.DispatchSystemData poison} +!44 = !{%struct.DispatchSystemData poison} +!45 = !{%struct.DispatchSystemData poison} +!46 = !{i8 poison} !47 = !{i32 0, i8 poison} +!48 = !{%struct.AnyHitTraversalData poison} diff --git a/llvmraytracing/test/dx/payload.ll b/llvmraytracing/test/dx/payload.ll index a1e7a5b953..c64fe6ec79 100644 --- a/llvmraytracing/test/dx/payload.ll +++ b/llvmraytracing/test/dx/payload.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck -check-prefix=CLEANUP %s -; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each 
-passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck -check-prefix=CLEANUP %s +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' \ ; RUN: -S --lint-abort-on-error | FileCheck -check-prefix=POST-PROCESS %s -; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=POST-PROCESS-GLOBAL %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -24,9 +24,6 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: ; Function Attrs: alwaysinline declare i32 @_cont_GetContinuationStackAddr() #0 -; Function Attrs: alwaysinline -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdWaitAwaitTraversal(i64, i64, 
%struct.TraversalData) #0 @@ -36,46 +33,46 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD ; Function Attrs: alwaysinline declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) #0 !types !17 { +define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) #0 !pointeetys !17 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 %val = load %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret %struct.BuiltInTriangleIntersectionAttributes %val } ; Function Attrs: alwaysinline -declare !types !19 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !19 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 ; Function Attrs: alwaysinline -declare !types !20 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !20 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 ; Function Attrs: nounwind memory(read) -declare !types !22 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 +declare !pointeetys !22 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone, %struct.HitData*) #1 ; Function Attrs: nounwind memory(none) -declare !types !24 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #2 +declare !pointeetys !24 void 
@_AmdRestoreSystemData(%struct.DispatchSystemData*) #2 ; Function Attrs: nounwind memory(none) -declare !types !26 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #2 +declare !pointeetys !26 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #2 ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #3 ; Function Attrs: nounwind memory(none) -declare !types !26 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #2 +declare !pointeetys !26 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #2 ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !28 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !28 { ret i32 5 } declare i64 @_cont_GetContinuationStackGlobalMemBase() ; Function Attrs: alwaysinline -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !29 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !29 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -88,7 +85,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i } ; Function Attrs: alwaysinline -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !30 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !pointeetys !30 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call 
%struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -97,7 +94,7 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ } ; Function Attrs: alwaysinline -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !31 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !31 { %trav_data = load %struct.AnyHitTraversalData, %struct.AnyHitTraversalData* %data, align 4 %newdata = call %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64 3, %struct.AnyHitTraversalData %trav_data, float %t, i32 %hitKind) store %struct.AnyHitTraversalData %newdata, %struct.AnyHitTraversalData* %data, align 4 @@ -117,12 +114,12 @@ define void @main() { } ; Function Attrs: nounwind -define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !32 { +define void @AnyHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !32 { ret void } ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !types !32 { +define void @ClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #3 !pointeetys !32 { %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) @@ -132,7 +129,7 @@ define void @ClosestHit(%struct.RayPayload* noalias nocapture 
%payload, %struct. } ; Function Attrs: nounwind -declare !types !35 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #3 +declare !pointeetys !35 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #3 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 @@ -169,25 +166,25 @@ attributes #3 = { nounwind } !14 = !{i32 8, i32 9, i32 5, !8} !15 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !16} !16 = !{i32 8, i32 10, i32 5, !8} -!17 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !18} +!17 = !{%struct.SystemData poison} !18 = !{i32 0, %struct.SystemData poison} -!19 = !{!"function", !"void", !18, %struct.BuiltInTriangleIntersectionAttributes poison} -!20 = !{!"function", i1 poison, !21} +!19 = !{%struct.SystemData poison} +!20 = !{%struct.TraversalData poison} !21 = !{i32 0, %struct.TraversalData poison} -!22 = !{!"function", i32 poison, !18, !23} +!22 = !{null, %struct.SystemData poison, %struct.HitData poison} !23 = !{i32 0, %struct.HitData poison} -!24 = !{!"function", !"void", !25} +!24 = !{%struct.DispatchSystemData poison} !25 = !{i32 0, %struct.DispatchSystemData poison} -!26 = !{!"function", !"void", !27} +!26 = !{%struct.AnyHitTraversalData poison} !27 = !{i32 0, %struct.AnyHitTraversalData poison} -!28 = !{!"function", i32 poison, !25} -!29 = !{!"function", !"void", !25, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!30 = !{!"function", !"void", !25, i32 poison} -!31 = !{!"function", i1 poison, !27, float poison, i32 
poison} -!32 = !{!"function", !"void", !33, !34} +!28 = !{%struct.DispatchSystemData poison} +!29 = !{%struct.DispatchSystemData poison} +!30 = !{%struct.DispatchSystemData poison} +!31 = !{%struct.AnyHitTraversalData poison} +!32 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !33 = !{i32 0, %struct.RayPayload poison} !34 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!35 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !33} +!35 = !{%struct.RayPayload poison} !36 = !{i32 22} ; CLEANUP-LABEL: define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes( ; CLEANUP-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { @@ -206,44 +203,20 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 108) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT56:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CLEANUP-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; CLEANUP-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; CLEANUP-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) ; CLEANUP-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; CLEANUP-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) -; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT56]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; CLEANUP-NEXT: [[ADDR_I:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3:[0-9]+]] ; CLEANUP-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; CLEANUP-NEXT: [[TMP6:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 +; CLEANUP-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(32) ; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP7]], align 4 ; CLEANUP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 1 ; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP8]], align 4 @@ -297,95 +270,134 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP32]], align 4 ; CLEANUP-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP7]], i32 26 ; CLEANUP-NEXT: store i32 undef, ptr addrspace(32) [[TMP33]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP6]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; CLEANUP-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; CLEANUP-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; CLEANUP-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; CLEANUP-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; CLEANUP-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; CLEANUP-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 undef, 7 +; CLEANUP-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 undef, 8 +; CLEANUP-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 undef, 9 +; CLEANUP-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 undef, 10 +; CLEANUP-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 undef, 11 +; CLEANUP-NEXT: 
[[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 undef, 12 +; CLEANUP-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 undef, 13 +; CLEANUP-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 undef, 14 +; CLEANUP-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 undef, 15 +; CLEANUP-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 undef, 16 +; CLEANUP-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 undef, 17 +; CLEANUP-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 undef, 18 +; CLEANUP-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 undef, 19 +; CLEANUP-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 undef, 20 +; CLEANUP-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 undef, 21 +; CLEANUP-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 undef, 22 +; CLEANUP-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 undef, 23 +; CLEANUP-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 undef, 24 +; CLEANUP-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 undef, 25 +; CLEANUP-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 undef, 26 +; CLEANUP-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 undef, 27 +; CLEANUP-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 undef, 28 +; CLEANUP-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 undef, 29 ; CLEANUP-NEXT: [[TMP34:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @main.resume.0) -; CLEANUP-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 4, i64 -1, i64 [[TMP34]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount [[META17]] +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 4, i32 -1, {} poison, i64 [[TMP34]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount [[META17]], !waitmask [[META22:![0-9]+]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @main.resume.0( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META17]] !continuation [[META20]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [23 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META17]] !continuation [[META20]] { ; CLEANUP-NEXT: entryresume.0: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 108) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[MAIN_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(32) [[TMP2]], align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 1 +; CLEANUP-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 2 +; CLEANUP-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 3 +; CLEANUP-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 4 +; CLEANUP-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 5 +; CLEANUP-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 6 +; CLEANUP-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 7 +; CLEANUP-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 8 +; CLEANUP-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 9 +; CLEANUP-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 10 +; CLEANUP-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 11 +; CLEANUP-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 12 +; CLEANUP-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 13 +; CLEANUP-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 14 +; CLEANUP-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 15 +; CLEANUP-NEXT: 
[[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 16 +; CLEANUP-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 17 +; CLEANUP-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 18 +; CLEANUP-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 19 +; CLEANUP-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 20 +; CLEANUP-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 21 +; CLEANUP-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 22 +; CLEANUP-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 23 +; CLEANUP-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 24 +; CLEANUP-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 25 +; CLEANUP-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 26 +; CLEANUP-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 27 +; CLEANUP-NEXT: [[TMP23:%.*]] = extractvalue [30 x i32] [[TMP3]], 28 +; CLEANUP-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 29 +; CLEANUP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP2]], align 4 ; CLEANUP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 1 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(32) [[TMP27]], align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP27]], align 4 ; CLEANUP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 2 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(32) [[TMP29]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP29]], align 4 ; CLEANUP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 3 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(32) [[TMP31]], align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = 
load i32, ptr addrspace(32) [[TMP31]], align 4 ; CLEANUP-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(32) [[TMP33]], align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP33]], align 4 ; CLEANUP-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 5 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(32) [[TMP35]], align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(32) [[TMP35]], align 4 ; CLEANUP-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 6 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(32) [[TMP37]], align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(32) [[TMP37]], align 4 ; CLEANUP-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 7 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(32) [[TMP39]], align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(32) [[TMP39]], align 4 ; CLEANUP-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 8 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(32) [[TMP41]], align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(32) [[TMP41]], align 4 ; CLEANUP-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 9 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(32) [[TMP43]], align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP43]], align 4 ; CLEANUP-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 10 -; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(32) [[TMP45]], align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(32) [[TMP45]], align 4 ; CLEANUP-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 11 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(32) [[TMP47]], align 
4 +; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(32) [[TMP47]], align 4 ; CLEANUP-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 12 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(32) [[TMP49]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(32) [[TMP49]], align 4 ; CLEANUP-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 13 -; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(32) [[TMP51]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(32) [[TMP51]], align 4 ; CLEANUP-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 14 -; CLEANUP-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(32) [[TMP53]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(32) [[TMP53]], align 4 ; CLEANUP-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 15 -; CLEANUP-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(32) [[TMP55]], align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(32) [[TMP55]], align 4 ; CLEANUP-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 16 -; CLEANUP-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(32) [[TMP57]], align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(32) [[TMP57]], align 4 ; CLEANUP-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 17 -; CLEANUP-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(32) [[TMP59]], align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(32) [[TMP59]], align 4 ; CLEANUP-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 18 -; CLEANUP-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(32) [[TMP61]], align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(32) [[TMP61]], align 4 ; CLEANUP-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 19 -; CLEANUP-NEXT: [[TMP64:%.*]] = load 
i32, ptr addrspace(32) [[TMP63]], align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(32) [[TMP63]], align 4 ; CLEANUP-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 20 -; CLEANUP-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(32) [[TMP65]], align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(32) [[TMP65]], align 4 ; CLEANUP-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 21 -; CLEANUP-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(32) [[TMP67]], align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(32) [[TMP67]], align 4 ; CLEANUP-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 22 -; CLEANUP-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(32) [[TMP69]], align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(32) [[TMP69]], align 4 ; CLEANUP-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 23 -; CLEANUP-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(32) [[TMP71]], align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(32) [[TMP71]], align 4 ; CLEANUP-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 24 -; CLEANUP-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(32) [[TMP73]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(32) [[TMP73]], align 4 ; CLEANUP-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 25 -; CLEANUP-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(32) [[TMP75]], align 4 +; CLEANUP-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(32) [[TMP75]], align 4 ; CLEANUP-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 26 -; CLEANUP-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(32) [[TMP77]], align 4 -; CLEANUP-NEXT: [[TMP80:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(32) [[TMP77]], align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP58:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT57:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP58]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CLEANUP-NEXT: call void @lgc.cps.free(i32 108) ; CLEANUP-NEXT: ret void @@ -394,9 +406,39 @@ attributes #3 = { nounwind } ; ; ; CLEANUP-LABEL: define void @AnyHit( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META23:![0-9]+]] !continuation.state [[META8]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META23:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META24:![0-9]+]] !continuation.state [[META8]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; CLEANUP-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; CLEANUP-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; CLEANUP-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; CLEANUP-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; CLEANUP-NEXT: 
[[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; CLEANUP-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; CLEANUP-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; CLEANUP-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; CLEANUP-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; CLEANUP-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; CLEANUP-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; CLEANUP-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; CLEANUP-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; CLEANUP-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; CLEANUP-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; CLEANUP-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; CLEANUP-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; CLEANUP-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; CLEANUP-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; CLEANUP-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; CLEANUP-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; CLEANUP-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; CLEANUP-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; CLEANUP-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; CLEANUP-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; CLEANUP-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; CLEANUP-NEXT: 
[[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; CLEANUP-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; CLEANUP-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 @@ -428,578 +470,589 @@ attributes #3 = { nounwind } ; CLEANUP-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; CLEANUP-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; CLEANUP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; CLEANUP-NEXT: [[TMP3:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; 
CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; CLEANUP-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 ; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 1 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 ; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 2 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 ; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 3 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 ; CLEANUP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 4 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 ; CLEANUP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 5 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 ; CLEANUP-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 6 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 ; CLEANUP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr 
addrspace(32) [[TMP3]], i32 7 -; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 ; CLEANUP-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 8 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 ; CLEANUP-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 9 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 ; CLEANUP-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 10 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 ; CLEANUP-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 11 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 ; CLEANUP-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 12 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 ; CLEANUP-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 13 -; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 ; CLEANUP-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 14 -; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP54]], align 4 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP54]], align 4 ; CLEANUP-NEXT: 
[[TMP56:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 15 -; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 ; CLEANUP-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 16 -; CLEANUP-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 ; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 17 -; CLEANUP-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 ; CLEANUP-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 18 -; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 ; CLEANUP-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 19 -; CLEANUP-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 ; CLEANUP-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 20 -; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 ; CLEANUP-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 21 -; CLEANUP-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 ; CLEANUP-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 22 -; CLEANUP-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr 
addrspace(32) [[TMP70]], align 4 ; CLEANUP-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 23 -; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 ; CLEANUP-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 24 -; CLEANUP-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 +; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 ; CLEANUP-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 25 -; CLEANUP-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 +; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 ; CLEANUP-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP3]], i32 26 -; CLEANUP-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(32) [[TMP78]], align 4 +; CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP78]], align 4 ; CLEANUP-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; CLEANUP-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP82]], i32 0, i32 1 ; CLEANUP-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; CLEANUP-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP83:%.*]] = bitcast float 
[[DOTSROA_011_0_VEC_EXTRACT]] to i32 -; CLEANUP-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP84:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[DOTSROA_035_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; CLEANUP-NEXT: [[TMP81:%.*]] = bitcast float [[DOTSROA_035_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[DOTSROA_035_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; CLEANUP-NEXT: [[TMP83:%.*]] = bitcast float [[DOTSROA_035_4_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; CLEANUP-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; CLEANUP-NEXT: [[TMP85:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; 
CLEANUP-NEXT: store i32 [[TMP80]], ptr addrspace(32) [[TMP85]], align 4 +; CLEANUP-NEXT: [[TMP85:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT]] to ptr addrspace(32) +; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(32) [[TMP85]], align 4 ; CLEANUP-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 1 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP110]], align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(32) [[TMP110]], align 4 ; CLEANUP-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 2 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP111]], align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(32) [[TMP111]], align 4 ; CLEANUP-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 3 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP86]], align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(32) [[TMP86]], align 4 ; CLEANUP-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 4 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP87]], align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(32) [[TMP87]], align 4 ; CLEANUP-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 5 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP88]], align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(32) [[TMP88]], align 4 ; CLEANUP-NEXT: [[TMP89:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 6 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP89]], align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(32) [[TMP89]], align 4 ; CLEANUP-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 7 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP90]], align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(32) [[TMP90]], align 4 ; CLEANUP-NEXT: [[TMP91:%.*]] = 
getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 8 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr addrspace(32) [[TMP91]], align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(32) [[TMP91]], align 4 ; CLEANUP-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 9 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP92]], align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(32) [[TMP92]], align 4 ; CLEANUP-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 10 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP93]], align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(32) [[TMP93]], align 4 ; CLEANUP-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 11 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP94]], align 4 +; CLEANUP-NEXT: store i32 [[TMP26]], ptr addrspace(32) [[TMP94]], align 4 ; CLEANUP-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 12 -; CLEANUP-NEXT: store i32 [[TMP51]], ptr addrspace(32) [[TMP95]], align 4 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP95]], align 4 ; CLEANUP-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 13 -; CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP96]], align 4 +; CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP96]], align 4 ; CLEANUP-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 14 -; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP97]], align 4 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP97]], align 4 ; CLEANUP-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 15 -; CLEANUP-NEXT: store i32 [[TMP57]], ptr addrspace(32) [[TMP98]], align 4 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP98]], align 4 ; CLEANUP-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr 
addrspace(32) [[TMP85]], i32 16 -; CLEANUP-NEXT: store i32 [[TMP59]], ptr addrspace(32) [[TMP99]], align 4 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP99]], align 4 ; CLEANUP-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 17 -; CLEANUP-NEXT: store i32 [[TMP61]], ptr addrspace(32) [[TMP100]], align 4 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP100]], align 4 ; CLEANUP-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 18 -; CLEANUP-NEXT: store i32 [[TMP63]], ptr addrspace(32) [[TMP101]], align 4 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP101]], align 4 ; CLEANUP-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 19 -; CLEANUP-NEXT: store i32 [[TMP65]], ptr addrspace(32) [[TMP102]], align 4 +; CLEANUP-NEXT: store i32 [[TMP43]], ptr addrspace(32) [[TMP102]], align 4 ; CLEANUP-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 20 -; CLEANUP-NEXT: store i32 [[TMP67]], ptr addrspace(32) [[TMP103]], align 4 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP103]], align 4 ; CLEANUP-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 21 -; CLEANUP-NEXT: store i32 [[TMP69]], ptr addrspace(32) [[TMP104]], align 4 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP104]], align 4 ; CLEANUP-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 22 -; CLEANUP-NEXT: store i32 [[TMP71]], ptr addrspace(32) [[TMP105]], align 4 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP105]], align 4 ; CLEANUP-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 23 -; CLEANUP-NEXT: store i32 [[TMP73]], ptr addrspace(32) [[TMP106]], align 4 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr addrspace(32) [[TMP106]], align 4 ; CLEANUP-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr 
addrspace(32) [[TMP85]], i32 24 -; CLEANUP-NEXT: store i32 [[TMP75]], ptr addrspace(32) [[TMP107]], align 4 +; CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP107]], align 4 ; CLEANUP-NEXT: [[TMP108:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 25 -; CLEANUP-NEXT: store i32 [[TMP77]], ptr addrspace(32) [[TMP108]], align 4 +; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP108]], align 4 ; CLEANUP-NEXT: [[TMP109:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP85]], i32 26 -; CLEANUP-NEXT: store i32 [[TMP79]], ptr addrspace(32) [[TMP109]], align 4 +; CLEANUP-NEXT: store i32 [[TMP57]], ptr addrspace(32) [[TMP109]], align 4 ; CLEANUP-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP114:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; CLEANUP-NEXT: [[TMP115:%.*]] = bitcast i32 [[TMP114]] to float -; CLEANUP-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP115]], i32 0 +; CLEANUP-NEXT: [[TMP112:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP113:%.*]] = bitcast i32 [[TMP112]] to float +; CLEANUP-NEXT: [[DOTSROA_037_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP113]], i32 0 ; CLEANUP-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP116:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; CLEANUP-NEXT: [[TMP117:%.*]] = bitcast i32 [[TMP116]] to float -; CLEANUP-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP117]], i32 1 -; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; CLEANUP-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; CLEANUP-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP118]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) -; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 +; CLEANUP-NEXT: [[TMP114:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[TMP115:%.*]] = bitcast i32 [[TMP114]] to float +; CLEANUP-NEXT: [[DOTSROA_037_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_037_0_VEC_INSERT]], float [[TMP115]], i32 1 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT36:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_037_4_VEC_INSERT]], 0 +; CLEANUP-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP116]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT36]]) +; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_GEP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP25]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; CLEANUP-NEXT: [[DOTFCA_0_0_1_0_GEP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1, i32 0 -; CLEANUP-NEXT: [[DOTFCA_0_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_0_1_0_GEP2]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_0_1_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 
0, i32 0, i32 0, i32 1, i32 0 +; CLEANUP-NEXT: [[DOTFCA_0_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_0_1_0_GEP26]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_0_1_0_LOAD]], 0, 0, 1, 0 -; CLEANUP-NEXT: [[DOTFCA_0_1_0_GEP3:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; CLEANUP-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP3]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_1_0_GEP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP27]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_1_0_INSERT]], float [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; CLEANUP-NEXT: [[DOTFCA_0_1_1_GEP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; CLEANUP-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP4]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_1_1_GEP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; CLEANUP-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP28]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], i32 [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; CLEANUP-NEXT: [[DOTFCA_0_2_GEP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; CLEANUP-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP5]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_2_GEP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; CLEANUP-NEXT: 
[[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP29]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; CLEANUP-NEXT: [[DOTFCA_0_3_GEP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; CLEANUP-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP6]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_3_GEP30:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; CLEANUP-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP30]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; CLEANUP-NEXT: [[DOTFCA_0_4_GEP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; CLEANUP-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP7]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_4_GEP31:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; CLEANUP-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP31]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; CLEANUP-NEXT: [[DOTFCA_0_5_GEP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; CLEANUP-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP8]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_5_GEP32:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; CLEANUP-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP32]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; CLEANUP-NEXT: [[DOTFCA_1_0_GEP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; CLEANUP-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP9]], align 4 +; CLEANUP-NEXT: [[DOTFCA_1_0_GEP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP33]], align 4 ; CLEANUP-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 -; CLEANUP-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; CLEANUP-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 +; CLEANUP-NEXT: [[DOTFCA_1_1_GEP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP34]], align 4 ; CLEANUP-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 -; CLEANUP-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] +; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-NEXT: [[DOTFCA_15_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 poison, i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define void @ClosestHit( -; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META25:![0-9]+]] !continuation.stacksize [[META26:![0-9]+]] !continuation.state [[META27:![0-9]+]] { +; CLEANUP-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [21 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META26:![0-9]+]] !continuation.stacksize [[META27:![0-9]+]] !continuation.state [[META28:![0-9]+]] { ; CLEANUP-NEXT: AllocaSpillBB: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 120) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 ; CLEANUP-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: store i64 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT_SPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: store i32 [[PAYLOAD_FCA_0_EXTRACT]], ptr addrspace(32) [[PAYLOAD_FCA_0_EXTRACT_SPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; CLEANUP-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; 
CLEANUP-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; CLEANUP-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; CLEANUP-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; CLEANUP-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; CLEANUP-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; CLEANUP-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; CLEANUP-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; CLEANUP-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; CLEANUP-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; CLEANUP-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; CLEANUP-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; CLEANUP-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; CLEANUP-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; CLEANUP-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; CLEANUP-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; CLEANUP-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; CLEANUP-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; CLEANUP-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; CLEANUP-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; CLEANUP-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; CLEANUP-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; CLEANUP-NEXT: 
[[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; CLEANUP-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; CLEANUP-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; CLEANUP-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; CLEANUP-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; CLEANUP-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; CLEANUP-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; CLEANUP-NEXT: [[TMP1:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; CLEANUP-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 +; CLEANUP-NEXT: [[TMP1:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT]] to ptr addrspace(32) +; 
CLEANUP-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(32) [[TMP1]], align 4 ; CLEANUP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 1 -; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(32) [[TMP26]], align 4 +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP26]], align 4 ; CLEANUP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 2 -; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP28]], align 4 ; CLEANUP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 3 -; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP30]], align 4 ; CLEANUP-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 4 -; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(32) [[TMP32]], align 4 ; CLEANUP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 5 -; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP34]], align 4 ; CLEANUP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 6 -; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 ; CLEANUP-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 7 -; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(32) [[TMP38]], align 4 ; CLEANUP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 8 -; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr 
addrspace(32) [[TMP40]], align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(32) [[TMP40]], align 4 ; CLEANUP-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 9 -; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(32) [[TMP42]], align 4 ; CLEANUP-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 10 -; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP44]], align 4 ; CLEANUP-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 11 -; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(32) [[TMP46]], align 4 ; CLEANUP-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 12 -; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 +; CLEANUP-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(32) [[TMP48]], align 4 ; CLEANUP-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 13 -; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 +; CLEANUP-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(32) [[TMP50]], align 4 ; CLEANUP-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 14 -; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 +; CLEANUP-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(32) [[TMP52]], align 4 ; CLEANUP-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 15 -; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP54]], align 4 +; CLEANUP-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(32) [[TMP54]], align 4 ; CLEANUP-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 16 -; 
CLEANUP-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 +; CLEANUP-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(32) [[TMP56]], align 4 ; CLEANUP-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 17 -; CLEANUP-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 +; CLEANUP-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(32) [[TMP58]], align 4 ; CLEANUP-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 18 -; CLEANUP-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 +; CLEANUP-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(32) [[TMP60]], align 4 ; CLEANUP-NEXT: [[TMP62:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 19 -; CLEANUP-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 +; CLEANUP-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(32) [[TMP62]], align 4 ; CLEANUP-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 20 -; CLEANUP-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 +; CLEANUP-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(32) [[TMP64]], align 4 ; CLEANUP-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 21 -; CLEANUP-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 +; CLEANUP-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(32) [[TMP66]], align 4 ; CLEANUP-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 22 -; CLEANUP-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 +; CLEANUP-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(32) [[TMP68]], align 4 ; CLEANUP-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 23 -; CLEANUP-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 +; CLEANUP-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(32) [[TMP70]], align 4 ; CLEANUP-NEXT: [[TMP72:%.*]] = getelementptr inbounds 
i32, ptr addrspace(32) [[TMP1]], i32 24 -; CLEANUP-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 +; CLEANUP-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(32) [[TMP72]], align 4 ; CLEANUP-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 25 -; CLEANUP-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 +; CLEANUP-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(32) [[TMP74]], align 4 ; CLEANUP-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP1]], i32 26 -; CLEANUP-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 -; CLEANUP-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTSPILL_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 -; CLEANUP-NEXT: store i32 [[TMP80]], ptr addrspace(32) [[DOTSPILL_ADDR]], align 4 +; CLEANUP-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP76]], align 4 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; CLEANUP-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; CLEANUP-NEXT: [[TMP81:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 -; CLEANUP-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; CLEANUP-NEXT: [[TMP82:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; CLEANUP-NEXT: [[TMP57:%.*]] = bitcast float [[DOTSROA_0256_0_VEC_EXTRACT]] to i32 +; CLEANUP-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] 
= extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; CLEANUP-NEXT: [[TMP63:%.*]] = bitcast float [[DOTSROA_0256_4_VEC_EXTRACT]] to i32 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[TMP83:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; CLEANUP-NEXT: [[TMP84:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; CLEANUP-NEXT: [[TMP85:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP83]]) -; CLEANUP-NEXT: [[TMP86:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP85]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; CLEANUP-NEXT: [[TMP87:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP86]]) +; CLEANUP-NEXT: [[TMP59:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-NEXT: [[TMP65:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-NEXT: [[TMP67:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP59]]) +; CLEANUP-NEXT: [[TMP69:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP67]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-NEXT: [[TMP61:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP69]]) ; CLEANUP-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; CLEANUP-NEXT: [[SYS_DATA_I:%.*]] = insertvalue 
[[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; CLEANUP-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; CLEANUP-NEXT: [[ADDR_I1:%.*]] = call i64 @_AmdGetResumePointAddr() #[[ATTR3]] ; CLEANUP-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I1]], 5 ; CLEANUP-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(32) [[PAYLOAD_SPILL_ALLOCA]] to i32 -; CLEANUP-NEXT: store i32 [[TMP88]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP89:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP78]], ptr addrspace(32) [[TMP89]], align 4 +; CLEANUP-NEXT: [[TMP89:%.*]] = inttoptr i32 [[TMP88]] to ptr addrspace(32) +; CLEANUP-NEXT: store i32 [[TMP2]], ptr addrspace(32) [[TMP89]], align 4 ; CLEANUP-NEXT: [[TMP114:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 1 -; CLEANUP-NEXT: store i32 
[[TMP27]], ptr addrspace(32) [[TMP114]], align 4 +; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(32) [[TMP114]], align 4 ; CLEANUP-NEXT: [[TMP115:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 2 -; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP115]], align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(32) [[TMP115]], align 4 ; CLEANUP-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 3 -; CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP90]], align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(32) [[TMP90]], align 4 ; CLEANUP-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 4 -; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP91]], align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(32) [[TMP91]], align 4 ; CLEANUP-NEXT: [[TMP92:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 5 -; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP92]], align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(32) [[TMP92]], align 4 ; CLEANUP-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 6 -; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP93]], align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(32) [[TMP93]], align 4 ; CLEANUP-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 7 -; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP94]], align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(32) [[TMP94]], align 4 ; CLEANUP-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 8 -; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP95]], align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(32) [[TMP95]], align 4 ; CLEANUP-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 9 -; CLEANUP-NEXT: store i32 [[TMP43]], ptr addrspace(32) 
[[TMP96]], align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(32) [[TMP96]], align 4 ; CLEANUP-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 10 -; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP97]], align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(32) [[TMP97]], align 4 ; CLEANUP-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 11 -; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP98]], align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(32) [[TMP98]], align 4 ; CLEANUP-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 12 -; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP99]], align 4 +; CLEANUP-NEXT: store i32 [[TMP27]], ptr addrspace(32) [[TMP99]], align 4 ; CLEANUP-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 13 -; CLEANUP-NEXT: store i32 [[TMP51]], ptr addrspace(32) [[TMP100]], align 4 +; CLEANUP-NEXT: store i32 [[TMP29]], ptr addrspace(32) [[TMP100]], align 4 ; CLEANUP-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 14 -; CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP101]], align 4 +; CLEANUP-NEXT: store i32 [[TMP31]], ptr addrspace(32) [[TMP101]], align 4 ; CLEANUP-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 15 -; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP102]], align 4 +; CLEANUP-NEXT: store i32 [[TMP33]], ptr addrspace(32) [[TMP102]], align 4 ; CLEANUP-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 16 -; CLEANUP-NEXT: store i32 [[TMP57]], ptr addrspace(32) [[TMP103]], align 4 +; CLEANUP-NEXT: store i32 [[TMP35]], ptr addrspace(32) [[TMP103]], align 4 ; CLEANUP-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 17 -; CLEANUP-NEXT: store i32 [[TMP59]], ptr addrspace(32) [[TMP104]], align 
4 +; CLEANUP-NEXT: store i32 [[TMP37]], ptr addrspace(32) [[TMP104]], align 4 ; CLEANUP-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 18 -; CLEANUP-NEXT: store i32 [[TMP61]], ptr addrspace(32) [[TMP105]], align 4 +; CLEANUP-NEXT: store i32 [[TMP39]], ptr addrspace(32) [[TMP105]], align 4 ; CLEANUP-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 19 -; CLEANUP-NEXT: store i32 [[TMP63]], ptr addrspace(32) [[TMP106]], align 4 +; CLEANUP-NEXT: store i32 [[TMP41]], ptr addrspace(32) [[TMP106]], align 4 ; CLEANUP-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 20 -; CLEANUP-NEXT: store i32 [[TMP65]], ptr addrspace(32) [[TMP107]], align 4 +; CLEANUP-NEXT: store i32 [[TMP43]], ptr addrspace(32) [[TMP107]], align 4 ; CLEANUP-NEXT: [[TMP108:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 21 -; CLEANUP-NEXT: store i32 [[TMP67]], ptr addrspace(32) [[TMP108]], align 4 +; CLEANUP-NEXT: store i32 [[TMP45]], ptr addrspace(32) [[TMP108]], align 4 ; CLEANUP-NEXT: [[TMP109:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 22 -; CLEANUP-NEXT: store i32 [[TMP69]], ptr addrspace(32) [[TMP109]], align 4 +; CLEANUP-NEXT: store i32 [[TMP47]], ptr addrspace(32) [[TMP109]], align 4 ; CLEANUP-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 23 -; CLEANUP-NEXT: store i32 [[TMP71]], ptr addrspace(32) [[TMP110]], align 4 +; CLEANUP-NEXT: store i32 [[TMP49]], ptr addrspace(32) [[TMP110]], align 4 ; CLEANUP-NEXT: [[TMP111:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 24 -; CLEANUP-NEXT: store i32 [[TMP73]], ptr addrspace(32) [[TMP111]], align 4 +; CLEANUP-NEXT: store i32 [[TMP51]], ptr addrspace(32) [[TMP111]], align 4 ; CLEANUP-NEXT: [[TMP112:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 25 -; CLEANUP-NEXT: store i32 [[TMP75]], ptr addrspace(32) [[TMP112]], align 4 +; 
CLEANUP-NEXT: store i32 [[TMP53]], ptr addrspace(32) [[TMP112]], align 4 ; CLEANUP-NEXT: [[TMP113:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP89]], i32 26 -; CLEANUP-NEXT: store i32 [[TMP77]], ptr addrspace(32) [[TMP113]], align 4 +; CLEANUP-NEXT: store i32 [[TMP55]], ptr addrspace(32) [[TMP113]], align 4 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT54:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP88]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT57:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT54]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-NEXT: [[DOTFCA_2_INSERT60:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT57]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-NEXT: [[DOTFCA_3_INSERT63:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT60]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-NEXT: [[DOTFCA_4_INSERT66:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT63]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-NEXT: [[DOTFCA_5_INSERT69:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT66]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-NEXT: [[DOTFCA_6_INSERT72:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT69]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-NEXT: [[DOTFCA_7_INSERT75:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT72]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-NEXT: [[DOTFCA_8_INSERT78:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT75]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-NEXT: [[DOTFCA_9_INSERT81:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT78]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-NEXT: [[DOTFCA_10_INSERT84:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT81]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-NEXT: [[DOTFCA_11_INSERT87:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT84]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-NEXT: [[DOTFCA_12_INSERT90:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT87]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-NEXT: [[DOTFCA_13_INSERT93:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_12_INSERT90]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-NEXT: [[DOTFCA_14_INSERT96:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT93]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-NEXT: [[DOTFCA_15_INSERT99:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT96]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-NEXT: [[DOTFCA_16_INSERT102:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT99]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-NEXT: [[DOTFCA_17_INSERT105:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT102]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-NEXT: [[DOTFCA_18_INSERT108:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT105]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-NEXT: [[DOTFCA_19_INSERT111:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT108]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-NEXT: [[DOTFCA_20_INSERT114:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT111]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-NEXT: [[DOTFCA_21_INSERT117:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT114]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-NEXT: [[DOTFCA_22_INSERT120:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT117]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-NEXT: [[DOTFCA_23_INSERT123:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT120]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-NEXT: [[DOTFCA_24_INSERT126:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT123]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-NEXT: [[DOTFCA_25_INSERT129:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT126]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-NEXT: [[DOTFCA_26_INSERT132:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT129]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-NEXT: [[DOTFCA_27_INSERT135:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT132]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-NEXT: [[DOTFCA_28_INSERT138:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT135]], i32 
[[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-NEXT: [[DOTFCA_29_INSERT141:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT138]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; CLEANUP-NEXT: [[TMP116:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @ClosestHit.resume.0) -; CLEANUP-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i64 [[TMP116]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount [[META17]] +; CLEANUP-NEXT: call void (...) @lgc.cps.jump(i64 4, i32 -1, {} poison, i64 [[TMP116]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT141]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount [[META17]], !waitmask [[META22]] ; CLEANUP-NEXT: unreachable ; ; ; CLEANUP-LABEL: define dso_local void @ClosestHit.resume.0( -; CLEANUP-SAME: i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META17]] !continuation [[META25]] { +; CLEANUP-SAME: i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [23 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META25]] !continuation.registercount [[META17]] !continuation [[META26]] { ; CLEANUP-NEXT: entryresume.0: ; CLEANUP-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 120) ; CLEANUP-NEXT: [[PAYLOAD_SPILL_ALLOCA:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 -; CLEANUP-NEXT: [[TMP2:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: [[TMP5:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: 
[[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(32) [[TMP2]], align 4 +; CLEANUP-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 2 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [30 x i32] [[TMP3]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 1 +; CLEANUP-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 2 +; CLEANUP-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 3 +; CLEANUP-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 4 +; CLEANUP-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 5 +; CLEANUP-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 6 +; CLEANUP-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 7 +; CLEANUP-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 8 +; CLEANUP-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 9 +; CLEANUP-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 10 +; CLEANUP-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 11 +; CLEANUP-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP3]], 12 +; CLEANUP-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 13 +; CLEANUP-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 14 +; CLEANUP-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 15 +; CLEANUP-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 16 +; CLEANUP-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 17 +; CLEANUP-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 18 +; CLEANUP-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 19 +; CLEANUP-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 20 +; CLEANUP-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 21 +; CLEANUP-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 22 +; CLEANUP-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 23 +; CLEANUP-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 24 +; CLEANUP-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 25 +; CLEANUP-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 26 +; CLEANUP-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 27 +; CLEANUP-NEXT: [[TMP23:%.*]] = extractvalue [30 x i32] [[TMP3]], 28 +; CLEANUP-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP3]], 29 +; CLEANUP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT1]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP2]], align 4 ; CLEANUP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 1 -; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(32) [[TMP27]], align 4 +; CLEANUP-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP27]], align 4 ; CLEANUP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 2 -; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(32) 
[[TMP29]], align 4 +; CLEANUP-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(32) [[TMP29]], align 4 ; CLEANUP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 3 -; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(32) [[TMP31]], align 4 +; CLEANUP-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(32) [[TMP31]], align 4 ; CLEANUP-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 4 -; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(32) [[TMP33]], align 4 +; CLEANUP-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP33]], align 4 ; CLEANUP-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 5 -; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(32) [[TMP35]], align 4 +; CLEANUP-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(32) [[TMP35]], align 4 ; CLEANUP-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 6 -; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(32) [[TMP37]], align 4 +; CLEANUP-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(32) [[TMP37]], align 4 ; CLEANUP-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 7 -; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(32) [[TMP39]], align 4 +; CLEANUP-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(32) [[TMP39]], align 4 ; CLEANUP-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 8 -; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(32) [[TMP41]], align 4 +; CLEANUP-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(32) [[TMP41]], align 4 ; CLEANUP-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 9 -; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(32) [[TMP43]], align 4 +; CLEANUP-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP43]], align 4 ; CLEANUP-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 10 -; CLEANUP-NEXT: 
[[TMP46:%.*]] = load i32, ptr addrspace(32) [[TMP45]], align 4 +; CLEANUP-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(32) [[TMP45]], align 4 ; CLEANUP-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 11 -; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(32) [[TMP47]], align 4 +; CLEANUP-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(32) [[TMP47]], align 4 ; CLEANUP-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 12 -; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(32) [[TMP49]], align 4 +; CLEANUP-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(32) [[TMP49]], align 4 ; CLEANUP-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 13 -; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(32) [[TMP51]], align 4 +; CLEANUP-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(32) [[TMP51]], align 4 ; CLEANUP-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 14 -; CLEANUP-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(32) [[TMP53]], align 4 +; CLEANUP-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(32) [[TMP53]], align 4 ; CLEANUP-NEXT: [[TMP55:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 15 -; CLEANUP-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(32) [[TMP55]], align 4 +; CLEANUP-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(32) [[TMP55]], align 4 ; CLEANUP-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 16 -; CLEANUP-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(32) [[TMP57]], align 4 +; CLEANUP-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(32) [[TMP57]], align 4 ; CLEANUP-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 17 -; CLEANUP-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(32) [[TMP59]], align 4 +; CLEANUP-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(32) [[TMP59]], align 4 ; CLEANUP-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr 
addrspace(32) [[TMP2]], i32 18 -; CLEANUP-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(32) [[TMP61]], align 4 +; CLEANUP-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(32) [[TMP61]], align 4 ; CLEANUP-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 19 -; CLEANUP-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(32) [[TMP63]], align 4 +; CLEANUP-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(32) [[TMP63]], align 4 ; CLEANUP-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 20 -; CLEANUP-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(32) [[TMP65]], align 4 +; CLEANUP-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(32) [[TMP65]], align 4 ; CLEANUP-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 21 -; CLEANUP-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(32) [[TMP67]], align 4 +; CLEANUP-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(32) [[TMP67]], align 4 ; CLEANUP-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 22 -; CLEANUP-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(32) [[TMP69]], align 4 +; CLEANUP-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(32) [[TMP69]], align 4 ; CLEANUP-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 23 -; CLEANUP-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(32) [[TMP71]], align 4 +; CLEANUP-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(32) [[TMP71]], align 4 ; CLEANUP-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 24 -; CLEANUP-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(32) [[TMP73]], align 4 +; CLEANUP-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(32) [[TMP73]], align 4 ; CLEANUP-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 25 -; CLEANUP-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(32) [[TMP75]], align 4 +; CLEANUP-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(32) [[TMP75]], align 4 ; CLEANUP-NEXT: 
[[TMP77:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP2]], i32 26 -; CLEANUP-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(32) [[TMP77]], align 4 -; CLEANUP-NEXT: [[TMP80:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; CLEANUP-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(32) [[TMP77]], align 4 +; CLEANUP-NEXT: [[TMP60:%.*]] = inttoptr i32 [[DOTFCA_0_EXTRACT1]] to ptr addrspace(32) +; CLEANUP-NEXT: [[TMP58:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP58]], 0 ; CLEANUP-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; CLEANUP-NEXT: [[DOTRELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 -; CLEANUP-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(32) [[DOTRELOAD_ADDR]], align 4 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 2 +; CLEANUP-NEXT: [[PAYLOAD_FCA_0_EXTRACT_RELOAD:%.*]] = load i32, ptr addrspace(32) [[PAYLOAD_FCA_0_EXTRACT_RELOAD_ADDR]], align 4 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[CLOSESTHIT_FRAME]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 1 ; CLEANUP-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 -; CLEANUP-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: [[TMP81:%.*]] = load ptr addrspace(32), ptr addrspace(20) @PAYLOAD, align 4 -; CLEANUP-NEXT: store i32 [[TMP3]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@PAYLOAD, i32 8), align 4 -; CLEANUP-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 10), align 4 -; CLEANUP-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 11), align 4 -; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 12), align 4 -; CLEANUP-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 13), align 4 -; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 14), align 4 -; CLEANUP-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 15), align 4 -; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 16), align 4 -; CLEANUP-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 17), align 4 -; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 18), align 4 -; CLEANUP-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 19), align 4 -; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 20), align 4 -; CLEANUP-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 21), align 4 -; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 22), align 4 -; CLEANUP-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 23), align 4 -; CLEANUP-NEXT: store i32 
[[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 24), align 4 -; CLEANUP-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 25), align 4 -; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 26), align 4 -; CLEANUP-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 27), align 4 -; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 28), align 4 -; CLEANUP-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 29), align 4 -; CLEANUP-NEXT: store i32 [[TMP26]], ptr addrspace(32) [[TMP81]], align 4 +; CLEANUP-NEXT: [[TMP81:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT_RELOAD]] to ptr addrspace(32) +; CLEANUP-NEXT: store i32 [[TMP4]], ptr addrspace(32) [[TMP81]], align 4 ; CLEANUP-NEXT: [[TMP106:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 1 -; CLEANUP-NEXT: store i32 [[TMP28]], ptr addrspace(32) [[TMP106]], align 4 +; CLEANUP-NEXT: store i32 [[TMP6]], ptr addrspace(32) [[TMP106]], align 4 ; CLEANUP-NEXT: [[TMP107:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 2 -; CLEANUP-NEXT: store i32 [[TMP30]], ptr addrspace(32) [[TMP107]], align 4 +; CLEANUP-NEXT: store i32 [[TMP8]], ptr addrspace(32) [[TMP107]], align 4 ; CLEANUP-NEXT: [[TMP82:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 3 -; CLEANUP-NEXT: store i32 [[TMP32]], ptr addrspace(32) [[TMP82]], align 4 +; CLEANUP-NEXT: store i32 [[TMP10]], ptr addrspace(32) [[TMP82]], align 4 ; CLEANUP-NEXT: [[TMP83:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 4 -; CLEANUP-NEXT: store i32 [[TMP34]], ptr addrspace(32) [[TMP83]], align 4 +; CLEANUP-NEXT: store i32 [[TMP12]], ptr addrspace(32) [[TMP83]], align 4 ; 
CLEANUP-NEXT: [[TMP84:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 5 -; CLEANUP-NEXT: store i32 [[TMP36]], ptr addrspace(32) [[TMP84]], align 4 +; CLEANUP-NEXT: store i32 [[TMP14]], ptr addrspace(32) [[TMP84]], align 4 ; CLEANUP-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 6 -; CLEANUP-NEXT: store i32 [[TMP38]], ptr addrspace(32) [[TMP85]], align 4 +; CLEANUP-NEXT: store i32 [[TMP16]], ptr addrspace(32) [[TMP85]], align 4 ; CLEANUP-NEXT: [[TMP86:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 7 -; CLEANUP-NEXT: store i32 [[TMP40]], ptr addrspace(32) [[TMP86]], align 4 +; CLEANUP-NEXT: store i32 [[TMP18]], ptr addrspace(32) [[TMP86]], align 4 ; CLEANUP-NEXT: [[TMP87:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 8 -; CLEANUP-NEXT: store i32 [[TMP42]], ptr addrspace(32) [[TMP87]], align 4 +; CLEANUP-NEXT: store i32 [[TMP20]], ptr addrspace(32) [[TMP87]], align 4 ; CLEANUP-NEXT: [[TMP88:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 9 -; CLEANUP-NEXT: store i32 [[TMP44]], ptr addrspace(32) [[TMP88]], align 4 +; CLEANUP-NEXT: store i32 [[TMP22]], ptr addrspace(32) [[TMP88]], align 4 ; CLEANUP-NEXT: [[TMP89:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 10 -; CLEANUP-NEXT: store i32 [[TMP46]], ptr addrspace(32) [[TMP89]], align 4 +; CLEANUP-NEXT: store i32 [[TMP24]], ptr addrspace(32) [[TMP89]], align 4 ; CLEANUP-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 11 -; CLEANUP-NEXT: store i32 [[TMP48]], ptr addrspace(32) [[TMP90]], align 4 +; CLEANUP-NEXT: store i32 [[TMP26]], ptr addrspace(32) [[TMP90]], align 4 ; CLEANUP-NEXT: [[TMP91:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 12 -; CLEANUP-NEXT: store i32 [[TMP50]], ptr addrspace(32) [[TMP91]], align 4 +; CLEANUP-NEXT: store i32 [[TMP28]], ptr addrspace(32) [[TMP91]], align 4 ; CLEANUP-NEXT: [[TMP92:%.*]] = 
getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 13 -; CLEANUP-NEXT: store i32 [[TMP52]], ptr addrspace(32) [[TMP92]], align 4 +; CLEANUP-NEXT: store i32 [[TMP30]], ptr addrspace(32) [[TMP92]], align 4 ; CLEANUP-NEXT: [[TMP93:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 14 -; CLEANUP-NEXT: store i32 [[TMP54]], ptr addrspace(32) [[TMP93]], align 4 +; CLEANUP-NEXT: store i32 [[TMP32]], ptr addrspace(32) [[TMP93]], align 4 ; CLEANUP-NEXT: [[TMP94:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 15 -; CLEANUP-NEXT: store i32 [[TMP56]], ptr addrspace(32) [[TMP94]], align 4 +; CLEANUP-NEXT: store i32 [[TMP34]], ptr addrspace(32) [[TMP94]], align 4 ; CLEANUP-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 16 -; CLEANUP-NEXT: store i32 [[TMP58]], ptr addrspace(32) [[TMP95]], align 4 +; CLEANUP-NEXT: store i32 [[TMP36]], ptr addrspace(32) [[TMP95]], align 4 ; CLEANUP-NEXT: [[TMP96:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 17 -; CLEANUP-NEXT: store i32 [[TMP60]], ptr addrspace(32) [[TMP96]], align 4 +; CLEANUP-NEXT: store i32 [[TMP38]], ptr addrspace(32) [[TMP96]], align 4 ; CLEANUP-NEXT: [[TMP97:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 18 -; CLEANUP-NEXT: store i32 [[TMP62]], ptr addrspace(32) [[TMP97]], align 4 +; CLEANUP-NEXT: store i32 [[TMP40]], ptr addrspace(32) [[TMP97]], align 4 ; CLEANUP-NEXT: [[TMP98:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 19 -; CLEANUP-NEXT: store i32 [[TMP64]], ptr addrspace(32) [[TMP98]], align 4 +; CLEANUP-NEXT: store i32 [[TMP42]], ptr addrspace(32) [[TMP98]], align 4 ; CLEANUP-NEXT: [[TMP99:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 20 -; CLEANUP-NEXT: store i32 [[TMP66]], ptr addrspace(32) [[TMP99]], align 4 +; CLEANUP-NEXT: store i32 [[TMP44]], ptr addrspace(32) [[TMP99]], align 4 ; CLEANUP-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, 
ptr addrspace(32) [[TMP81]], i32 21 -; CLEANUP-NEXT: store i32 [[TMP68]], ptr addrspace(32) [[TMP100]], align 4 +; CLEANUP-NEXT: store i32 [[TMP46]], ptr addrspace(32) [[TMP100]], align 4 ; CLEANUP-NEXT: [[TMP101:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 22 -; CLEANUP-NEXT: store i32 [[TMP70]], ptr addrspace(32) [[TMP101]], align 4 +; CLEANUP-NEXT: store i32 [[TMP48]], ptr addrspace(32) [[TMP101]], align 4 ; CLEANUP-NEXT: [[TMP102:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 23 -; CLEANUP-NEXT: store i32 [[TMP72]], ptr addrspace(32) [[TMP102]], align 4 +; CLEANUP-NEXT: store i32 [[TMP50]], ptr addrspace(32) [[TMP102]], align 4 ; CLEANUP-NEXT: [[TMP103:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 24 -; CLEANUP-NEXT: store i32 [[TMP74]], ptr addrspace(32) [[TMP103]], align 4 +; CLEANUP-NEXT: store i32 [[TMP52]], ptr addrspace(32) [[TMP103]], align 4 ; CLEANUP-NEXT: [[TMP104:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 25 -; CLEANUP-NEXT: store i32 [[TMP76]], ptr addrspace(32) [[TMP104]], align 4 +; CLEANUP-NEXT: store i32 [[TMP54]], ptr addrspace(32) [[TMP104]], align 4 ; CLEANUP-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr addrspace(32) [[TMP81]], i32 26 -; CLEANUP-NEXT: store i32 [[TMP78]], ptr addrspace(32) [[TMP105]], align 4 +; CLEANUP-NEXT: store i32 [[TMP56]], ptr addrspace(32) [[TMP105]], align 4 ; CLEANUP-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT_RELOAD]], 0 +; CLEANUP-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 
[[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 
[[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[TMP23]], 28 +; CLEANUP-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; CLEANUP-NEXT: call void @lgc.cps.free(i32 120) -; CLEANUP-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; CLEANUP-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 poison, i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [23 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META17]] ; CLEANUP-NEXT: unreachable ; ; @@ -1016,317 +1069,359 @@ attributes #3 = { nounwind } ; ; ; POST-PROCESS-LABEL: define void @main( -; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] !continuation.state [[META8]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] { ; POST-PROCESS-NEXT: AllocaSpillBB: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 108 ; POST-PROCESS-NEXT: store i32 [[TMP2]], ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT56:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POST-PROCESS-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; POST-PROCESS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; POST-PROCESS-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) ; POST-PROCESS-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; POST-PROCESS-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) -; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT56]], 0 ; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; POST-PROCESS-NEXT: [[TMP9:%.*]] = call i64 @continuation.getAddrAndMD(ptr @main.resume.0) ; POST-PROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP9]], 5 -; POST-PROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP8]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP12]], align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP8]], 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP11]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP13]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP15]], align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = add i32 [[TMP8]], 8 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP16]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP18]], align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP8]], 12 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP17]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP19]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP21]], align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = add i32 [[TMP8]], 16 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP22]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP24]], align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = add i32 
[[TMP8]], 20 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP23]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP25]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP27]], align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = add i32 [[TMP8]], 24 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP28]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP30]], align 4 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = add i32 [[TMP8]], 28 -; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP29]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP33]], align 4 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = add i32 [[TMP8]], 32 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP32]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP34]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP36]], align 4 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = add i32 [[TMP8]], 36 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP35]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP39]], align 4 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP8]], 40 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP40]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP42]], align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP8]], 44 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP41]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP45:%.*]] = getelementptr i8, 
ptr addrspace(21) [[TMP43]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP45]], align 4 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = add i32 [[TMP8]], 48 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP44]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP46]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP48]], align 4 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = add i32 [[TMP8]], 52 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP47]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP51]], align 4 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP8]], 56 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP52]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP54]], align 4 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP8]], 60 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP53]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP57]], align 4 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = add i32 [[TMP8]], 64 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = inttoptr i32 [[TMP56]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP58]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP60]], align 4 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = add i32 [[TMP8]], 68 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP59]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP61]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP63]], align 4 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = 
add i32 [[TMP8]], 72 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP64]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP66]], align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP8]], 76 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP69]], align 4 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = add i32 [[TMP8]], 80 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = inttoptr i32 [[TMP68]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP70]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP72]], align 4 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = add i32 [[TMP8]], 84 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP71]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP75]], align 4 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP8]], 88 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP76]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP78]], align 4 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP8]], 92 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP77]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP81]], align 4 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = add i32 [[TMP8]], 96 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = inttoptr i32 [[TMP80]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP84:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP82]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP84]], align 4 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = add i32 [[TMP8]], 100 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP83]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP85]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP87]], align 4 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP8]], 104 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP88]], i32 0 -; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP90]], align 4 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP11]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP10]], align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = add i32 [[TMP1]], 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP12]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP14]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP13]], align 4 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = add i32 [[TMP1]], 8 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP15]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP17]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP16]], align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = add i32 [[TMP1]], 12 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP18]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP20]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP19]], align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = add i32 [[TMP1]], 16 +; POST-PROCESS-NEXT: 
[[TMP23:%.*]] = inttoptr i32 [[TMP21]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP23]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP22]], align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = add i32 [[TMP1]], 20 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP24]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP26]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP25]], align 4 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = add i32 [[TMP1]], 24 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP27]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP29]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP28]], align 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP1]], 28 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP31]], align 4 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP1]], 32 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP33]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP34]], align 4 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[TMP1]], 36 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP38]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP37]], align 4 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = add i32 [[TMP1]], 40 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP39]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP41]], i32 0 
+; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP40]], align 4 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP1]], 44 +; POST-PROCESS-NEXT: [[TMP44:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP44]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP43]], align 4 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP1]], 48 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP46]], align 4 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[TMP1]], 52 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP50]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP49]], align 4 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = add i32 [[TMP1]], 56 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = inttoptr i32 [[TMP51]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP53]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP52]], align 4 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP1]], 60 +; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP55]], align 4 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = add i32 [[TMP1]], 64 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP57]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP58]], align 4 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[TMP1]], 68 +; 
POST-PROCESS-NEXT: [[TMP62:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP62]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP61]], align 4 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = add i32 [[TMP1]], 72 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = inttoptr i32 [[TMP63]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP65]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP64]], align 4 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP1]], 76 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP68]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP67]], align 4 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP1]], 80 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP69]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP70]], align 4 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[TMP1]], 84 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = inttoptr i32 [[TMP72]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP74]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP73]], align 4 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = add i32 [[TMP1]], 88 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = inttoptr i32 [[TMP75]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP77]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP76]], align 4 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP1]], 92 +; POST-PROCESS-NEXT: [[TMP80:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP80]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP79]], align 4 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = add i32 [[TMP1]], 96 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP81]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP82]], align 4 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[TMP1]], 100 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP86]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP85]], align 4 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = add i32 [[TMP1]], 104 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = inttoptr i32 [[TMP87]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP90]], i32 0 +; POST-PROCESS-NEXT: store i32 undef, ptr addrspace(21) [[TMP88]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP1]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; POST-PROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; POST-PROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; POST-PROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; POST-PROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; POST-PROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; POST-PROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 undef, 7 +; POST-PROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 undef, 8 +; POST-PROCESS-NEXT: 
[[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 undef, 9 +; POST-PROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 undef, 10 +; POST-PROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 undef, 11 +; POST-PROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 undef, 12 +; POST-PROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 undef, 13 +; POST-PROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 undef, 14 +; POST-PROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 undef, 15 +; POST-PROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 undef, 16 +; POST-PROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 undef, 17 +; POST-PROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 undef, 18 +; POST-PROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 undef, 19 +; POST-PROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 undef, 20 +; POST-PROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 undef, 21 +; POST-PROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 undef, 22 +; POST-PROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 undef, 23 +; POST-PROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 undef, 24 +; POST-PROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 undef, 25 +; POST-PROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 undef, 26 +; POST-PROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_26_INSERT]], i32 undef, 27 +; POST-PROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 undef, 28 +; POST-PROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 undef, 29 ; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP89]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17:![0-9]+]], !continuation.returnedRegistercount [[META17]] +; POST-PROCESS-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP89]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POST-PROCESS-NEXT: unreachable ; ; ; POST-PROCESS-LABEL: define dso_local void @main.resume.0( -; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META17]] !continuation [[META20]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [23 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation [[META20]] { ; POST-PROCESS-NEXT: entryresume.0: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP27:%.*]] = add i32 [[TMP2]], -108 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP28]], i32 0 -; POST-PROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(21) [[TMP29]], align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 2 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = extractvalue [30 x i32] [[TMP4]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 1 +; POST-PROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 2 +; POST-PROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 3 +; POST-PROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 4 +; POST-PROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 5 +; POST-PROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 6 +; POST-PROCESS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 7 +; POST-PROCESS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP4]], 8 +; POST-PROCESS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 9 +; POST-PROCESS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 10 +; POST-PROCESS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 11 +; POST-PROCESS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 12 +; POST-PROCESS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 13 +; POST-PROCESS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 14 +; POST-PROCESS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 15 +; POST-PROCESS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 16 +; POST-PROCESS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 17 +; POST-PROCESS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 18 +; POST-PROCESS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 19 +; POST-PROCESS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 20 +; POST-PROCESS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 21 +; POST-PROCESS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 22 +; POST-PROCESS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 23 +; POST-PROCESS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 24 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = extractvalue [30 x i32] [[TMP4]], 25 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = extractvalue [30 x i32] [[TMP4]], 26 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = extractvalue [30 x i32] [[TMP4]], 27 +; POST-PROCESS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 28 +; POST-PROCESS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 29 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 +; 
POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(21) [[TMP6]], align 4 ; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP3]], 4 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 -; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(21) [[TMP33]], align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 ; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP3]], 8 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP36]], i32 0 -; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP37]], align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP13]], i32 0 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(21) [[TMP14]], align 4 ; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP3]], 12 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP40]], i32 0 -; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(21) [[TMP41]], align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP17]], i32 0 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(21) [[TMP18]], align 4 ; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP3]], 16 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP44]], i32 0 -; POST-PROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(21) [[TMP45]], align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 ; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP3]], 20 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP48]], i32 0 -; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP49]], align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP25]], i32 0 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(21) [[TMP26]], align 4 ; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP3]], 24 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP52]], i32 0 -; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(21) [[TMP53]], align 4 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP29]], i32 0 +; POST-PROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(21) [[TMP40]], align 4 ; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP3]], 28 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 -; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(21) [[TMP57]], align 4 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP33]], i32 0 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 ; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP3]], 32 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP60]], i32 0 -; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP61]], align 4 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP37]], i32 0 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 ; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP3]], 36 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP64]], i32 0 -; POST-PROCESS-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(21) [[TMP65]], align 4 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP41]], i32 0 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 ; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP3]], 40 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP68]], i32 0 -; POST-PROCESS-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(21) [[TMP69]], align 4 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP45]], i32 0 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 ; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP3]], 44 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP73:%.*]] 
= getelementptr i8, ptr addrspace(21) [[TMP72]], i32 0 -; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP73]], align 4 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 ; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP3]], 48 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP76]], i32 0 -; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP77]], align 4 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP53]], i32 0 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 ; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP3]], 52 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP80]], i32 0 -; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(21) [[TMP81]], align 4 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP57]], i32 0 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(21) [[TMP68]], align 4 ; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP3]], 56 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP84]], i32 0 -; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP85]], align 4 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, 
ptr addrspace(21) [[TMP61]], i32 0 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 ; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP3]], 60 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP88]], i32 0 -; POST-PROCESS-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(21) [[TMP89]], align 4 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP65]], i32 0 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(21) [[TMP76]], align 4 ; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP3]], 64 -; POST-PROCESS-NEXT: [[TMP92:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP92]], i32 0 -; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(21) [[TMP93]], align 4 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP69]], i32 0 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 ; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP3]], 68 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP96]], i32 0 -; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP97]], align 4 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 4 ; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[TMP3]], 72 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP100]], i32 0 -; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(21) [[TMP101]], align 4 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP77]], i32 0 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 ; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 76 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP104]], i32 0 -; POST-PROCESS-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(21) [[TMP105]], align 4 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP81]], i32 0 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 ; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 80 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP108]], i32 0 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(21) [[TMP109]], align 4 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP85]], i32 0 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 ; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP3]], 84 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP112]], i32 0 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(21) [[TMP113]], align 4 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = inttoptr i32 [[TMP110]] to ptr 
addrspace(21) +; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP89]], i32 0 +; POST-PROCESS-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(21) [[TMP100]], align 4 ; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 88 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = load i32, ptr addrspace(21) [[TMP117]], align 4 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP93]], i32 0 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 ; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 92 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP120]], i32 0 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = load i32, ptr addrspace(21) [[TMP121]], align 4 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP97]], i32 0 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 ; POST-PROCESS-NEXT: [[TMP122:%.*]] = add i32 [[TMP3]], 96 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP124]], i32 0 -; POST-PROCESS-NEXT: [[TMP127:%.*]] = load i32, ptr addrspace(21) [[TMP125]], align 4 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP101]], i32 0 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(21) [[TMP115]], align 4 ; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 
[[TMP3]], 100 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP128]], i32 0 -; POST-PROCESS-NEXT: [[TMP131:%.*]] = load i32, ptr addrspace(21) [[TMP129]], align 4 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP105]], i32 0 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 ; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 104 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP132]], i32 0 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(21) [[TMP133]], align 4 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP109]], i32 0 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(21) [[TMP117]], align 4 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT57:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP119]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POST-PROCESS-NEXT: [[TMP135:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = add i32 [[TMP135]], -108 -; POST-PROCESS-NEXT: store i32 [[TMP136]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP112]], -108 +; POST-PROCESS-NEXT: store i32 [[TMP113]], ptr 
[[CSP]], align 4 ; POST-PROCESS-NEXT: ret void ; POST-PROCESS: entryresume.0.split: ; POST-PROCESS-NEXT: unreachable ; ; ; POST-PROCESS-LABEL: define void @AnyHit( -; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META23:![0-9]+]] !continuation.state [[META8]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] { ; POST-PROCESS-NEXT: AllocaSpillBB: ; POST-PROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 
x i32] [[PAYLOAD]], 9 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 @@ -1358,329 +1453,311 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: 
[[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: 
[[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(21) [[TMP4]], align 4 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 4 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(21) [[TMP8]], align 4 +; POST-PROCESS-NEXT: [[TMP10:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 8 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP10]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP11]], i32 0 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(21) [[TMP12]], align 4 +; POST-PROCESS-NEXT: [[TMP14:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 12 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP14]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP15]], i32 0 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(21) [[TMP16]], align 4 +; POST-PROCESS-NEXT: [[TMP18:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 16 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP18]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP19]], i32 0 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(21) [[TMP20]], align 4 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP22]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP24:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP23]], i32 0 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(21) [[TMP24]], align 4 +; POST-PROCESS-NEXT: [[TMP26:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 24 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP26]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP27]], i32 0 ; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP3]], 4 +; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 28 ; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 ; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP3]], 8 +; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 32 ; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 ; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP3]], 12 +; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 36 ; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP39]], i32 0 ; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(21) [[TMP40]], align 4 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP3]], 16 +; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 40 ; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 ; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr 
addrspace(21) [[TMP44]], align 4 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP3]], 20 +; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 44 ; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 ; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP3]], 24 +; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 48 ; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP51]], i32 0 ; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP3]], 28 +; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 52 ; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 ; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP3]], 32 +; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 56 ; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 ; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP3]], 36 +; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 60 ; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP63]], i32 0 ; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 -; 
POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP3]], 40 +; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 64 ; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 ; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(21) [[TMP68]], align 4 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP3]], 44 +; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 68 ; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 ; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP3]], 48 +; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 72 ; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP75]], i32 0 ; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(21) [[TMP76]], align 4 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP3]], 52 +; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 76 ; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 ; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP3]], 56 +; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 80 ; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 ; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 4 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 
[[TMP3]], 60 +; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 84 ; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP87]], i32 0 ; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP3]], 64 +; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 88 ; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 ; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP3]], 68 +; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 92 ; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP95]], i32 0 ; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[TMP3]], 72 +; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 96 ; POST-PROCESS-NEXT: [[TMP99:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP99]], i32 0 ; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(21) [[TMP100]], align 4 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 76 +; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 100 ; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 ; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 80 +; POST-PROCESS-NEXT: 
[[TMP106:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 104 ; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP107]], i32 0 ; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP3]], 84 -; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP112]], align 4 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 88 -; POST-PROCESS-NEXT: [[TMP115:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP115]], i32 0 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 92 -; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP120]], align 4 -; POST-PROCESS-NEXT: [[TMP122:%.*]] = add i32 [[TMP3]], 96 -; POST-PROCESS-NEXT: [[TMP123:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP123]], i32 0 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) [[TMP124]], align 4 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP3]], 100 -; POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP128]], align 4 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 104 
-; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP132]], align 4 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP134]], i32 0, i32 1 +; POST-PROCESS-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP110]], i32 0, i32 1 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; POST-PROCESS-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP135:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[DOTSROA_035_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = bitcast float [[DOTSROA_035_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: 
[[DOTSROA_035_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = bitcast float [[DOTSROA_035_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; POST-PROCESS-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = inttoptr i32 [[TMP137]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP138]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(21) [[TMP139]], align 4 -; POST-PROCESS-NEXT: [[TMP140:%.*]] = add i32 
[[TMP137]], 4 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = inttoptr i32 [[TMP140]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP142:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP141]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP142]], align 4 -; POST-PROCESS-NEXT: [[TMP143:%.*]] = add i32 [[TMP137]], 8 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = inttoptr i32 [[TMP143]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP144]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP145]], align 4 -; POST-PROCESS-NEXT: [[TMP146:%.*]] = add i32 [[TMP137]], 12 -; POST-PROCESS-NEXT: [[TMP147:%.*]] = inttoptr i32 [[TMP146]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP147]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(21) [[TMP148]], align 4 -; POST-PROCESS-NEXT: [[TMP149:%.*]] = add i32 [[TMP137]], 16 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = inttoptr i32 [[TMP149]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP150]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP151]], align 4 -; POST-PROCESS-NEXT: [[TMP152:%.*]] = add i32 [[TMP137]], 20 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = inttoptr i32 [[TMP152]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP154:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP153]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP154]], align 4 -; POST-PROCESS-NEXT: [[TMP155:%.*]] = add i32 [[TMP137]], 24 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = inttoptr i32 [[TMP155]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP157:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP156]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(21) [[TMP157]], align 4 -; POST-PROCESS-NEXT: [[TMP158:%.*]] = add i32 [[TMP137]], 28 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = inttoptr i32 [[TMP158]] to 
ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP160:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP159]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP160]], align 4 -; POST-PROCESS-NEXT: [[TMP161:%.*]] = add i32 [[TMP137]], 32 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = inttoptr i32 [[TMP161]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP162]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP163]], align 4 -; POST-PROCESS-NEXT: [[TMP164:%.*]] = add i32 [[TMP137]], 36 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = inttoptr i32 [[TMP164]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP165]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(21) [[TMP166]], align 4 -; POST-PROCESS-NEXT: [[TMP167:%.*]] = add i32 [[TMP137]], 40 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = inttoptr i32 [[TMP167]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP169:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP168]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP169]], align 4 -; POST-PROCESS-NEXT: [[TMP170:%.*]] = add i32 [[TMP137]], 44 -; POST-PROCESS-NEXT: [[TMP171:%.*]] = inttoptr i32 [[TMP170]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP172:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP171]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP172]], align 4 -; POST-PROCESS-NEXT: [[TMP173:%.*]] = add i32 [[TMP137]], 48 -; POST-PROCESS-NEXT: [[TMP174:%.*]] = inttoptr i32 [[TMP173]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP175:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP174]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP77]], ptr addrspace(21) [[TMP175]], align 4 -; POST-PROCESS-NEXT: [[TMP176:%.*]] = add i32 [[TMP137]], 52 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = inttoptr i32 [[TMP176]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP178:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP177]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP178]], align 4 -; POST-PROCESS-NEXT: [[TMP179:%.*]] = add i32 [[TMP137]], 56 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = inttoptr i32 [[TMP179]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP181:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP180]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP181]], align 4 -; POST-PROCESS-NEXT: [[TMP182:%.*]] = add i32 [[TMP137]], 60 -; POST-PROCESS-NEXT: [[TMP183:%.*]] = inttoptr i32 [[TMP182]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP184:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP183]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP89]], ptr addrspace(21) [[TMP184]], align 4 -; POST-PROCESS-NEXT: [[TMP185:%.*]] = add i32 [[TMP137]], 64 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = inttoptr i32 [[TMP185]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP187:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP186]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP187]], align 4 -; POST-PROCESS-NEXT: [[TMP188:%.*]] = add i32 [[TMP137]], 68 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = inttoptr i32 [[TMP188]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP190:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP189]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP190]], align 4 -; POST-PROCESS-NEXT: [[TMP191:%.*]] = add i32 [[TMP137]], 72 -; POST-PROCESS-NEXT: [[TMP192:%.*]] = inttoptr i32 [[TMP191]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP193:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP192]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP101]], ptr addrspace(21) [[TMP193]], align 4 -; POST-PROCESS-NEXT: [[TMP194:%.*]] = add i32 [[TMP137]], 76 -; POST-PROCESS-NEXT: [[TMP195:%.*]] = inttoptr i32 [[TMP194]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP196:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP195]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP105]], 
ptr addrspace(21) [[TMP196]], align 4 -; POST-PROCESS-NEXT: [[TMP197:%.*]] = add i32 [[TMP137]], 80 -; POST-PROCESS-NEXT: [[TMP198:%.*]] = inttoptr i32 [[TMP197]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP199:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP198]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP199]], align 4 -; POST-PROCESS-NEXT: [[TMP200:%.*]] = add i32 [[TMP137]], 84 -; POST-PROCESS-NEXT: [[TMP201:%.*]] = inttoptr i32 [[TMP200]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP202:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP201]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP113]], ptr addrspace(21) [[TMP202]], align 4 -; POST-PROCESS-NEXT: [[TMP203:%.*]] = add i32 [[TMP137]], 88 -; POST-PROCESS-NEXT: [[TMP204:%.*]] = inttoptr i32 [[TMP203]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP205:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP204]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP117]], ptr addrspace(21) [[TMP205]], align 4 -; POST-PROCESS-NEXT: [[TMP206:%.*]] = add i32 [[TMP137]], 92 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = inttoptr i32 [[TMP206]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP208:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP207]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP121]], ptr addrspace(21) [[TMP208]], align 4 -; POST-PROCESS-NEXT: [[TMP209:%.*]] = add i32 [[TMP137]], 96 -; POST-PROCESS-NEXT: [[TMP210:%.*]] = inttoptr i32 [[TMP209]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP211:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP210]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP125]], ptr addrspace(21) [[TMP211]], align 4 -; POST-PROCESS-NEXT: [[TMP212:%.*]] = add i32 [[TMP137]], 100 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = inttoptr i32 [[TMP212]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP214:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP213]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP214]], align 4 -; POST-PROCESS-NEXT: 
[[TMP215:%.*]] = add i32 [[TMP137]], 104 -; POST-PROCESS-NEXT: [[TMP216:%.*]] = inttoptr i32 [[TMP215]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP217:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP216]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr addrspace(21) [[TMP217]], align 4 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP114:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP113]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(21) [[TMP114]], align 4 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 4 +; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP115]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(21) [[TMP117]], align 4 +; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 8 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(21) [[TMP120]], align 4 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 12 +; POST-PROCESS-NEXT: [[TMP122:%.*]] = inttoptr i32 [[TMP121]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP122]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(21) [[TMP123]], align 4 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 16 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = inttoptr i32 [[TMP124]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP125]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(21) [[TMP126]], align 4 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; 
POST-PROCESS-NEXT: [[TMP128:%.*]] = inttoptr i32 [[TMP127]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP128]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(21) [[TMP129]], align 4 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 24 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(21) [[TMP132]], align 4 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 28 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = inttoptr i32 [[TMP133]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP134]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP135]], align 4 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 32 +; POST-PROCESS-NEXT: [[TMP137:%.*]] = inttoptr i32 [[TMP136]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP137]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP138]], align 4 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 36 +; POST-PROCESS-NEXT: [[TMP140:%.*]] = inttoptr i32 [[TMP139]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP140]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(21) [[TMP141]], align 4 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 40 +; POST-PROCESS-NEXT: [[TMP143:%.*]] = inttoptr i32 [[TMP142]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP143]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP144]], align 4 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = add i32 
[[PAYLOAD_FCA_0_EXTRACT]], 44 +; POST-PROCESS-NEXT: [[TMP146:%.*]] = inttoptr i32 [[TMP145]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP146]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP147]], align 4 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 48 +; POST-PROCESS-NEXT: [[TMP149:%.*]] = inttoptr i32 [[TMP148]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP150:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP149]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(21) [[TMP150]], align 4 +; POST-PROCESS-NEXT: [[TMP151:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 52 +; POST-PROCESS-NEXT: [[TMP152:%.*]] = inttoptr i32 [[TMP151]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP152]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP153]], align 4 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 56 +; POST-PROCESS-NEXT: [[TMP155:%.*]] = inttoptr i32 [[TMP154]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP155]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP156]], align 4 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 60 +; POST-PROCESS-NEXT: [[TMP158:%.*]] = inttoptr i32 [[TMP157]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP159:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP158]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(21) [[TMP159]], align 4 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 64 +; POST-PROCESS-NEXT: [[TMP161:%.*]] = inttoptr i32 [[TMP160]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP161]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP162]], align 4 +; POST-PROCESS-NEXT: [[TMP163:%.*]] 
= add i32 [[PAYLOAD_FCA_0_EXTRACT]], 68 +; POST-PROCESS-NEXT: [[TMP164:%.*]] = inttoptr i32 [[TMP163]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP165:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP164]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP165]], align 4 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 72 +; POST-PROCESS-NEXT: [[TMP167:%.*]] = inttoptr i32 [[TMP166]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP167]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP77]], ptr addrspace(21) [[TMP168]], align 4 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 76 +; POST-PROCESS-NEXT: [[TMP170:%.*]] = inttoptr i32 [[TMP169]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP171:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP170]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP171]], align 4 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 80 +; POST-PROCESS-NEXT: [[TMP173:%.*]] = inttoptr i32 [[TMP172]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP173]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP174]], align 4 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 84 +; POST-PROCESS-NEXT: [[TMP176:%.*]] = inttoptr i32 [[TMP175]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP177:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP176]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP89]], ptr addrspace(21) [[TMP177]], align 4 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 88 +; POST-PROCESS-NEXT: [[TMP179:%.*]] = inttoptr i32 [[TMP178]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP180:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP179]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP180]], align 4 +; POST-PROCESS-NEXT: 
[[TMP181:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 92 +; POST-PROCESS-NEXT: [[TMP182:%.*]] = inttoptr i32 [[TMP181]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP183:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP182]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP183]], align 4 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 96 +; POST-PROCESS-NEXT: [[TMP185:%.*]] = inttoptr i32 [[TMP184]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP186:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP185]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP101]], ptr addrspace(21) [[TMP186]], align 4 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 100 +; POST-PROCESS-NEXT: [[TMP188:%.*]] = inttoptr i32 [[TMP187]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP189:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP188]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP189]], align 4 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 104 +; POST-PROCESS-NEXT: [[TMP191:%.*]] = inttoptr i32 [[TMP190]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP192:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP191]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP192]], align 4 ; POST-PROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP218:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = bitcast i32 [[TMP218]] to float -; POST-PROCESS-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP219]], i32 0 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP194:%.*]] = bitcast i32 [[TMP193]] to float +; POST-PROCESS-NEXT: [[DOTSROA_037_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float 
[[TMP194]], i32 0 ; POST-PROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP220:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[TMP221:%.*]] = bitcast i32 [[TMP220]] to float -; POST-PROCESS-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP221]], i32 1 -; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; POST-PROCESS-NEXT: [[TMP222:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP222]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) -; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 +; POST-PROCESS-NEXT: [[TMP195:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = bitcast i32 [[TMP195]] to float +; POST-PROCESS-NEXT: [[DOTSROA_037_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_037_0_VEC_INSERT]], float [[TMP196]], i32 1 +; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT36:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_037_4_VEC_INSERT]], 0 +; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP197]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT36]]) +; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_GEP25:%.*]] = 
getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP25]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; POST-PROCESS-NEXT: [[DOTFCA_0_0_1_0_GEP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1, i32 0 -; POST-PROCESS-NEXT: [[DOTFCA_0_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_0_1_0_GEP2]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_0_1_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1, i32 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_0_1_0_GEP26]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_0_1_0_LOAD]], 0, 0, 1, 0 -; POST-PROCESS-NEXT: [[DOTFCA_0_1_0_GEP3:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POST-PROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP3]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_1_0_GEP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP27]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_1_0_INSERT]], float [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; POST-PROCESS-NEXT: [[DOTFCA_0_1_1_GEP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POST-PROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr 
[[DOTFCA_0_1_1_GEP4]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_1_1_GEP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POST-PROCESS-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP28]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], i32 [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; POST-PROCESS-NEXT: [[DOTFCA_0_2_GEP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POST-PROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP5]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_2_GEP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POST-PROCESS-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP29]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; POST-PROCESS-NEXT: [[DOTFCA_0_3_GEP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POST-PROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP6]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_3_GEP30:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POST-PROCESS-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP30]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; POST-PROCESS-NEXT: [[DOTFCA_0_4_GEP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP7]], align 4 
+; POST-PROCESS-NEXT: [[DOTFCA_0_4_GEP31:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP31]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_5_GEP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POST-PROCESS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP8]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_5_GEP32:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POST-PROCESS-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP32]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; POST-PROCESS-NEXT: [[DOTFCA_1_0_GEP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POST-PROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP9]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_1_0_GEP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POST-PROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP33]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 -; POST-PROCESS-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POST-PROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_1_1_GEP34:%.*]] = getelementptr inbounds 
[[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POST-PROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP34]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 +; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POST-PROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POST-PROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POST-PROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POST-PROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POST-PROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POST-PROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POST-PROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POST-PROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POST-PROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POST-PROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POST-PROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POST-PROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POST-PROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POST-PROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POST-PROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POST-PROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POST-PROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POST-PROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POST-PROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POST-PROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POST-PROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POST-PROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POST-PROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POST-PROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POST-PROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POST-PROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POST-PROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 
[[PAYLOAD_FCA_28_EXTRACT]], 28 +; POST-PROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POST-PROCESS-NEXT: [[TMP223:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP223]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META17]] +; POST-PROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP223]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POST-PROCESS-NEXT: unreachable ; ; ; POST-PROCESS-LABEL: define void @ClosestHit( -; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META17]] !continuation [[META25:![0-9]+]] !continuation.stacksize [[META26:![0-9]+]] !continuation.state [[META27:![0-9]+]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [21 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation [[META25:![0-9]+]] !continuation.stacksize [[META26:![0-9]+]] { ; POST-PROCESS-NEXT: AllocaSpillBB: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1691,585 +1768,607 @@ attributes #3 = { nounwind } ; POST-PROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) ; POST-PROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; POST-PROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP5]], align 4 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POST-PROCESS-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], 116 +; POST-PROCESS-NEXT: [[TMP7:%.*]] = inttoptr i32 
[[TMP6]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP7]], i32 0 +; POST-PROCESS-NEXT: store i32 [[PAYLOAD_FCA_0_EXTRACT]], ptr addrspace(21) [[TMP8]], align 4 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 
19 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POST-PROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; POST-PROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POST-PROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP6]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP30]], i32 0 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(21) [[TMP31]], align 4 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = add i32 [[TMP6]], 4 -; POST-PROCESS-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP33]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP34]], i32 0 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(21) [[TMP35]], align 4 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = add i32 [[TMP6]], 8 -; POST-PROCESS-NEXT: [[TMP38:%.*]] = inttoptr i32 [[TMP37]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP38]], i32 0 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(21) [[TMP39]], align 4 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = add i32 [[TMP6]], 12 -; POST-PROCESS-NEXT: [[TMP42:%.*]] = inttoptr i32 [[TMP41]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP42]], i32 0 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(21) [[TMP43]], align 4 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = add i32 [[TMP6]], 16 -; POST-PROCESS-NEXT: [[TMP46:%.*]] = inttoptr i32 [[TMP45]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP46]], i32 0 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = load 
i32, ptr addrspace(21) [[TMP47]], align 4 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = add i32 [[TMP6]], 20 -; POST-PROCESS-NEXT: [[TMP50:%.*]] = inttoptr i32 [[TMP49]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP50]], i32 0 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(21) [[TMP51]], align 4 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = add i32 [[TMP6]], 24 -; POST-PROCESS-NEXT: [[TMP54:%.*]] = inttoptr i32 [[TMP53]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP54]], i32 0 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(21) [[TMP55]], align 4 -; POST-PROCESS-NEXT: [[TMP57:%.*]] = add i32 [[TMP6]], 28 -; POST-PROCESS-NEXT: [[TMP58:%.*]] = inttoptr i32 [[TMP57]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP58]], i32 0 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(21) [[TMP59]], align 4 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = add i32 [[TMP6]], 32 -; POST-PROCESS-NEXT: [[TMP62:%.*]] = inttoptr i32 [[TMP61]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP62]], i32 0 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(21) [[TMP63]], align 4 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = add i32 [[TMP6]], 36 -; POST-PROCESS-NEXT: [[TMP66:%.*]] = inttoptr i32 [[TMP65]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP66]], i32 0 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(21) [[TMP67]], align 4 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = add i32 [[TMP6]], 40 -; POST-PROCESS-NEXT: [[TMP70:%.*]] = inttoptr i32 [[TMP69]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP70]], i32 0 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(21) [[TMP71]], align 4 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = add i32 [[TMP6]], 
44 -; POST-PROCESS-NEXT: [[TMP74:%.*]] = inttoptr i32 [[TMP73]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP74]], i32 0 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(21) [[TMP75]], align 4 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = add i32 [[TMP6]], 48 -; POST-PROCESS-NEXT: [[TMP78:%.*]] = inttoptr i32 [[TMP77]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP78]], i32 0 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(21) [[TMP79]], align 4 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = add i32 [[TMP6]], 52 -; POST-PROCESS-NEXT: [[TMP82:%.*]] = inttoptr i32 [[TMP81]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP82]], i32 0 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(21) [[TMP83]], align 4 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = add i32 [[TMP6]], 56 -; POST-PROCESS-NEXT: [[TMP86:%.*]] = inttoptr i32 [[TMP85]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP86]], i32 0 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(21) [[TMP87]], align 4 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = add i32 [[TMP6]], 60 -; POST-PROCESS-NEXT: [[TMP90:%.*]] = inttoptr i32 [[TMP89]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP90]], i32 0 -; POST-PROCESS-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(21) [[TMP91]], align 4 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = add i32 [[TMP6]], 64 -; POST-PROCESS-NEXT: [[TMP94:%.*]] = inttoptr i32 [[TMP93]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP94]], i32 0 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(21) [[TMP95]], align 4 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = add i32 [[TMP6]], 68 -; POST-PROCESS-NEXT: [[TMP98:%.*]] = inttoptr i32 [[TMP97]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP98]], i32 0 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = load i32, ptr addrspace(21) [[TMP99]], align 4 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = add i32 [[TMP6]], 72 -; POST-PROCESS-NEXT: [[TMP102:%.*]] = inttoptr i32 [[TMP101]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP102]], i32 0 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(21) [[TMP103]], align 4 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = add i32 [[TMP6]], 76 -; POST-PROCESS-NEXT: [[TMP106:%.*]] = inttoptr i32 [[TMP105]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP106]], i32 0 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(21) [[TMP107]], align 4 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = add i32 [[TMP6]], 80 -; POST-PROCESS-NEXT: [[TMP110:%.*]] = inttoptr i32 [[TMP109]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP110]], i32 0 -; POST-PROCESS-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(21) [[TMP111]], align 4 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = add i32 [[TMP6]], 84 -; POST-PROCESS-NEXT: [[TMP114:%.*]] = inttoptr i32 [[TMP113]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP114]], i32 0 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = load i32, ptr addrspace(21) [[TMP115]], align 4 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = add i32 [[TMP6]], 88 -; POST-PROCESS-NEXT: [[TMP118:%.*]] = inttoptr i32 [[TMP117]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP118]], i32 0 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = load i32, ptr addrspace(21) [[TMP119]], align 4 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = add i32 [[TMP6]], 92 -; POST-PROCESS-NEXT: [[TMP122:%.*]] = inttoptr i32 [[TMP121]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP122]], i32 0 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = load i32, ptr addrspace(21) [[TMP123]], align 4 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = add i32 [[TMP6]], 96 -; POST-PROCESS-NEXT: [[TMP126:%.*]] = inttoptr i32 [[TMP125]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP126]], i32 0 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = load i32, ptr addrspace(21) [[TMP127]], align 4 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP6]], 100 -; POST-PROCESS-NEXT: [[TMP130:%.*]] = inttoptr i32 [[TMP129]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP130]], i32 0 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = load i32, ptr addrspace(21) [[TMP131]], align 4 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = add i32 [[TMP6]], 104 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = inttoptr i32 [[TMP133]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP134]], i32 0 -; POST-PROCESS-NEXT: [[TMP136:%.*]] = load i32, ptr addrspace(21) [[TMP135]], align 4 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP1]], 116 -; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP137]], ptr addrspace(21) [[TMP140]], align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[PAYLOAD_FCA_0_EXTRACT]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 +; POST-PROCESS-NEXT: [[TMP12:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP12]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP13]], i32 0 +; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(21) [[TMP14]], align 4 +; POST-PROCESS-NEXT: [[TMP16:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 8 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP16]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP17]], i32 0 +; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(21) [[TMP18]], align 4 +; POST-PROCESS-NEXT: [[TMP20:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 12 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP20]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(21) [[TMP22]], align 4 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 16 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP24]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP25]], i32 0 +; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(21) [[TMP26]], align 4 +; POST-PROCESS-NEXT: [[TMP28:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP28]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP29]], i32 0 +; POST-PROCESS-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(21) [[TMP30]], align 4 +; POST-PROCESS-NEXT: [[TMP32:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 24 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = inttoptr i32 [[TMP32]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP33]], i32 0 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(21) [[TMP34]], align 4 +; POST-PROCESS-NEXT: [[TMP36:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 28 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP36]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP38:%.*]] = getelementptr i8, 
ptr addrspace(21) [[TMP37]], i32 0 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(21) [[TMP38]], align 4 +; POST-PROCESS-NEXT: [[TMP40:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 32 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = inttoptr i32 [[TMP40]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP41]], i32 0 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(21) [[TMP42]], align 4 +; POST-PROCESS-NEXT: [[TMP44:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 36 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = inttoptr i32 [[TMP44]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP45]], i32 0 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(21) [[TMP46]], align 4 +; POST-PROCESS-NEXT: [[TMP48:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 40 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = inttoptr i32 [[TMP48]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP49]], i32 0 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(21) [[TMP50]], align 4 +; POST-PROCESS-NEXT: [[TMP52:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 44 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = inttoptr i32 [[TMP52]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP53]], i32 0 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(21) [[TMP54]], align 4 +; POST-PROCESS-NEXT: [[TMP56:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 48 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = inttoptr i32 [[TMP56]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP57]], i32 0 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(21) [[TMP58]], align 4 +; POST-PROCESS-NEXT: [[TMP60:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 52 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = inttoptr i32 [[TMP60]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP62:%.*]] = getelementptr 
i8, ptr addrspace(21) [[TMP61]], i32 0 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(21) [[TMP62]], align 4 +; POST-PROCESS-NEXT: [[TMP64:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 56 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = inttoptr i32 [[TMP64]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP65]], i32 0 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(21) [[TMP66]], align 4 +; POST-PROCESS-NEXT: [[TMP68:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 60 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = inttoptr i32 [[TMP68]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP69]], i32 0 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(21) [[TMP70]], align 4 +; POST-PROCESS-NEXT: [[TMP72:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 64 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = inttoptr i32 [[TMP72]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP73]], i32 0 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(21) [[TMP74]], align 4 +; POST-PROCESS-NEXT: [[TMP76:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 68 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = inttoptr i32 [[TMP76]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP77]], i32 0 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(21) [[TMP78]], align 4 +; POST-PROCESS-NEXT: [[TMP80:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 72 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = inttoptr i32 [[TMP80]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP81]], i32 0 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(21) [[TMP82]], align 4 +; POST-PROCESS-NEXT: [[TMP84:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 76 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = inttoptr i32 [[TMP84]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP86:%.*]] = 
getelementptr i8, ptr addrspace(21) [[TMP85]], i32 0 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(21) [[TMP86]], align 4 +; POST-PROCESS-NEXT: [[TMP88:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 80 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = inttoptr i32 [[TMP88]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP89]], i32 0 +; POST-PROCESS-NEXT: [[TMP91:%.*]] = load i32, ptr addrspace(21) [[TMP90]], align 4 +; POST-PROCESS-NEXT: [[TMP92:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 84 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = inttoptr i32 [[TMP92]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP93]], i32 0 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(21) [[TMP94]], align 4 +; POST-PROCESS-NEXT: [[TMP96:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 88 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = inttoptr i32 [[TMP96]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP97]], i32 0 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(21) [[TMP98]], align 4 +; POST-PROCESS-NEXT: [[TMP100:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 92 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = inttoptr i32 [[TMP100]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP101]], i32 0 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = load i32, ptr addrspace(21) [[TMP102]], align 4 +; POST-PROCESS-NEXT: [[TMP104:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 96 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = inttoptr i32 [[TMP104]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP105]], i32 0 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(21) [[TMP106]], align 4 +; POST-PROCESS-NEXT: [[TMP108:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 100 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = inttoptr i32 [[TMP108]] to ptr addrspace(21) +; 
POST-PROCESS-NEXT: [[TMP110:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP109]], i32 0 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(21) [[TMP110]], align 4 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 104 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = inttoptr i32 [[TMP112]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP114:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP113]], i32 0 +; POST-PROCESS-NEXT: [[TMP115:%.*]] = load i32, ptr addrspace(21) [[TMP114]], align 4 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; POST-PROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; POST-PROCESS-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; POST-PROCESS-NEXT: [[TMP116:%.*]] = bitcast float [[DOTSROA_0256_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = bitcast float [[DOTSROA_0256_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-NEXT: [[TMP143:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-NEXT: [[TMP144:%.*]] = load [[DX_TYPES_HANDLE]], ptr 
@"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP143]]) -; POST-PROCESS-NEXT: [[TMP146:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP145]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-NEXT: [[TMP147:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP146]]) +; POST-PROCESS-NEXT: [[TMP118:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP118]]) +; POST-PROCESS-NEXT: [[TMP121:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP120]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-NEXT: [[TMP122:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP121]]) ; POST-PROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; POST-PROCESS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; POST-PROCESS-NEXT: [[TMP149:%.*]] = call i64 @continuation.getAddrAndMD(ptr @ClosestHit.resume.0) ; POST-PROCESS-NEXT: 
[[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP149]], 5 -; POST-PROCESS-NEXT: store i32 [[TMP1]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP150:%.*]] = inttoptr i32 [[TMP148]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP150]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP32]], ptr addrspace(21) [[TMP152]], align 4 -; POST-PROCESS-NEXT: [[TMP151:%.*]] = add i32 [[TMP148]], 4 -; POST-PROCESS-NEXT: [[TMP153:%.*]] = inttoptr i32 [[TMP151]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP153]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP36]], ptr addrspace(21) [[TMP155]], align 4 -; 
POST-PROCESS-NEXT: [[TMP154:%.*]] = add i32 [[TMP148]], 8 -; POST-PROCESS-NEXT: [[TMP156:%.*]] = inttoptr i32 [[TMP154]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP156]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(21) [[TMP158]], align 4 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = add i32 [[TMP148]], 12 -; POST-PROCESS-NEXT: [[TMP159:%.*]] = inttoptr i32 [[TMP157]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP159]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP44]], ptr addrspace(21) [[TMP161]], align 4 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = add i32 [[TMP148]], 16 -; POST-PROCESS-NEXT: [[TMP162:%.*]] = inttoptr i32 [[TMP160]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP162]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP48]], ptr addrspace(21) [[TMP164]], align 4 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = add i32 [[TMP148]], 20 -; POST-PROCESS-NEXT: [[TMP165:%.*]] = inttoptr i32 [[TMP163]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP165]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP52]], ptr addrspace(21) [[TMP167]], align 4 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = add i32 [[TMP148]], 24 -; POST-PROCESS-NEXT: [[TMP168:%.*]] = inttoptr i32 [[TMP166]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP168]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP56]], ptr addrspace(21) [[TMP170]], align 4 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = add i32 [[TMP148]], 28 -; POST-PROCESS-NEXT: [[TMP171:%.*]] = inttoptr i32 [[TMP169]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP171]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP60]], ptr addrspace(21) [[TMP173]], align 4 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = add i32 [[TMP148]], 32 -; POST-PROCESS-NEXT: 
[[TMP174:%.*]] = inttoptr i32 [[TMP172]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP174]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP64]], ptr addrspace(21) [[TMP176]], align 4 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = add i32 [[TMP148]], 36 -; POST-PROCESS-NEXT: [[TMP177:%.*]] = inttoptr i32 [[TMP175]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP177]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP68]], ptr addrspace(21) [[TMP179]], align 4 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = add i32 [[TMP148]], 40 -; POST-PROCESS-NEXT: [[TMP180:%.*]] = inttoptr i32 [[TMP178]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP180]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP72]], ptr addrspace(21) [[TMP182]], align 4 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = add i32 [[TMP148]], 44 -; POST-PROCESS-NEXT: [[TMP183:%.*]] = inttoptr i32 [[TMP181]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP183]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP76]], ptr addrspace(21) [[TMP185]], align 4 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = add i32 [[TMP148]], 48 -; POST-PROCESS-NEXT: [[TMP186:%.*]] = inttoptr i32 [[TMP184]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP186]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP80]], ptr addrspace(21) [[TMP188]], align 4 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = add i32 [[TMP148]], 52 -; POST-PROCESS-NEXT: [[TMP189:%.*]] = inttoptr i32 [[TMP187]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP189]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP84]], ptr addrspace(21) [[TMP191]], align 4 -; POST-PROCESS-NEXT: [[TMP190:%.*]] = add i32 [[TMP148]], 56 -; POST-PROCESS-NEXT: [[TMP192:%.*]] = inttoptr i32 [[TMP190]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP192]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP88]], ptr addrspace(21) [[TMP194]], align 4 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = add i32 [[TMP148]], 60 -; POST-PROCESS-NEXT: [[TMP195:%.*]] = inttoptr i32 [[TMP193]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP195]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP92]], ptr addrspace(21) [[TMP197]], align 4 -; POST-PROCESS-NEXT: [[TMP196:%.*]] = add i32 [[TMP148]], 64 -; POST-PROCESS-NEXT: [[TMP198:%.*]] = inttoptr i32 [[TMP196]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP198]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP96]], ptr addrspace(21) [[TMP200]], align 4 -; POST-PROCESS-NEXT: [[TMP199:%.*]] = add i32 [[TMP148]], 68 -; POST-PROCESS-NEXT: [[TMP201:%.*]] = inttoptr i32 [[TMP199]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP201]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP100]], ptr addrspace(21) [[TMP203]], align 4 -; POST-PROCESS-NEXT: [[TMP202:%.*]] = add i32 [[TMP148]], 72 -; POST-PROCESS-NEXT: [[TMP204:%.*]] = inttoptr i32 [[TMP202]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP204]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP104]], ptr addrspace(21) [[TMP206]], align 4 -; POST-PROCESS-NEXT: [[TMP205:%.*]] = add i32 [[TMP148]], 76 -; POST-PROCESS-NEXT: [[TMP207:%.*]] = inttoptr i32 [[TMP205]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP207]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP108]], ptr addrspace(21) [[TMP209]], align 4 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = add i32 [[TMP148]], 80 -; POST-PROCESS-NEXT: [[TMP210:%.*]] = inttoptr i32 [[TMP208]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP212:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP210]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP112]], ptr addrspace(21) [[TMP212]], align 4 -; POST-PROCESS-NEXT: [[TMP211:%.*]] = add i32 [[TMP148]], 84 -; POST-PROCESS-NEXT: [[TMP213:%.*]] = inttoptr i32 [[TMP211]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP213]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP116]], ptr addrspace(21) [[TMP215]], align 4 -; POST-PROCESS-NEXT: [[TMP214:%.*]] = add i32 [[TMP148]], 88 -; POST-PROCESS-NEXT: [[TMP216:%.*]] = inttoptr i32 [[TMP214]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP218:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP216]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP120]], ptr addrspace(21) [[TMP218]], align 4 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = add i32 [[TMP148]], 92 -; POST-PROCESS-NEXT: [[TMP219:%.*]] = inttoptr i32 [[TMP217]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP219]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP124]], ptr addrspace(21) [[TMP221]], align 4 -; POST-PROCESS-NEXT: [[TMP220:%.*]] = add i32 [[TMP148]], 96 -; POST-PROCESS-NEXT: [[TMP222:%.*]] = inttoptr i32 [[TMP220]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP224:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP222]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP128]], ptr addrspace(21) [[TMP224]], align 4 -; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP148]], 100 -; POST-PROCESS-NEXT: [[TMP225:%.*]] = inttoptr i32 [[TMP223]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP227:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP225]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP132]], ptr addrspace(21) [[TMP227]], align 4 -; POST-PROCESS-NEXT: [[TMP226:%.*]] = add i32 [[TMP148]], 104 -; POST-PROCESS-NEXT: [[TMP228:%.*]] = inttoptr i32 [[TMP226]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP230:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP228]], i32 0 -; POST-PROCESS-NEXT: store i32 [[TMP136]], ptr 
addrspace(21) [[TMP230]], align 4 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP124]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(21) [[TMP125]], align 4 +; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP1]], 4 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(21) [[TMP128]], align 4 +; POST-PROCESS-NEXT: [[TMP129:%.*]] = add i32 [[TMP1]], 8 +; POST-PROCESS-NEXT: [[TMP130:%.*]] = inttoptr i32 [[TMP129]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP130]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(21) [[TMP131]], align 4 +; POST-PROCESS-NEXT: [[TMP132:%.*]] = add i32 [[TMP1]], 12 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = inttoptr i32 [[TMP132]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP133]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(21) [[TMP134]], align 4 +; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP1]], 16 +; POST-PROCESS-NEXT: [[TMP136:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP136]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(21) [[TMP137]], align 4 +; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP1]], 20 +; POST-PROCESS-NEXT: [[TMP139:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP139]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP31]], ptr addrspace(21) [[TMP140]], align 4 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = add i32 [[TMP1]], 24 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = inttoptr i32 
[[TMP141]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP142]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP35]], ptr addrspace(21) [[TMP143]], align 4 +; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP1]], 28 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(21) [[TMP146]], align 4 +; POST-PROCESS-NEXT: [[TMP147:%.*]] = add i32 [[TMP1]], 32 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = inttoptr i32 [[TMP147]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP204:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP148]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP43]], ptr addrspace(21) [[TMP204]], align 4 +; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP1]], 36 +; POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP47]], ptr addrspace(21) [[TMP152]], align 4 +; POST-PROCESS-NEXT: [[TMP153:%.*]] = add i32 [[TMP1]], 40 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = inttoptr i32 [[TMP153]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP154]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP51]], ptr addrspace(21) [[TMP155]], align 4 +; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP1]], 44 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP55]], ptr addrspace(21) [[TMP158]], align 4 +; POST-PROCESS-NEXT: [[TMP159:%.*]] = add i32 [[TMP1]], 48 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = inttoptr i32 [[TMP159]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr i8, 
ptr addrspace(21) [[TMP160]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP59]], ptr addrspace(21) [[TMP161]], align 4 +; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP1]], 52 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP63]], ptr addrspace(21) [[TMP164]], align 4 +; POST-PROCESS-NEXT: [[TMP165:%.*]] = add i32 [[TMP1]], 56 +; POST-PROCESS-NEXT: [[TMP166:%.*]] = inttoptr i32 [[TMP165]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP166]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP67]], ptr addrspace(21) [[TMP167]], align 4 +; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP1]], 60 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP169]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP71]], ptr addrspace(21) [[TMP170]], align 4 +; POST-PROCESS-NEXT: [[TMP171:%.*]] = add i32 [[TMP1]], 64 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = inttoptr i32 [[TMP171]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP172]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP75]], ptr addrspace(21) [[TMP173]], align 4 +; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP1]], 68 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP79]], ptr addrspace(21) [[TMP176]], align 4 +; POST-PROCESS-NEXT: [[TMP177:%.*]] = add i32 [[TMP1]], 72 +; POST-PROCESS-NEXT: [[TMP178:%.*]] = inttoptr i32 [[TMP177]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP178]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP83]], ptr 
addrspace(21) [[TMP179]], align 4 +; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP1]], 76 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP87]], ptr addrspace(21) [[TMP182]], align 4 +; POST-PROCESS-NEXT: [[TMP183:%.*]] = add i32 [[TMP1]], 80 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = inttoptr i32 [[TMP183]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP184]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP91]], ptr addrspace(21) [[TMP185]], align 4 +; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP1]], 84 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP95]], ptr addrspace(21) [[TMP188]], align 4 +; POST-PROCESS-NEXT: [[TMP189:%.*]] = add i32 [[TMP1]], 88 +; POST-PROCESS-NEXT: [[TMP190:%.*]] = inttoptr i32 [[TMP189]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP190]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP99]], ptr addrspace(21) [[TMP191]], align 4 +; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP1]], 92 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP193]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP103]], ptr addrspace(21) [[TMP194]], align 4 +; POST-PROCESS-NEXT: [[TMP195:%.*]] = add i32 [[TMP1]], 96 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = inttoptr i32 [[TMP195]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP196]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP107]], ptr addrspace(21) [[TMP197]], align 4 +; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 
[[TMP1]], 100 +; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP111]], ptr addrspace(21) [[TMP200]], align 4 +; POST-PROCESS-NEXT: [[TMP201:%.*]] = add i32 [[TMP1]], 104 +; POST-PROCESS-NEXT: [[TMP202:%.*]] = inttoptr i32 [[TMP201]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP202]], i32 0 +; POST-PROCESS-NEXT: store i32 [[TMP115]], ptr addrspace(21) [[TMP203]], align 4 +; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT54:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP1]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_1_INSERT57:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT54]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POST-PROCESS-NEXT: [[DOTFCA_2_INSERT60:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT57]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POST-PROCESS-NEXT: [[DOTFCA_3_INSERT63:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT60]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POST-PROCESS-NEXT: [[DOTFCA_4_INSERT66:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT63]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POST-PROCESS-NEXT: [[DOTFCA_5_INSERT69:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT66]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POST-PROCESS-NEXT: [[DOTFCA_6_INSERT72:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT69]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POST-PROCESS-NEXT: [[DOTFCA_7_INSERT75:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT72]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POST-PROCESS-NEXT: [[DOTFCA_8_INSERT78:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT75]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POST-PROCESS-NEXT: [[DOTFCA_9_INSERT81:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT78]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POST-PROCESS-NEXT: [[DOTFCA_10_INSERT84:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT81]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; 
POST-PROCESS-NEXT: [[DOTFCA_11_INSERT87:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT84]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POST-PROCESS-NEXT: [[DOTFCA_12_INSERT90:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT87]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POST-PROCESS-NEXT: [[DOTFCA_13_INSERT93:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT90]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POST-PROCESS-NEXT: [[DOTFCA_14_INSERT96:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT93]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POST-PROCESS-NEXT: [[DOTFCA_15_INSERT99:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT96]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POST-PROCESS-NEXT: [[DOTFCA_16_INSERT102:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT99]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POST-PROCESS-NEXT: [[DOTFCA_17_INSERT105:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT102]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POST-PROCESS-NEXT: [[DOTFCA_18_INSERT108:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT105]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POST-PROCESS-NEXT: [[DOTFCA_19_INSERT111:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT108]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POST-PROCESS-NEXT: [[DOTFCA_20_INSERT114:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT111]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POST-PROCESS-NEXT: [[DOTFCA_21_INSERT117:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT114]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POST-PROCESS-NEXT: [[DOTFCA_22_INSERT120:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT117]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POST-PROCESS-NEXT: [[DOTFCA_23_INSERT123:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT120]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POST-PROCESS-NEXT: [[DOTFCA_24_INSERT126:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT123]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POST-PROCESS-NEXT: [[DOTFCA_25_INSERT129:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT126]], i32 
[[PAYLOAD_FCA_25_EXTRACT]], 25 +; POST-PROCESS-NEXT: [[DOTFCA_26_INSERT132:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT129]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POST-PROCESS-NEXT: [[DOTFCA_27_INSERT135:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT132]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POST-PROCESS-NEXT: [[DOTFCA_28_INSERT138:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT135]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POST-PROCESS-NEXT: [[DOTFCA_29_INSERT141:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT138]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POST-PROCESS-NEXT: [[TMP229:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP229]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META17]], !continuation.returnedRegistercount [[META17]] +; POST-PROCESS-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP229]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT141]]) ; POST-PROCESS-NEXT: unreachable ; ; ; POST-PROCESS-LABEL: define dso_local void @ClosestHit.resume.0( -; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META17]] !continuation [[META25]] { +; POST-PROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [23 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation [[META25]] { ; POST-PROCESS-NEXT: entryresume.0: ; POST-PROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP27:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP27]], -120 -; POST-PROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP4:%.*]] 
= load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP28]], i32 0 -; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP31]], align 4 +; POST-PROCESS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 2 +; POST-PROCESS-NEXT: [[TMP3:%.*]] = extractvalue [30 x i32] [[TMP4]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 1 +; POST-PROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 2 +; POST-PROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 3 +; POST-PROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x 
i32] [[TMP4]], 4 +; POST-PROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 5 +; POST-PROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 6 +; POST-PROCESS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 7 +; POST-PROCESS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 8 +; POST-PROCESS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 9 +; POST-PROCESS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 10 +; POST-PROCESS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 11 +; POST-PROCESS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 12 +; POST-PROCESS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 13 +; POST-PROCESS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 14 +; POST-PROCESS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 15 +; POST-PROCESS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 16 +; POST-PROCESS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 17 +; POST-PROCESS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 18 +; POST-PROCESS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 19 +; POST-PROCESS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 20 +; POST-PROCESS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 21 +; POST-PROCESS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 22 +; POST-PROCESS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 23 +; POST-PROCESS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 24 +; POST-PROCESS-NEXT: [[TMP22:%.*]] = extractvalue [30 x i32] [[TMP4]], 25 +; POST-PROCESS-NEXT: [[TMP23:%.*]] = extractvalue [30 x i32] [[TMP4]], 26 +; POST-PROCESS-NEXT: [[TMP24:%.*]] = extractvalue [30 x i32] [[TMP4]], 27 +; POST-PROCESS-NEXT: 
[[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 28 +; POST-PROCESS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP4]], 29 +; POST-PROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 +; POST-PROCESS-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(21) [[TMP6]], align 4 ; POST-PROCESS-NEXT: [[TMP30:%.*]] = add i32 [[TMP3]], 4 -; POST-PROCESS-NEXT: [[TMP32:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP32]], i32 0 -; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP35]], align 4 +; POST-PROCESS-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP30]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP9]], i32 0 +; POST-PROCESS-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(21) [[TMP10]], align 4 ; POST-PROCESS-NEXT: [[TMP34:%.*]] = add i32 [[TMP3]], 8 -; POST-PROCESS-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP36]], i32 0 -; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP39]], align 4 +; POST-PROCESS-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP34]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP13]], i32 0 +; POST-PROCESS-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(21) [[TMP14]], align 4 ; POST-PROCESS-NEXT: [[TMP38:%.*]] = add i32 [[TMP3]], 12 -; POST-PROCESS-NEXT: [[TMP40:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP40]], i32 0 -; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(21) [[TMP43]], align 4 +; POST-PROCESS-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP38]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP18:%.*]] = getelementptr 
i8, ptr addrspace(21) [[TMP17]], i32 0 +; POST-PROCESS-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(21) [[TMP18]], align 4 ; POST-PROCESS-NEXT: [[TMP42:%.*]] = add i32 [[TMP3]], 16 -; POST-PROCESS-NEXT: [[TMP44:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP44]], i32 0 -; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP47]], align 4 +; POST-PROCESS-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP42]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP21]], i32 0 +; POST-PROCESS-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(21) [[TMP28]], align 4 ; POST-PROCESS-NEXT: [[TMP46:%.*]] = add i32 [[TMP3]], 20 -; POST-PROCESS-NEXT: [[TMP48:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP48]], i32 0 -; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP51]], align 4 +; POST-PROCESS-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP46]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP25]], i32 0 +; POST-PROCESS-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(21) [[TMP26]], align 4 ; POST-PROCESS-NEXT: [[TMP50:%.*]] = add i32 [[TMP3]], 24 -; POST-PROCESS-NEXT: [[TMP52:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP52]], i32 0 -; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP55]], align 4 +; POST-PROCESS-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP50]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP31]], i32 0 +; POST-PROCESS-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(21) [[TMP32]], align 4 ; POST-PROCESS-NEXT: [[TMP54:%.*]] = add i32 [[TMP3]], 28 -; POST-PROCESS-NEXT: [[TMP56:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) -; 
POST-PROCESS-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP56]], i32 0 -; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP59]], align 4 +; POST-PROCESS-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP54]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP35]], i32 0 +; POST-PROCESS-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(21) [[TMP36]], align 4 ; POST-PROCESS-NEXT: [[TMP58:%.*]] = add i32 [[TMP3]], 32 -; POST-PROCESS-NEXT: [[TMP60:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP60]], i32 0 -; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP63]], align 4 +; POST-PROCESS-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP58]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP39]], i32 0 +; POST-PROCESS-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(21) [[TMP40]], align 4 ; POST-PROCESS-NEXT: [[TMP62:%.*]] = add i32 [[TMP3]], 36 -; POST-PROCESS-NEXT: [[TMP64:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP64]], i32 0 -; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP67]], align 4 +; POST-PROCESS-NEXT: [[TMP43:%.*]] = inttoptr i32 [[TMP62]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP43]], i32 0 +; POST-PROCESS-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(21) [[TMP44]], align 4 ; POST-PROCESS-NEXT: [[TMP66:%.*]] = add i32 [[TMP3]], 40 -; POST-PROCESS-NEXT: [[TMP68:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP68]], i32 0 -; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(21) [[TMP71]], align 4 +; POST-PROCESS-NEXT: [[TMP47:%.*]] = inttoptr i32 [[TMP66]] to ptr addrspace(21) +; POST-PROCESS-NEXT: 
[[TMP48:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP47]], i32 0 +; POST-PROCESS-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(21) [[TMP48]], align 4 ; POST-PROCESS-NEXT: [[TMP70:%.*]] = add i32 [[TMP3]], 44 -; POST-PROCESS-NEXT: [[TMP72:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP72]], i32 0 -; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP75]], align 4 +; POST-PROCESS-NEXT: [[TMP51:%.*]] = inttoptr i32 [[TMP70]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP51]], i32 0 +; POST-PROCESS-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(21) [[TMP52]], align 4 ; POST-PROCESS-NEXT: [[TMP74:%.*]] = add i32 [[TMP3]], 48 -; POST-PROCESS-NEXT: [[TMP76:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP76]], i32 0 -; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(21) [[TMP79]], align 4 +; POST-PROCESS-NEXT: [[TMP55:%.*]] = inttoptr i32 [[TMP74]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP55]], i32 0 +; POST-PROCESS-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(21) [[TMP56]], align 4 ; POST-PROCESS-NEXT: [[TMP78:%.*]] = add i32 [[TMP3]], 52 -; POST-PROCESS-NEXT: [[TMP80:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP80]], i32 0 -; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP83]], align 4 +; POST-PROCESS-NEXT: [[TMP59:%.*]] = inttoptr i32 [[TMP78]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP59]], i32 0 +; POST-PROCESS-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(21) [[TMP60]], align 4 ; POST-PROCESS-NEXT: [[TMP82:%.*]] = add i32 [[TMP3]], 56 -; POST-PROCESS-NEXT: [[TMP84:%.*]] = inttoptr i32 [[TMP82]] to 
ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP84]], i32 0 -; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP87]], align 4 +; POST-PROCESS-NEXT: [[TMP63:%.*]] = inttoptr i32 [[TMP82]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP63]], i32 0 +; POST-PROCESS-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(21) [[TMP64]], align 4 ; POST-PROCESS-NEXT: [[TMP86:%.*]] = add i32 [[TMP3]], 60 -; POST-PROCESS-NEXT: [[TMP88:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP88]], i32 0 -; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP91]], align 4 +; POST-PROCESS-NEXT: [[TMP67:%.*]] = inttoptr i32 [[TMP86]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP67]], i32 0 +; POST-PROCESS-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(21) [[TMP68]], align 4 ; POST-PROCESS-NEXT: [[TMP90:%.*]] = add i32 [[TMP3]], 64 -; POST-PROCESS-NEXT: [[TMP92:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP92]], i32 0 -; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP95]], align 4 +; POST-PROCESS-NEXT: [[TMP71:%.*]] = inttoptr i32 [[TMP90]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP71]], i32 0 +; POST-PROCESS-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(21) [[TMP72]], align 4 ; POST-PROCESS-NEXT: [[TMP94:%.*]] = add i32 [[TMP3]], 68 -; POST-PROCESS-NEXT: [[TMP96:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP96]], i32 0 -; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP99]], align 4 +; POST-PROCESS-NEXT: [[TMP75:%.*]] = inttoptr i32 [[TMP94]] to ptr addrspace(21) 
+; POST-PROCESS-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP75]], i32 0 +; POST-PROCESS-NEXT: [[TMP97:%.*]] = load i32, ptr addrspace(21) [[TMP76]], align 4 ; POST-PROCESS-NEXT: [[TMP98:%.*]] = add i32 [[TMP3]], 72 -; POST-PROCESS-NEXT: [[TMP100:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP100]], i32 0 -; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(21) [[TMP103]], align 4 +; POST-PROCESS-NEXT: [[TMP79:%.*]] = inttoptr i32 [[TMP98]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP79]], i32 0 +; POST-PROCESS-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(21) [[TMP80]], align 4 ; POST-PROCESS-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 76 -; POST-PROCESS-NEXT: [[TMP104:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP104]], i32 0 -; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP107]], align 4 +; POST-PROCESS-NEXT: [[TMP83:%.*]] = inttoptr i32 [[TMP102]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP83]], i32 0 +; POST-PROCESS-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(21) [[TMP84]], align 4 ; POST-PROCESS-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 80 -; POST-PROCESS-NEXT: [[TMP108:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP108]], i32 0 -; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP111]], align 4 +; POST-PROCESS-NEXT: [[TMP87:%.*]] = inttoptr i32 [[TMP106]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP87]], i32 0 +; POST-PROCESS-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(21) [[TMP88]], align 4 ; POST-PROCESS-NEXT: [[TMP110:%.*]] = add i32 [[TMP3]], 84 -; 
POST-PROCESS-NEXT: [[TMP112:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP112]], i32 0 -; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP115]], align 4 +; POST-PROCESS-NEXT: [[TMP91:%.*]] = inttoptr i32 [[TMP110]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP91]], i32 0 +; POST-PROCESS-NEXT: [[TMP113:%.*]] = load i32, ptr addrspace(21) [[TMP92]], align 4 ; POST-PROCESS-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 88 -; POST-PROCESS-NEXT: [[TMP116:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP116]], i32 0 -; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP119]], align 4 +; POST-PROCESS-NEXT: [[TMP95:%.*]] = inttoptr i32 [[TMP114]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP95]], i32 0 +; POST-PROCESS-NEXT: [[TMP117:%.*]] = load i32, ptr addrspace(21) [[TMP96]], align 4 ; POST-PROCESS-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 92 -; POST-PROCESS-NEXT: [[TMP120:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP120]], i32 0 -; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP123]], align 4 +; POST-PROCESS-NEXT: [[TMP99:%.*]] = inttoptr i32 [[TMP118]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP99]], i32 0 +; POST-PROCESS-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(21) [[TMP100]], align 4 ; POST-PROCESS-NEXT: [[TMP122:%.*]] = add i32 [[TMP3]], 96 -; POST-PROCESS-NEXT: [[TMP124:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP124]], i32 0 -; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) 
[[TMP127]], align 4 +; POST-PROCESS-NEXT: [[TMP103:%.*]] = inttoptr i32 [[TMP122]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP103]], i32 0 +; POST-PROCESS-NEXT: [[TMP125:%.*]] = load i32, ptr addrspace(21) [[TMP104]], align 4 ; POST-PROCESS-NEXT: [[TMP126:%.*]] = add i32 [[TMP3]], 100 -; POST-PROCESS-NEXT: [[TMP128:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP128]], i32 0 -; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP131]], align 4 +; POST-PROCESS-NEXT: [[TMP107:%.*]] = inttoptr i32 [[TMP126]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP107]], i32 0 +; POST-PROCESS-NEXT: [[TMP129:%.*]] = load i32, ptr addrspace(21) [[TMP108]], align 4 ; POST-PROCESS-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 104 -; POST-PROCESS-NEXT: [[TMP132:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP136:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP132]], i32 0 -; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP136]], align 4 -; POST-PROCESS-NEXT: [[TMP134:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-NEXT: [[TMP111:%.*]] = inttoptr i32 [[TMP130]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP111]], i32 0 +; POST-PROCESS-NEXT: [[TMP133:%.*]] = load i32, ptr addrspace(21) [[TMP116]], align 4 +; POST-PROCESS-NEXT: [[TMP112:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP112]], 0 ; POST-PROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-NEXT: [[TMP135:%.*]] = add i32 [[TMP2]], 
116 -; POST-PROCESS-NEXT: [[TMP137:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP137]], i32 0 -; POST-PROCESS-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(21) [[TMP139]], align 4 +; POST-PROCESS-NEXT: [[TMP120:%.*]] = inttoptr i32 [[TMP135]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP120]], i32 0 +; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(21) [[TMP115]], align 4 ; POST-PROCESS-NEXT: [[TMP138:%.*]] = add i32 [[TMP2]], 108 -; POST-PROCESS-NEXT: [[TMP142:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP142]], i32 0 +; POST-PROCESS-NEXT: [[TMP123:%.*]] = inttoptr i32 [[TMP138]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP123]], i32 0 ; POST-PROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP140]], align 4 -; POST-PROCESS-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: [[TMP141:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-NEXT: store i32 [[TMP4]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-NEXT: store i32 
[[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP141]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 +; POST-PROCESS-NEXT: [[TMP119:%.*]] = inttoptr i32 [[TMP141]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP119]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(21) [[TMP143]], align 4 ; POST-PROCESS-NEXT: [[TMP144:%.*]] = add i32 [[TMP141]], 4 -; POST-PROCESS-NEXT: [[TMP148:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP148]], i32 0 +; POST-PROCESS-NEXT: [[TMP124:%.*]] = inttoptr i32 [[TMP144]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP124]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP33]], ptr addrspace(21) [[TMP146]], align 4 ; POST-PROCESS-NEXT: [[TMP147:%.*]] = add i32 [[TMP141]], 8 -; POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP147]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 +; POST-PROCESS-NEXT: [[TMP127:%.*]] = inttoptr i32 [[TMP147]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP127]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(21) [[TMP149]], align 4 ; POST-PROCESS-NEXT: [[TMP150:%.*]] = add i32 [[TMP141]], 12 -; POST-PROCESS-NEXT: [[TMP154:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr 
i8, ptr addrspace(21) [[TMP154]], i32 0 +; POST-PROCESS-NEXT: [[TMP128:%.*]] = inttoptr i32 [[TMP150]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP128]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP41]], ptr addrspace(21) [[TMP152]], align 4 ; POST-PROCESS-NEXT: [[TMP153:%.*]] = add i32 [[TMP141]], 16 -; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP153]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 +; POST-PROCESS-NEXT: [[TMP131:%.*]] = inttoptr i32 [[TMP153]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP131]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP45]], ptr addrspace(21) [[TMP155]], align 4 ; POST-PROCESS-NEXT: [[TMP156:%.*]] = add i32 [[TMP141]], 20 -; POST-PROCESS-NEXT: [[TMP160:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP160]], i32 0 +; POST-PROCESS-NEXT: [[TMP134:%.*]] = inttoptr i32 [[TMP156]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP134]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP49]], ptr addrspace(21) [[TMP158]], align 4 ; POST-PROCESS-NEXT: [[TMP159:%.*]] = add i32 [[TMP141]], 24 -; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP159]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 +; POST-PROCESS-NEXT: [[TMP137:%.*]] = inttoptr i32 [[TMP159]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP137]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP53]], ptr addrspace(21) [[TMP161]], align 4 ; POST-PROCESS-NEXT: [[TMP162:%.*]] = add i32 [[TMP141]], 28 -; POST-PROCESS-NEXT: [[TMP166:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr 
addrspace(21) [[TMP166]], i32 0 +; POST-PROCESS-NEXT: [[TMP142:%.*]] = inttoptr i32 [[TMP162]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP164:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP142]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP57]], ptr addrspace(21) [[TMP164]], align 4 ; POST-PROCESS-NEXT: [[TMP165:%.*]] = add i32 [[TMP141]], 32 -; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP165]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP169]], i32 0 +; POST-PROCESS-NEXT: [[TMP145:%.*]] = inttoptr i32 [[TMP165]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP145]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP61]], ptr addrspace(21) [[TMP167]], align 4 ; POST-PROCESS-NEXT: [[TMP168:%.*]] = add i32 [[TMP141]], 36 -; POST-PROCESS-NEXT: [[TMP172:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP172]], i32 0 +; POST-PROCESS-NEXT: [[TMP148:%.*]] = inttoptr i32 [[TMP168]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP148]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP65]], ptr addrspace(21) [[TMP170]], align 4 ; POST-PROCESS-NEXT: [[TMP171:%.*]] = add i32 [[TMP141]], 40 -; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP171]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 0 +; POST-PROCESS-NEXT: [[TMP151:%.*]] = inttoptr i32 [[TMP171]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP173:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP151]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP69]], ptr addrspace(21) [[TMP173]], align 4 ; POST-PROCESS-NEXT: [[TMP174:%.*]] = add i32 [[TMP141]], 44 -; POST-PROCESS-NEXT: [[TMP178:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) 
[[TMP178]], i32 0 +; POST-PROCESS-NEXT: [[TMP154:%.*]] = inttoptr i32 [[TMP174]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP154]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP73]], ptr addrspace(21) [[TMP176]], align 4 ; POST-PROCESS-NEXT: [[TMP177:%.*]] = add i32 [[TMP141]], 48 -; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP177]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 +; POST-PROCESS-NEXT: [[TMP157:%.*]] = inttoptr i32 [[TMP177]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP179:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP157]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP77]], ptr addrspace(21) [[TMP179]], align 4 ; POST-PROCESS-NEXT: [[TMP180:%.*]] = add i32 [[TMP141]], 52 -; POST-PROCESS-NEXT: [[TMP184:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP184]], i32 0 +; POST-PROCESS-NEXT: [[TMP160:%.*]] = inttoptr i32 [[TMP180]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP182:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP160]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP81]], ptr addrspace(21) [[TMP182]], align 4 ; POST-PROCESS-NEXT: [[TMP183:%.*]] = add i32 [[TMP141]], 56 -; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP183]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 +; POST-PROCESS-NEXT: [[TMP163:%.*]] = inttoptr i32 [[TMP183]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP185:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP163]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP85]], ptr addrspace(21) [[TMP185]], align 4 ; POST-PROCESS-NEXT: [[TMP186:%.*]] = add i32 [[TMP141]], 60 -; POST-PROCESS-NEXT: [[TMP190:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP190]], i32 0 
+; POST-PROCESS-NEXT: [[TMP166:%.*]] = inttoptr i32 [[TMP186]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP188:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP166]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP89]], ptr addrspace(21) [[TMP188]], align 4 ; POST-PROCESS-NEXT: [[TMP189:%.*]] = add i32 [[TMP141]], 64 -; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP189]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP193]], i32 0 +; POST-PROCESS-NEXT: [[TMP169:%.*]] = inttoptr i32 [[TMP189]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP191:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP169]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP93]], ptr addrspace(21) [[TMP191]], align 4 ; POST-PROCESS-NEXT: [[TMP192:%.*]] = add i32 [[TMP141]], 68 -; POST-PROCESS-NEXT: [[TMP196:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP196]], i32 0 +; POST-PROCESS-NEXT: [[TMP172:%.*]] = inttoptr i32 [[TMP192]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP194:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP172]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP97]], ptr addrspace(21) [[TMP194]], align 4 ; POST-PROCESS-NEXT: [[TMP195:%.*]] = add i32 [[TMP141]], 72 -; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP195]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 +; POST-PROCESS-NEXT: [[TMP175:%.*]] = inttoptr i32 [[TMP195]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP197:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP175]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP101]], ptr addrspace(21) [[TMP197]], align 4 ; POST-PROCESS-NEXT: [[TMP198:%.*]] = add i32 [[TMP141]], 76 -; POST-PROCESS-NEXT: [[TMP202:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP202]], i32 0 +; 
POST-PROCESS-NEXT: [[TMP178:%.*]] = inttoptr i32 [[TMP198]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP200:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP178]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP105]], ptr addrspace(21) [[TMP200]], align 4 ; POST-PROCESS-NEXT: [[TMP201:%.*]] = add i32 [[TMP141]], 80 -; POST-PROCESS-NEXT: [[TMP205:%.*]] = inttoptr i32 [[TMP201]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP205]], i32 0 +; POST-PROCESS-NEXT: [[TMP181:%.*]] = inttoptr i32 [[TMP201]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP203:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP181]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP109]], ptr addrspace(21) [[TMP203]], align 4 ; POST-PROCESS-NEXT: [[TMP204:%.*]] = add i32 [[TMP141]], 84 -; POST-PROCESS-NEXT: [[TMP208:%.*]] = inttoptr i32 [[TMP204]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP208]], i32 0 +; POST-PROCESS-NEXT: [[TMP184:%.*]] = inttoptr i32 [[TMP204]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP206:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP184]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP113]], ptr addrspace(21) [[TMP206]], align 4 ; POST-PROCESS-NEXT: [[TMP207:%.*]] = add i32 [[TMP141]], 88 -; POST-PROCESS-NEXT: [[TMP211:%.*]] = inttoptr i32 [[TMP207]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP211]], i32 0 +; POST-PROCESS-NEXT: [[TMP187:%.*]] = inttoptr i32 [[TMP207]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP209:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP187]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP117]], ptr addrspace(21) [[TMP209]], align 4 ; POST-PROCESS-NEXT: [[TMP210:%.*]] = add i32 [[TMP141]], 92 -; POST-PROCESS-NEXT: [[TMP214:%.*]] = inttoptr i32 [[TMP210]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP212:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP214]], i32 0 +; 
POST-PROCESS-NEXT: [[TMP190:%.*]] = inttoptr i32 [[TMP210]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP212:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP190]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP121]], ptr addrspace(21) [[TMP212]], align 4 ; POST-PROCESS-NEXT: [[TMP213:%.*]] = add i32 [[TMP141]], 96 -; POST-PROCESS-NEXT: [[TMP217:%.*]] = inttoptr i32 [[TMP213]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP217]], i32 0 +; POST-PROCESS-NEXT: [[TMP193:%.*]] = inttoptr i32 [[TMP213]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP215:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP193]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP125]], ptr addrspace(21) [[TMP215]], align 4 ; POST-PROCESS-NEXT: [[TMP216:%.*]] = add i32 [[TMP141]], 100 -; POST-PROCESS-NEXT: [[TMP220:%.*]] = inttoptr i32 [[TMP216]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP218:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP220]], i32 0 +; POST-PROCESS-NEXT: [[TMP196:%.*]] = inttoptr i32 [[TMP216]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP218:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP196]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP129]], ptr addrspace(21) [[TMP218]], align 4 ; POST-PROCESS-NEXT: [[TMP219:%.*]] = add i32 [[TMP141]], 104 -; POST-PROCESS-NEXT: [[TMP225:%.*]] = inttoptr i32 [[TMP219]] to ptr addrspace(21) -; POST-PROCESS-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP225]], i32 0 +; POST-PROCESS-NEXT: [[TMP199:%.*]] = inttoptr i32 [[TMP219]] to ptr addrspace(21) +; POST-PROCESS-NEXT: [[TMP221:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP199]], i32 0 ; POST-PROCESS-NEXT: store i32 [[TMP133]], ptr addrspace(21) [[TMP221]], align 4 ; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POST-PROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP141]], 0 +; POST-PROCESS-NEXT: 
[[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POST-PROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POST-PROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POST-PROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POST-PROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POST-PROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POST-PROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POST-PROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POST-PROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POST-PROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POST-PROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POST-PROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POST-PROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POST-PROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POST-PROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POST-PROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POST-PROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POST-PROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POST-PROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; POST-PROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POST-PROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POST-PROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POST-PROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; POST-PROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; POST-PROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[TMP22]], 25 +; POST-PROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[TMP23]], 26 +; POST-PROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[TMP24]], 27 +; POST-PROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POST-PROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; POST-PROCESS-NEXT: [[TMP222:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP223:%.*]] = add i32 [[TMP222]], -120 ; POST-PROCESS-NEXT: store i32 [[TMP223]], ptr [[CSP]], align 4 ; POST-PROCESS-NEXT: [[TMP224:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP224]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META17]] +; POST-PROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP224]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [23 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POST-PROCESS-NEXT: unreachable ; ; @@ -2286,7 +2385,7 @@ attributes #3 = { nounwind } ; ; ; POST-PROCESS-GLOBAL-LABEL: define void @main( -; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation.registercount [[META8]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] !continuation.state [[META8]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META8:![0-9]+]] !continuation.entry [[META19:![0-9]+]] !continuation [[META20:![0-9]+]] !continuation.stacksize [[META21:![0-9]+]] { ; POST-PROCESS-GLOBAL-NEXT: AllocaSpillBB: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -2295,130 +2394,135 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], 108 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT56:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr 
@"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP5]]) ; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP7]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP8]]) -; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT56]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = call i64 @continuation.getAddrAndMD(ptr @main.resume.0) ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP11]], 5 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 
4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP10]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP13]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP12]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP15]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], 8 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP14]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP17]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[TMP10]], 12 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP16]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP19]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = add i32 [[TMP10]], 16 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP18]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP21]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = add i32 [[TMP10]], 20 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP20]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP23]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = add i32 [[TMP10]], 24 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP22]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP25]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = add i32 [[TMP10]], 28 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP24]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP27]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = add i32 [[TMP10]], 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP26]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP29]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = add i32 [[TMP10]], 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP28]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP31]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = add i32 [[TMP10]], 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP30]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP33]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[TMP10]], 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP35]], align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP10]], 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP34]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP37]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP10]], 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP36]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP39]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = add i32 [[TMP10]], 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP38]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP41]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP10]], 60 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP40]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP43]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP10]], 64 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP45]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[TMP10]], 68 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP44]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP47]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP10]], 72 -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP46]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP49]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = add i32 [[TMP10]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP48]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, 
ptr addrspace(22) [[TMP51]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = add i32 [[TMP10]], 80 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP50]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP53]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP10]], 84 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP52]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP55]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = add i32 [[TMP10]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP54]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP57]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[TMP10]], 92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP56]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP59]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP10]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP58]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP61]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP10]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP60]] -; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP63]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[TMP10]], 104 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP12]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = add i32 [[TMP3]], 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP13]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP14]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = add i32 [[TMP3]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP15]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP16]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = add i32 [[TMP3]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP17]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP18]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = add i32 [[TMP3]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP19]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP20]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = add i32 [[TMP3]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP21]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP22]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = add i32 [[TMP3]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP23]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP24]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = add i32 [[TMP3]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP25]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP26]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = add i32 [[TMP3]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP27]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP28]], align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP29:%.*]] = add i32 [[TMP3]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP29]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP30]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP3]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP31]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP32]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = add i32 [[TMP3]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP33]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP34]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = add i32 [[TMP3]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP35]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP36]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP3]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP38]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP3]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP39]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP40]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[TMP3]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP41]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP42]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP3]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP43]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP44]], 
align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = add i32 [[TMP3]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP45]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP46]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[TMP3]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP48]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP3]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP49]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP50]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP3]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP51]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP52]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = add i32 [[TMP3]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP53]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP54]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP3]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP55]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP56]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP3]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP58]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = add i32 [[TMP3]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP59]] +; POST-PROCESS-GLOBAL-NEXT: store i32 
undef, ptr addrspace(22) [[TMP60]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP3]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP61]] +; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP62]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = add i32 [[TMP3]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP63]] ; POST-PROCESS-GLOBAL-NEXT: store i32 undef, ptr addrspace(22) [[TMP65]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP3]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 undef, 7 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 undef, 8 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 undef, 9 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 undef, 10 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 undef, 11 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 
undef, 12 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 undef, 13 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 undef, 14 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 undef, 15 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 undef, 16 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 undef, 17 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 undef, 18 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 undef, 19 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 undef, 20 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 undef, 21 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 undef, 22 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 undef, 23 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 undef, 24 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 undef, 25 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 undef, 26 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 undef, 27 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 undef, 28 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 undef, 29 ; POST-PROCESS-GLOBAL-NEXT: 
[[TMP64:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP64]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP64]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; ; POST-PROCESS-GLOBAL-LABEL: define dso_local void @main.resume.0( -; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation.registercount [[META18]] !continuation [[META20]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [23 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META8]] !continuation [[META20]] { ; POST-PROCESS-GLOBAL-NEXT: entryresume.0: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -2426,129 +2530,166 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -108 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP5]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(22) [[TMP30]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = extractvalue [30 x i32] [[TMP6]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 6 +; 
POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 22 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = extractvalue [30 x i32] [[TMP6]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = extractvalue [30 x i32] [[TMP6]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP6]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 29 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(22) [[TMP7]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP5]], 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP31]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP31]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(22) [[TMP10]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP5]], 8 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP34]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(22) [[TMP36]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP34]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(22) [[TMP13]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP5]], 12 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP37]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(22) [[TMP39]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(22) [[TMP16]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP5]], 16 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP40]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP42]], align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP19:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP40]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(22) [[TMP19]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP5]], 20 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP43]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(22) [[TMP45]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP43]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(22) [[TMP24]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP5]], 24 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP46]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP46]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(22) [[TMP25]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP5]], 28 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP49]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(22) [[TMP51]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP49]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP28]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP5]], 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP52]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(22) [[TMP54]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(22) 
[[TMP33]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP5]], 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP55]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP57]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP55]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(22) [[TMP36]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP5]], 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP58]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(22) [[TMP60]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP58]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(22) [[TMP39]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP5]], 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP61]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP61]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(22) [[TMP42]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP5]], 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP64]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(22) [[TMP66]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP64]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP45]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP5]], 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr 
addrspace(22) [[TMP29]], i32 [[TMP67]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = load i32, ptr addrspace(22) [[TMP69]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP67]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP5]], 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP70]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP72]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP70]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(22) [[TMP51]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[TMP5]], 60 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP73]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(22) [[TMP75]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP73]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(22) [[TMP54]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP5]], 64 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP76]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP76]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(22) [[TMP57]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP5]], 68 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP79]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(22) [[TMP81]], align 4 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP79]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP60]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP5]], 72 -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP82]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(22) [[TMP84]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP82]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP5]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP85]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP87]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP85]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(22) [[TMP66]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP5]], 80 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(22) [[TMP90]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP88]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(22) [[TMP69]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP5]], 84 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP91]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(22) [[TMP93]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP91]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = load 
i32, ptr addrspace(22) [[TMP72]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP5]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP94]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(22) [[TMP96]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP94]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP75]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP5]], 92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP97]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(22) [[TMP99]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP5]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP100]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP102]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP100]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(22) [[TMP81]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP5]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP103]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(22) [[TMP105]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP103]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(22) [[TMP84]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP5]], 104 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP106]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(22) [[TMP108]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT1:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP106]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(22) [[TMP90]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT57:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP92]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], -108 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP111]], ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr [[CSP]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = add i32 [[TMP87]], -108 +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: ret void ; POST-PROCESS-GLOBAL: entryresume.0.split: ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; ; POST-PROCESS-GLOBAL-LABEL: define void @AnyHit( -; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META23:![0-9]+]] !continuation.state [[META8]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_ANYHITTRAVERSALDATA:%.*]] [[TMP0:%.*]], 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation [[META23:![0-9]+]] { ; POST-PROCESS-GLOBAL-NEXT: AllocaSpillBB: ; POST-PROCESS-GLOBAL-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_ANYHITTRAVERSALDATA]], align 8 ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; 
POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[TMP0]], 0, 0, 0, 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], 
i32 0, i32 0, i32 0, i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: store <3 x i32> [[DOTFCA_0_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_0_GEP]], align 4 @@ -2580,275 +2721,257 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTFCA_1_1_EXTRACT]], ptr [[DOTFCA_1_1_GEP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[PAYLOAD_FCA_0_EXTRACT]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(22) [[TMP5]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP7]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(22) [[TMP8]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP10]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(22) [[TMP11]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP13]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(22) [[TMP14]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP16]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(22) [[TMP17]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP19]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(22) [[TMP20]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP3]], i32 [[TMP22]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(22) [[TMP23]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP25]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(22) [[TMP26]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP28]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP29]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[TMP5]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 36 ; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP31]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(22) [[TMP32]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP5]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 40 ; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP34]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(22) [[TMP35]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP5]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 44 ; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP37]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP5]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 48 ; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP40]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] 
= load i32, ptr addrspace(22) [[TMP41]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP5]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 52 ; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP43]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP5]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 56 ; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP46]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(22) [[TMP47]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP5]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 60 ; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP49]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP5]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 64 ; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP52]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP5]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 68 ; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP55]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP5]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 72 ; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP3]], i32 [[TMP58]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP5]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 76 ; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP61]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(22) [[TMP62]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP5]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 80 ; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP64]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP5]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 84 ; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP67]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP5]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 88 ; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP70]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[TMP5]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 92 ; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP73]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(22) [[TMP74]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP5]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 96 ; 
POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP76]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(22) [[TMP77]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP5]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 100 ; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP79]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP5]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 104 ; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP82]] ; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP5]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP85]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP5]], 80 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(22) [[TMP89]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP5]], 84 -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP91]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP92]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP5]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP94]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(22) [[TMP95]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP5]], 
92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP97]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP5]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP100]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(22) [[TMP101]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP5]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP103]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(22) [[TMP104]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP5]], 104 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP106]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP109]], i32 0, i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP85]], i32 0, i32 1 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; POST-PROCESS-GLOBAL-NEXT: 
[[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_035_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = bitcast float [[DOTSROA_035_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_035_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = bitcast float [[DOTSROA_035_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-GLOBAL-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), 
align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) 
@REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(22) [[TMP113]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = add i32 [[TMP112]], 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP114]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP33]], ptr addrspace(22) [[TMP115]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP112]], 8 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP116]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP36]], ptr addrspace(22) [[TMP117]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = add i32 [[TMP112]], 12 -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP118]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP119]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = add i32 [[TMP112]], 16 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP120]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(22) [[TMP121]], align 4 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = add i32 [[TMP112]], 20 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP122]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP45]], ptr addrspace(22) [[TMP123]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = add i32 [[TMP112]], 24 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP124]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP48]], ptr addrspace(22) [[TMP125]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP112]], 28 -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP126]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP51]], ptr addrspace(22) [[TMP127]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = add i32 [[TMP112]], 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP128]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP129]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = add i32 [[TMP112]], 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP130]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP57]], ptr addrspace(22) [[TMP131]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP112]], 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP132]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP60]], ptr addrspace(22) [[TMP133]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = add i32 [[TMP112]], 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP134]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP63]], ptr addrspace(22) [[TMP135]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP112]], 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(22) 
[[TMP3]], i32 [[TMP136]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP66]], ptr addrspace(22) [[TMP137]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = add i32 [[TMP112]], 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP138]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP139]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = add i32 [[TMP112]], 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP140]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP141]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP112]], 60 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP142]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP75]], ptr addrspace(22) [[TMP143]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = add i32 [[TMP112]], 64 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP144]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP78]], ptr addrspace(22) [[TMP145]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP112]], 68 -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP146]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP81]], ptr addrspace(22) [[TMP147]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = add i32 [[TMP112]], 72 -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP148]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP149]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = add i32 [[TMP112]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP150]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP87]], ptr addrspace(22) [[TMP151]], align 4 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP152:%.*]] = add i32 [[TMP112]], 80 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP152]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP90]], ptr addrspace(22) [[TMP153]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = add i32 [[TMP112]], 84 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP154]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP93]], ptr addrspace(22) [[TMP155]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = add i32 [[TMP112]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP156]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP96]], ptr addrspace(22) [[TMP157]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = add i32 [[TMP112]], 92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP158]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP99]], ptr addrspace(22) [[TMP159]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = add i32 [[TMP112]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP160]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP102]], ptr addrspace(22) [[TMP161]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = add i32 [[TMP112]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP162]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP105]], ptr addrspace(22) [[TMP163]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP164:%.*]] = add i32 [[TMP112]], 104 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP164]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP108]], ptr addrspace(22) [[TMP165]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[PAYLOAD_FCA_0_EXTRACT]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr 
addrspace(22) [[TMP88]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP89]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(22) [[TMP90]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP91]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(22) [[TMP92]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP93]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(22) [[TMP94]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP95]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(22) [[TMP96]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(22) [[TMP98]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP99]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(22) [[TMP100]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP101]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(22) [[TMP102]], align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP103:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP103]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(22) [[TMP104]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP105]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP33]], ptr addrspace(22) [[TMP106]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP107]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP36]], ptr addrspace(22) [[TMP108]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP109]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(22) [[TMP110]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP111]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(22) [[TMP112]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP113]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP45]], ptr addrspace(22) [[TMP114]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP115]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP48]], ptr addrspace(22) [[TMP116]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = add i32 
[[PAYLOAD_FCA_0_EXTRACT]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP117]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP51]], ptr addrspace(22) [[TMP118]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP119]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP54]], ptr addrspace(22) [[TMP120]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP121]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP57]], ptr addrspace(22) [[TMP122]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP123]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP60]], ptr addrspace(22) [[TMP124]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP125]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP63]], ptr addrspace(22) [[TMP126]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP127]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP66]], ptr addrspace(22) [[TMP128]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP129]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP69]], ptr addrspace(22) [[TMP130]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 88 +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP131]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP72]], ptr addrspace(22) [[TMP132]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP133]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP75]], ptr addrspace(22) [[TMP134]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP135]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP78]], ptr addrspace(22) [[TMP136]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP137]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP81]], ptr addrspace(22) [[TMP138]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP3]], i32 [[TMP139]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP84]], ptr addrspace(22) [[TMP140]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = bitcast i32 [[TMP166]] to float -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP167]], i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = bitcast i32 [[TMP141]] to float +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_037_0_VEC_INSERT:%.*]] = 
insertelement <2 x float> undef, float [[TMP142]], i32 0 ; POST-PROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = bitcast i32 [[TMP168]] to float -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_012_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_012_0_VEC_INSERT]], float [[TMP169]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_012_4_VEC_INSERT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: call void @_cont_SetTriangleHitAttributes(ptr [[TMP170]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]) -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_GEP1:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP1]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = bitcast i32 [[TMP143]] to float +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_037_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_037_0_VEC_INSERT]], float [[TMP144]], i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT36:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_037_4_VEC_INSERT]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: call void @_cont_SetTriangleHitAttributes(ptr 
[[TMP145]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT36]]) +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_GEP25:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_0_GEP25]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_0_LOAD]], 0, 0, 0, 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_1_0_GEP2:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_0_1_0_GEP2]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_1_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 1, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_0_1_0_GEP26]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_0_1_0_LOAD]], 0, 0, 1, 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP3:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP3]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP27:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_0_1_0_GEP27]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_0_1_0_INSERT]], float [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 
-; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_1_GEP4:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP4]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_1_GEP28:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_0_1_1_GEP28]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], i32 [[DOTFCA_0_1_1_LOAD]], 0, 1, 1 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_GEP5:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP5]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_GEP29:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_2_GEP29]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_1_1_INSERT]], <3 x float> [[DOTFCA_0_2_LOAD]], 0, 2 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_3_GEP6:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP6]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_3_GEP30:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_0_3_GEP30]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue 
[[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_2_INSERT]], <3 x float> [[DOTFCA_0_3_LOAD]], 0, 3 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_GEP7:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP7]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_GEP31:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_LOAD:%.*]] = load float, ptr [[DOTFCA_0_4_GEP31]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_3_INSERT]], float [[DOTFCA_0_4_LOAD]], 0, 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_GEP8:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP8]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_GEP32:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_0_5_GEP32]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_4_INSERT]], i64 [[DOTFCA_0_5_LOAD]], 0, 5 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP9:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP9]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP33:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP33]], align 4 ; POST-PROCESS-GLOBAL-NEXT: 
[[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_0_5_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP10:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP10]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP34:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP34]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POST-PROCESS-GLOBAL-NEXT: 
[[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 
[[PAYLOAD_FCA_23_EXTRACT]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP171]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]]), !continuation.registercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP171]], i64 poison, [[STRUCT_ANYHITTRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; ; POST-PROCESS-GLOBAL-LABEL: define void @ClosestHit( -; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META25:![0-9]+]] !continuation.stacksize [[META26:![0-9]+]] !continuation.state [[META27:![0-9]+]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [21 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META24:![0-9]+]] !continuation [[META25:![0-9]+]] !continuation.stacksize [[META26:![0-9]+]] { ; POST-PROCESS-GLOBAL-NEXT: AllocaSpillBB: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -2860,245 +2983,255 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], 108 ; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP5]] ; POST-PROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP6]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = add i32 [[TMP3]], 116 +; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP7]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[PAYLOAD_FCA_0_EXTRACT]], ptr addrspace(22) [[TMP8]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; POST-PROCESS-GLOBAL-NEXT: 
[[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; 
POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 
-; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP7]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(22) [[TMP31]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = add i32 [[TMP7]], 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP33]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(22) [[TMP34]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = add i32 [[TMP7]], 8 -; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP36]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(22) [[TMP37]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = add i32 [[TMP7]], 12 -; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP39]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(22) [[TMP40]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = add i32 [[TMP7]], 16 -; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP42]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(22) [[TMP43]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = add i32 [[TMP7]], 20 -; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP45]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(22) [[TMP46]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] 
= add i32 [[TMP7]], 24 -; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP48]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = load i32, ptr addrspace(22) [[TMP49]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = add i32 [[TMP7]], 28 -; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP51]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(22) [[TMP52]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = add i32 [[TMP7]], 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP54]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = load i32, ptr addrspace(22) [[TMP55]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = add i32 [[TMP7]], 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP57]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = load i32, ptr addrspace(22) [[TMP58]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = add i32 [[TMP7]], 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP60]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = load i32, ptr addrspace(22) [[TMP61]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = add i32 [[TMP7]], 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP63]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = load i32, ptr addrspace(22) [[TMP64]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = add i32 [[TMP7]], 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP66]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = load i32, ptr addrspace(22) [[TMP67]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = add i32 [[TMP7]], 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP69]] -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP71:%.*]] = load i32, ptr addrspace(22) [[TMP70]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = add i32 [[TMP7]], 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP72]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = load i32, ptr addrspace(22) [[TMP73]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = add i32 [[TMP7]], 60 -; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP75]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = load i32, ptr addrspace(22) [[TMP76]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = add i32 [[TMP7]], 64 -; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP78]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(22) [[TMP79]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = add i32 [[TMP7]], 68 -; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP81]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = load i32, ptr addrspace(22) [[TMP82]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = add i32 [[TMP7]], 72 -; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP84]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = load i32, ptr addrspace(22) [[TMP85]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = add i32 [[TMP7]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP87]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = load i32, ptr addrspace(22) [[TMP88]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = add i32 [[TMP7]], 80 -; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP90]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = load i32, ptr addrspace(22) [[TMP91]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = add i32 [[TMP7]], 84 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP94:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP93]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = load i32, ptr addrspace(22) [[TMP94]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = add i32 [[TMP7]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP96]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = load i32, ptr addrspace(22) [[TMP97]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = add i32 [[TMP7]], 92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP99]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = load i32, ptr addrspace(22) [[TMP100]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP7]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = load i32, ptr addrspace(22) [[TMP103]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = add i32 [[TMP7]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP105]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = load i32, ptr addrspace(22) [[TMP106]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = add i32 [[TMP7]], 104 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP108]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = load i32, ptr addrspace(22) [[TMP109]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP3]], 116 -; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP111]], ptr addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[PAYLOAD_FCA_0_EXTRACT]] +; 
POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(22) [[TMP9]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP11]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(22) [[TMP12]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP14]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(22) [[TMP15]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP17]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(22) [[TMP18]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP20]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(22) [[TMP21]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP23]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(22) [[TMP24]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP26]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(22) [[TMP27]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP29]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = 
load i32, ptr addrspace(22) [[TMP30]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP32]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = load i32, ptr addrspace(22) [[TMP33]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP35]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(22) [[TMP36]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP38]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = load i32, ptr addrspace(22) [[TMP39]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP41]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = load i32, ptr addrspace(22) [[TMP42]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP44]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(22) [[TMP45]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP47]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(22) [[TMP48]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP50]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(22) [[TMP51]], 
align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP53]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(22) [[TMP54]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP56]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(22) [[TMP57]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP59]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(22) [[TMP60]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP62]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = load i32, ptr addrspace(22) [[TMP63]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP65]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = load i32, ptr addrspace(22) [[TMP66]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP68]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = load i32, ptr addrspace(22) [[TMP69]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP71]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = load i32, ptr addrspace(22) [[TMP72]], align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP74:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP74]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = load i32, ptr addrspace(22) [[TMP75]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP77]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = load i32, ptr addrspace(22) [[TMP78]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP80]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = load i32, ptr addrspace(22) [[TMP81]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP83]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = load i32, ptr addrspace(22) [[TMP84]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = add i32 [[PAYLOAD_FCA_0_EXTRACT]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP86]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = load i32, ptr addrspace(22) [[TMP87]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_053_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = bitcast float [[DOTSROA_053_0_VEC_EXTRACT]] to i32 -; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_053_4_VEC_EXTRACT:%.*]] = 
extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = bitcast float [[DOTSROA_053_4_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_0256_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = bitcast float [[DOTSROA_0256_0_VEC_EXTRACT]] to i32 +; POST-PROCESS-GLOBAL-NEXT: [[DOTSROA_0256_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = bitcast float [[DOTSROA_0256_4_VEC_EXTRACT]] to i32 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP116]]) -; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP118]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP119]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP91]]) +; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP93]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP94]]) ; POST-PROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; POST-PROCESS-GLOBAL-NEXT: [[TMP122:%.*]] = call i64 @continuation.getAddrAndMD(ptr @ClosestHit.resume.0) ; POST-PROCESS-GLOBAL-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP122]], 5 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP3]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) 
getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], 
ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP121]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP32]], ptr addrspace(22) [[TMP124]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = add i32 [[TMP121]], 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP123]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP35]], ptr addrspace(22) [[TMP126]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = add i32 [[TMP121]], 8 -; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP125]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP38]], ptr addrspace(22) [[TMP128]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = add i32 [[TMP121]], 12 -; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP127]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP41]], ptr addrspace(22) [[TMP130]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = add i32 [[TMP121]], 16 -; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP129]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP44]], ptr addrspace(22) [[TMP132]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = add i32 [[TMP121]], 20 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP131]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP47]], ptr addrspace(22) [[TMP134]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = add i32 [[TMP121]], 24 -; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP133]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP50]], ptr addrspace(22) [[TMP136]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = add i32 [[TMP121]], 28 -; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP135]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP53]], ptr addrspace(22) [[TMP138]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = add i32 [[TMP121]], 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP137]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP56]], ptr addrspace(22) [[TMP140]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = add i32 [[TMP121]], 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP139]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP59]], ptr addrspace(22) [[TMP142]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = add i32 [[TMP121]], 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP144:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP141]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP62]], ptr addrspace(22) [[TMP144]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = add i32 [[TMP121]], 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP143]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP65]], ptr addrspace(22) [[TMP146]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = add i32 [[TMP121]], 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP145]] -; POST-PROCESS-GLOBAL-NEXT: store i32 
[[TMP68]], ptr addrspace(22) [[TMP148]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = add i32 [[TMP121]], 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP147]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP71]], ptr addrspace(22) [[TMP150]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = add i32 [[TMP121]], 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP152:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP149]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP74]], ptr addrspace(22) [[TMP152]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP151:%.*]] = add i32 [[TMP121]], 60 -; POST-PROCESS-GLOBAL-NEXT: [[TMP154:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP151]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP77]], ptr addrspace(22) [[TMP154]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP153:%.*]] = add i32 [[TMP121]], 64 -; POST-PROCESS-GLOBAL-NEXT: [[TMP156:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP153]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP80]], ptr addrspace(22) [[TMP156]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP155:%.*]] = add i32 [[TMP121]], 68 -; POST-PROCESS-GLOBAL-NEXT: [[TMP158:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP155]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP83]], ptr addrspace(22) [[TMP158]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP157:%.*]] = add i32 [[TMP121]], 72 -; POST-PROCESS-GLOBAL-NEXT: [[TMP160:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP157]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP86]], ptr addrspace(22) [[TMP160]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP159:%.*]] = add i32 [[TMP121]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP162:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP159]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP89]], ptr addrspace(22) [[TMP162]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP161:%.*]] = add i32 [[TMP121]], 80 -; POST-PROCESS-GLOBAL-NEXT: 
[[TMP164:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP161]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP92]], ptr addrspace(22) [[TMP164]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP163:%.*]] = add i32 [[TMP121]], 84 -; POST-PROCESS-GLOBAL-NEXT: [[TMP166:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP163]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP95]], ptr addrspace(22) [[TMP166]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP165:%.*]] = add i32 [[TMP121]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP165]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP98]], ptr addrspace(22) [[TMP168]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = add i32 [[TMP121]], 92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP167]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP101]], ptr addrspace(22) [[TMP170]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = add i32 [[TMP121]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP172:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP169]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP104]], ptr addrspace(22) [[TMP172]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = add i32 [[TMP121]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP174:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP171]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP107]], ptr addrspace(22) [[TMP174]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP173:%.*]] = add i32 [[TMP121]], 104 -; POST-PROCESS-GLOBAL-NEXT: [[TMP176:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP173]] -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP110]], ptr addrspace(22) [[TMP176]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(22) [[TMP97]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = add i32 
[[TMP3]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP98]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(22) [[TMP99]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP3]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP100]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(22) [[TMP101]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = add i32 [[TMP3]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP102]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(22) [[TMP103]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = add i32 [[TMP3]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP104]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(22) [[TMP105]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP3]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP106]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(22) [[TMP107]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = add i32 [[TMP3]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP108]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(22) [[TMP109]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = add i32 [[TMP3]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP110]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP31]], ptr addrspace(22) [[TMP111]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP3]], 32 +; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP112]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP34]], 
ptr addrspace(22) [[TMP113]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = add i32 [[TMP3]], 36 +; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP114]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP37]], ptr addrspace(22) [[TMP115]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP3]], 40 +; POST-PROCESS-GLOBAL-NEXT: [[TMP117:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP116]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP40]], ptr addrspace(22) [[TMP117]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP118:%.*]] = add i32 [[TMP3]], 44 +; POST-PROCESS-GLOBAL-NEXT: [[TMP119:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP118]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP43]], ptr addrspace(22) [[TMP119]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP120:%.*]] = add i32 [[TMP3]], 48 +; POST-PROCESS-GLOBAL-NEXT: [[TMP121:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP120]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP46]], ptr addrspace(22) [[TMP121]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP150:%.*]] = add i32 [[TMP3]], 52 +; POST-PROCESS-GLOBAL-NEXT: [[TMP123:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP150]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP49]], ptr addrspace(22) [[TMP123]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP124:%.*]] = add i32 [[TMP3]], 56 +; POST-PROCESS-GLOBAL-NEXT: [[TMP125:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP124]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP52]], ptr addrspace(22) [[TMP125]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP126:%.*]] = add i32 [[TMP3]], 60 +; POST-PROCESS-GLOBAL-NEXT: [[TMP127:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP126]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP55]], ptr addrspace(22) [[TMP127]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP128:%.*]] = add i32 [[TMP3]], 64 +; POST-PROCESS-GLOBAL-NEXT: [[TMP129:%.*]] = getelementptr 
i8, ptr addrspace(22) [[TMP2]], i32 [[TMP128]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP58]], ptr addrspace(22) [[TMP129]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP130:%.*]] = add i32 [[TMP3]], 68 +; POST-PROCESS-GLOBAL-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP130]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP61]], ptr addrspace(22) [[TMP131]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP132:%.*]] = add i32 [[TMP3]], 72 +; POST-PROCESS-GLOBAL-NEXT: [[TMP133:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP132]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP64]], ptr addrspace(22) [[TMP133]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP134:%.*]] = add i32 [[TMP3]], 76 +; POST-PROCESS-GLOBAL-NEXT: [[TMP135:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP134]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP67]], ptr addrspace(22) [[TMP135]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP136:%.*]] = add i32 [[TMP3]], 80 +; POST-PROCESS-GLOBAL-NEXT: [[TMP137:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP136]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP70]], ptr addrspace(22) [[TMP137]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP138:%.*]] = add i32 [[TMP3]], 84 +; POST-PROCESS-GLOBAL-NEXT: [[TMP139:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP138]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP73]], ptr addrspace(22) [[TMP139]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP140:%.*]] = add i32 [[TMP3]], 88 +; POST-PROCESS-GLOBAL-NEXT: [[TMP141:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP140]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP76]], ptr addrspace(22) [[TMP141]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP142:%.*]] = add i32 [[TMP3]], 92 +; POST-PROCESS-GLOBAL-NEXT: [[TMP143:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP142]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP79]], ptr addrspace(22) [[TMP143]], align 4 +; POST-PROCESS-GLOBAL-NEXT: 
[[TMP144:%.*]] = add i32 [[TMP3]], 96 +; POST-PROCESS-GLOBAL-NEXT: [[TMP145:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP144]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP82]], ptr addrspace(22) [[TMP145]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP146:%.*]] = add i32 [[TMP3]], 100 +; POST-PROCESS-GLOBAL-NEXT: [[TMP147:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP146]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP85]], ptr addrspace(22) [[TMP147]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP148:%.*]] = add i32 [[TMP3]], 104 +; POST-PROCESS-GLOBAL-NEXT: [[TMP149:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP148]] +; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP88]], ptr addrspace(22) [[TMP149]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT54:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP3]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT57:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT54]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT60:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT57]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT63:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT60]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT66:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT63]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT69:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT66]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT72:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT69]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT75:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT72]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT78:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT75]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; POST-PROCESS-GLOBAL-NEXT: 
[[DOTFCA_9_INSERT81:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT78]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT84:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT81]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT87:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT84]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT90:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT87]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT93:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT90]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT96:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT93]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT99:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT96]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT102:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT99]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT105:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT102]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT108:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT105]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT111:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT108]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT114:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT111]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT117:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT114]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT120:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT117]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT123:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_22_INSERT120]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT126:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT123]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT129:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT126]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT132:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT129]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT135:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT132]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT138:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT135]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT141:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT138]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; POST-PROCESS-GLOBAL-NEXT: [[TMP175:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP175]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META18]], !continuation.returnedRegistercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP175]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [10 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT141]]) ; POST-PROCESS-GLOBAL-NEXT: unreachable ; ; ; POST-PROCESS-GLOBAL-LABEL: define dso_local void @ClosestHit.resume.0( -; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation.registercount [[META18]] !continuation [[META25]] { +; POST-PROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [23 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META24]] !continuation [[META25]] { ; POST-PROCESS-GLOBAL-NEXT: entryresume.0: ; POST-PROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -3106,144 +3239,126 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) ; POST-PROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -120 -; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = load i32, ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP5]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP32]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = extractvalue [30 x i32] [[TMP6]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 6 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] 
[[TMP6]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 22 +; POST-PROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = extractvalue [30 x i32] [[TMP6]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = extractvalue [30 x i32] [[TMP6]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP6]], 29 +; POST-PROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP5]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(22) [[TMP7]], align 4 ; POST-PROCESS-GLOBAL-NEXT: 
[[TMP31:%.*]] = add i32 [[TMP5]], 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP31]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(22) [[TMP35]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP31]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(22) [[TMP10]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = add i32 [[TMP5]], 8 -; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP34]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP34]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(22) [[TMP13]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = add i32 [[TMP5]], 12 -; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP37]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP41]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP37]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = load i32, ptr addrspace(22) [[TMP16]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = add i32 [[TMP5]], 16 -; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP40]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP40]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(22) [[TMP19]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = add i32 [[TMP5]], 20 -; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP43]] -; 
POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP47]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP43]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(22) [[TMP24]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP46:%.*]] = add i32 [[TMP5]], 24 -; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP46]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP46]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(22) [[TMP25]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP49:%.*]] = add i32 [[TMP5]], 28 -; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP49]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP49]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP51:%.*]] = load i32, ptr addrspace(22) [[TMP28]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP52:%.*]] = add i32 [[TMP5]], 32 -; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP52]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP52]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(22) [[TMP32]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP55:%.*]] = add i32 [[TMP5]], 36 -; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP55]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr 
addrspace(22) [[TMP29]], i32 [[TMP55]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP57:%.*]] = load i32, ptr addrspace(22) [[TMP35]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP58:%.*]] = add i32 [[TMP5]], 40 -; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP58]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP62]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP58]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP60:%.*]] = load i32, ptr addrspace(22) [[TMP38]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP61:%.*]] = add i32 [[TMP5]], 44 -; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP61]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP61]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(22) [[TMP41]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP64:%.*]] = add i32 [[TMP5]], 48 -; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP64]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP64]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP66:%.*]] = load i32, ptr addrspace(22) [[TMP44]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP67:%.*]] = add i32 [[TMP5]], 52 -; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP67]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP67]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP69:%.*]] = load i32, ptr addrspace(22) [[TMP47]], align 4 ; 
POST-PROCESS-GLOBAL-NEXT: [[TMP70:%.*]] = add i32 [[TMP5]], 56 -; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP70]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP74]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP70]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP72:%.*]] = load i32, ptr addrspace(22) [[TMP50]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP73:%.*]] = add i32 [[TMP5]], 60 -; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP73]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(22) [[TMP77]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP53:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP73]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP75:%.*]] = load i32, ptr addrspace(22) [[TMP53]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP76:%.*]] = add i32 [[TMP5]], 64 -; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP76]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP76]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP78:%.*]] = load i32, ptr addrspace(22) [[TMP56]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP79:%.*]] = add i32 [[TMP5]], 68 -; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP79]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP59:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP79]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP81:%.*]] = load i32, ptr addrspace(22) [[TMP59]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP82:%.*]] = add i32 [[TMP5]], 72 -; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], 
i32 [[TMP82]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP82]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP84:%.*]] = load i32, ptr addrspace(22) [[TMP62]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP85:%.*]] = add i32 [[TMP5]], 76 -; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP85]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP89]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP65:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP85]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP87:%.*]] = load i32, ptr addrspace(22) [[TMP65]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP88:%.*]] = add i32 [[TMP5]], 80 -; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP88]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(22) [[TMP92]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP88]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP90:%.*]] = load i32, ptr addrspace(22) [[TMP68]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP91:%.*]] = add i32 [[TMP5]], 84 -; POST-PROCESS-GLOBAL-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP91]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP95]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP91]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP93:%.*]] = load i32, ptr addrspace(22) [[TMP71]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP94:%.*]] = add i32 [[TMP5]], 88 -; POST-PROCESS-GLOBAL-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP94]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(22) [[TMP98]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP74:%.*]] = 
getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP94]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP96:%.*]] = load i32, ptr addrspace(22) [[TMP74]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP97:%.*]] = add i32 [[TMP5]], 92 -; POST-PROCESS-GLOBAL-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP97]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP101]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP77:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP97]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP99:%.*]] = load i32, ptr addrspace(22) [[TMP77]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP100:%.*]] = add i32 [[TMP5]], 96 -; POST-PROCESS-GLOBAL-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP100]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(22) [[TMP104]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP80:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP100]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP102:%.*]] = load i32, ptr addrspace(22) [[TMP80]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP103:%.*]] = add i32 [[TMP5]], 100 -; POST-PROCESS-GLOBAL-NEXT: [[TMP107:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP103]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(22) [[TMP107]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP103]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP105:%.*]] = load i32, ptr addrspace(22) [[TMP83]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP106:%.*]] = add i32 [[TMP5]], 104 -; POST-PROCESS-GLOBAL-NEXT: [[TMP111:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP106]] -; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP111]], align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP109:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP106]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP108:%.*]] = load i32, ptr addrspace(22) [[TMP86]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP92:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [23 x i32], [30 x i32] } [[TMP1]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP92]], 0 ; POST-PROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POST-PROCESS-GLOBAL-NEXT: [[TMP110:%.*]] = add i32 [[TMP4]], 116 -; POST-PROCESS-GLOBAL-NEXT: [[TMP171:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP110]] -; POST-PROCESS-GLOBAL-NEXT: [[DOTRELOAD:%.*]] = load i32, ptr addrspace(22) [[TMP171]], align 4 +; POST-PROCESS-GLOBAL-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP110]] +; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(22) [[TMP89]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP112:%.*]] = add i32 [[TMP4]], 108 ; POST-PROCESS-GLOBAL-NEXT: [[TMP113:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP112]] ; POST-PROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP113]], align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[DOTRELOAD]], ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: [[TMP114:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP6]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP7]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr 
inbounds (i32, ptr addrspace(20) @REGISTERS, i32 10), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 11), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 12), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 13), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 14), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 15), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 16), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 17), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 18), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 19), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 20), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 21), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 22), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 23), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 24), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 25), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 26), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 27), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 28), align 4 -; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 29), align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP115:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP114]] ; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(22) [[TMP115]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP116:%.*]] = add i32 [[TMP114]], 4 @@ -3325,10 +3440,40 @@ attributes #3 = { nounwind } ; POST-PROCESS-GLOBAL-NEXT: [[TMP167:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP29]], i32 [[TMP166]] ; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP108]], ptr addrspace(22) [[TMP167]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[TMP114]], 0 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[DOTFCA_1_EXTRACT]], 1 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 
[[DOTFCA_19_EXTRACT]], 19 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[TMP22]], 23 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[TMP23]], 24 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; POST-PROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; POST-PROCESS-GLOBAL-NEXT: [[TMP168:%.*]] = load i32, ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP169:%.*]] = add i32 [[TMP168]], -120 ; POST-PROCESS-GLOBAL-NEXT: store i32 [[TMP169]], ptr [[CSP]], align 4 ; POST-PROCESS-GLOBAL-NEXT: [[TMP170:%.*]] = load i32, ptr [[CSP]], align 4 -; POST-PROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP170]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META18]] +; POST-PROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP170]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [23 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; POST-PROCESS-GLOBAL-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/register-buffer.ll b/llvmraytracing/test/dx/register-buffer.ll deleted file mode 100644 index 1e75847e28..0000000000 --- a/llvmraytracing/test/dx/register-buffer.ll +++ /dev/null @@ -1,956 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 3 -; RUN: opt --verify-each -passes='register-buffer,lint,instsimplify' -S %s --lint-abort-on-error | FileCheck %s - -target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" - -@GLOBAL = external global [20 x i32], !registerbuffer !1 -@GLOBAL_NO_REGS = external global [20 x i32], !registerbuffer !2 -!1 = !{ i32 15, i32 32 } -!2 = !{ i32 0, i32 32 } - -%complex_type = type { %complex_type*, half, %complex_type addrspace(1)* } - -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) - -;. -; CHECK: @GLOBAL = external addrspace(20) global [15 x i32] -; CHECK: @GLOBAL_NO_REGS = external addrspace(20) global [0 x i32] -;. 
-define i32 @load_i32_reg() { -; CHECK-LABEL: define i32 @load_i32_reg() { -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: ret i32 [[TMP1]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %val = load i32, i32* %addr - ret i32 %val -} - -define i32 @load_i32_reg2() { -; CHECK-LABEL: define i32 @load_i32_reg2() { -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: ret i32 [[TMP1]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 14 - %val = load i32, i32* %addr - ret i32 %val -} - -define i32 @load_i32_mem() { -; CHECK-LABEL: define i32 @load_i32_mem() { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: ret i32 [[TMP4]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 - %val = load i32, i32* %addr - ret i32 %val -} - -define i32 @load_i32_dyn(i32 %i) { -; CHECK-LABEL: define i32 @load_i32_dyn( -; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: 
[[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP10:%.*]] -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) [[TMP4]], align 4 -; CHECK-NEXT: br label [[TMP12:%.*]] -; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: br label [[TMP12]] -; CHECK: 12: -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] -; CHECK-NEXT: ret i32 [[TMP13]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - %val = load i32, i32* %addr - ret i32 %val -} - -define i32 @load_i32_dyn_no_regs(i32 %i) { -; CHECK-LABEL: define i32 @load_i32_dyn_no_regs( -; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a0i32(ptr addrspace(20) @GLOBAL_NO_REGS) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP1]], i32 0, i32 [[I]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(32) [[TMP2]], align 4 -; CHECK-NEXT: ret i32 [[TMP3]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL_NO_REGS, i32 0, i32 %i - %val = load i32, i32* %addr - ret i32 %val -} - -define i64 @load_i64_reg() { -; CHECK-LABEL: define i64 @load_i64_reg() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast 
(ptr addrspace(20) @GLOBAL to ptr), i64 44) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_INSERT_0]], i32 [[TMP2]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_INSERT_1]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i64, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret i64 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %addr64 = bitcast i32* %addr to i64* - %val = load i64, i64* %addr64 - ret i64 %val -} - -define i64 @load_i64_mem() { -; CHECK-LABEL: define i64 @load_i64_mem() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 14 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP5]], align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_INSERT_0]], i32 [[TMP6]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_INSERT_1]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i64, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret i64 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 14 - %addr64 = bitcast i32* %addr to i64* - %val = load i64, i64* %addr64 - ret i64 %val -} - -define i64 @load_i64_dyn(i32 
%i) { -; CHECK-LABEL: define i64 @load_i64_dyn( -; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP10:%.*]] -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) [[TMP4]], align 4 -; CHECK-NEXT: br label [[TMP12:%.*]] -; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: br label [[TMP12]] -; CHECK: 12: -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(20) [[TMP15]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[TMP17]], 60 -; CHECK-NEXT: br i1 [[TMP18]], label [[TMP19:%.*]], label [[TMP21:%.*]] -; 
CHECK: 19: -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) [[TMP15]], align 4 -; CHECK-NEXT: br label [[TMP23:%.*]] -; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP14]], align 4 -; CHECK-NEXT: br label [[TMP23]] -; CHECK: 23: -; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[TMP20]], [[TMP19]] ], [ [[TMP22]], [[TMP21]] ] -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_INSERT_0]], i32 [[TMP24]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_INSERT_1]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i64, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret i64 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - %addr64 = bitcast i32* %addr to i64* - %val = load i64, i64* %addr64 - ret i64 %val -} - -define i32 @store_i32_reg(i32 %val) { -; CHECK-LABEL: define i32 @store_i32_reg( -; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - store i32 %val, i32* %addr - ret i32 %val -} - -define i32 @store_i32_reg2(i32 %val) { -; CHECK-LABEL: define i32 @store_i32_reg2( -; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 14 - store i32 %val, i32* %addr - ret i32 %val -} - -define i32 @store_i32_mem(i32 %val) { -; CHECK-LABEL: define i32 @store_i32_mem( -; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; 
CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 - store i32 %val, i32* %addr - ret i32 %val -} - -define i32 @store_i32_dyn(i32 %val, i32 %i) { -; CHECK-LABEL: define i32 @store_i32_dyn( -; CHECK-SAME: i32 [[VAL:%.*]], i32 [[I:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; CHECK: 8: -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(20) [[TMP4]], align 4 -; CHECK-NEXT: br label [[TMP10:%.*]] -; CHECK: 9: -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: br label [[TMP10]] -; CHECK: 10: -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - store i32 %val, i32* %addr - ret i32 %val -} - -define i32 @load_unaligned_i32_reg() { -; CHECK-LABEL: define i32 @load_unaligned_i32_reg() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr 
getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i8, i8, i8, i8 }> poison, i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 41) to ptr addrspace(20)), align 1 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_0]], i8 [[TMP2]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 42) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_1]], i8 [[TMP3]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 43) to ptr addrspace(20)), align 1 -; CHECK-NEXT: [[VAL_FCA_INSERT_3:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_2]], i8 [[TMP4]], 3 -; CHECK-NEXT: store <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_3]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i32, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret i32 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %val = load i32, i32* %addr, align 2 - ret i32 %val -} - -define i32 @load_unaligned_i32_reg2() { -; CHECK-LABEL: define i32 @load_unaligned_i32_reg2() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i8, i8, i8, i8 }> poison, i8 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(20) 
addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 57) to ptr addrspace(20)), align 1 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_0]], i8 [[TMP2]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 58) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_1]], i8 [[TMP3]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 59) to ptr addrspace(20)), align 1 -; CHECK-NEXT: [[VAL_FCA_INSERT_3:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_2]], i8 [[TMP4]], 3 -; CHECK-NEXT: store <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_3]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i32, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret i32 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 14 - %val = load i32, i32* %addr, align 2 - ret i32 %val -} - -define i32 @load_unaligned_i32_mem() { -; CHECK-LABEL: define i32 @load_unaligned_i32_mem() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr addrspace(32) [[TMP3]], align 2 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i8, i8, i8, i8 }> poison, i8 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr 
addrspace(32) [[TMP5]], align 1 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_0]], i8 [[TMP6]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr addrspace(32) [[TMP7]], align 2 -; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_1]], i8 [[TMP8]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(32) [[TMP9]], align 1 -; CHECK-NEXT: [[VAL_FCA_INSERT_3:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_2]], i8 [[TMP10]], 3 -; CHECK-NEXT: store <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_3]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i32, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret i32 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 - %val = load i32, i32* %addr, align 2 - ret i32 %val -} - -define i32 @load_unaligned_i32_dyn(i32 %i) { -; CHECK-LABEL: define i32 @load_unaligned_i32_dyn( -; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: 
[[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP10:%.*]] -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr addrspace(20) [[TMP4]], align 2 -; CHECK-NEXT: br label [[TMP12:%.*]] -; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr addrspace(32) [[TMP3]], align 2 -; CHECK-NEXT: br label [[TMP12]] -; CHECK: 12: -; CHECK-NEXT: [[TMP13:%.*]] = phi i8 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i8, i8, i8, i8 }> poison, i8 [[TMP13]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_GEP_1:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(20) [[TMP15]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[TMP17]], 60 -; CHECK-NEXT: br i1 [[TMP18]], label [[TMP19:%.*]], label [[TMP21:%.*]] -; CHECK: 19: -; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr addrspace(20) [[TMP15]], align 1 -; CHECK-NEXT: br label [[TMP23:%.*]] -; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr addrspace(32) [[TMP14]], align 1 -; CHECK-NEXT: br label [[TMP23]] -; CHECK: 23: -; CHECK-NEXT: [[TMP24:%.*]] = phi i8 [ [[TMP20]], [[TMP19]] ], [ [[TMP22]], [[TMP21]] ] -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_0]], i8 [[TMP24]], 1 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[VAL_FCA_GEP_2:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 2 -; CHECK-NEXT: [[TMP26:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_2]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr 
addrspace(20) [[TMP26]] to i32 -; CHECK-NEXT: [[TMP28:%.*]] = sub i32 [[TMP27]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP29:%.*]] = icmp ult i32 [[TMP28]], 60 -; CHECK-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; CHECK: 30: -; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr addrspace(20) [[TMP26]], align 2 -; CHECK-NEXT: br label [[TMP34:%.*]] -; CHECK: 32: -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr addrspace(32) [[TMP25]], align 2 -; CHECK-NEXT: br label [[TMP34]] -; CHECK: 34: -; CHECK-NEXT: [[TMP35:%.*]] = phi i8 [ [[TMP31]], [[TMP30]] ], [ [[TMP33]], [[TMP32]] ] -; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_1]], i8 [[TMP35]], 2 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 3 -; CHECK-NEXT: [[VAL_FCA_GEP_3:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 3 -; CHECK-NEXT: [[TMP37:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_3]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(20) [[TMP37]] to i32 -; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP38]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP40:%.*]] = icmp ult i32 [[TMP39]], 60 -; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP43:%.*]] -; CHECK: 41: -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr addrspace(20) [[TMP37]], align 1 -; CHECK-NEXT: br label [[TMP45:%.*]] -; CHECK: 43: -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr addrspace(32) [[TMP36]], align 1 -; CHECK-NEXT: br label [[TMP45]] -; CHECK: 45: -; CHECK-NEXT: [[TMP46:%.*]] = phi i8 [ [[TMP42]], [[TMP41]] ], [ [[TMP44]], [[TMP43]] ] -; CHECK-NEXT: [[VAL_FCA_INSERT_3:%.*]] = insertvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_2]], i8 [[TMP46]], 3 -; CHECK-NEXT: store <{ i8, i8, i8, i8 }> [[VAL_FCA_INSERT_3]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load i32, ptr [[VAL_FCA_ALLOCA]], align 8 -; 
CHECK-NEXT: ret i32 [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - %val = load i32, i32* %addr, align 2 - ret i32 %val -} - -define i32 @store_unaligned_i32_reg(i32 %val) { -; CHECK-LABEL: define i32 @store_unaligned_i32_reg( -; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load <{ i8, i8, i8, i8 }>, ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_0:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_1:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 41) to ptr addrspace(20)), align 1 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_2:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 2 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 42) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_3:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 3 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 43) to ptr addrspace(20)), align 1 -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - store i32 %val, i32* %addr, align 2 - ret i32 %val -} - -define i32 @store_unaligned_i32_mem(i32 %val) { -; CHECK-LABEL: define i32 
@store_unaligned_i32_mem( -; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load <{ i8, i8, i8, i8 }>, ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_0:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_1:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 57) to ptr addrspace(20)), align 1 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_2:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 2 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 58) to ptr addrspace(20)), align 2 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_3:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 3 -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_3]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 59) to ptr addrspace(20)), align 1 -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 14 - store i32 %val, i32* %addr, align 2 - ret i32 %val -} - -define i32 @store_unaligned_i32_dyn(i32 %val, i32 %i) { -; CHECK-LABEL: define i32 @store_unaligned_i32_dyn( -; CHECK-SAME: i32 [[VAL:%.*]], i32 [[I:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i8, i8, i8, i8 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr 
addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load <{ i8, i8, i8, i8 }>, ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_0:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; CHECK: 8: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(20) [[TMP4]], align 2 -; CHECK-NEXT: br label [[TMP10:%.*]] -; CHECK: 9: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_0]], ptr addrspace(32) [[TMP3]], align 2 -; CHECK-NEXT: br label [[TMP10]] -; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_GEP_1:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_1:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(20) [[TMP12]] to i32 -; CHECK-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], 60 -; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17:%.*]] -; CHECK: 16: -; CHECK-NEXT: 
store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(20) [[TMP12]], align 1 -; CHECK-NEXT: br label [[TMP18:%.*]] -; CHECK: 17: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_1]], ptr addrspace(32) [[TMP11]], align 1 -; CHECK-NEXT: br label [[TMP18]] -; CHECK: 18: -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[VAL_FCA_GEP_2:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 2 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_2:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 2 -; CHECK-NEXT: [[TMP20:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_2]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(20) [[TMP20]] to i32 -; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i32 [[TMP22]], 60 -; CHECK-NEXT: br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP25:%.*]] -; CHECK: 24: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(20) [[TMP20]], align 2 -; CHECK-NEXT: br label [[TMP26:%.*]] -; CHECK: 25: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_2]], ptr addrspace(32) [[TMP19]], align 2 -; CHECK-NEXT: br label [[TMP26]] -; CHECK: 26: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <{ i8, i8, i8, i8 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 3 -; CHECK-NEXT: [[VAL_FCA_GEP_3:%.*]] = getelementptr inbounds <{ i8, i8, i8, i8 }>, ptr [[ADDR]], i32 0, i32 3 -; CHECK-NEXT: [[VAL_FCA_EXTRACT_3:%.*]] = extractvalue <{ i8, i8, i8, i8 }> [[VAL_FCA_ALLOCA_LOAD]], 3 -; CHECK-NEXT: [[TMP28:%.*]] = addrspacecast ptr [[VAL_FCA_GEP_3]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(20) [[TMP28]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[TMP29]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i32 [[TMP30]], 60 -; CHECK-NEXT: br i1 [[TMP31]], label [[TMP32:%.*]], label [[TMP33:%.*]] -; CHECK: 32: -; CHECK-NEXT: store i8 
[[VAL_FCA_EXTRACT_3]], ptr addrspace(20) [[TMP28]], align 1 -; CHECK-NEXT: br label [[TMP34:%.*]] -; CHECK: 33: -; CHECK-NEXT: store i8 [[VAL_FCA_EXTRACT_3]], ptr addrspace(32) [[TMP27]], align 1 -; CHECK-NEXT: br label [[TMP34]] -; CHECK: 34: -; CHECK-NEXT: ret i32 [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - store i32 %val, i32* %addr, align 2 - ret i32 %val -} - -define %complex_type @load_struct_reg() { -; CHECK-LABEL: define %complex_type @load_struct_reg() { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 44) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_0_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_0_INSERT_0]], i32 [[TMP2]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = load half, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 48) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP3]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 52) to 
ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_2_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_2_INSERT_0]], i32 [[TMP5]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_2_INSERT_1]], ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load ptr addrspace(1), ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_1_INSERT]], ptr addrspace(1) [[VAL_FCA_2_ALLOCA_LOAD]], 2 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL_FCA_2_INSERT]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %addr_type = bitcast i32* %addr to %complex_type* - %val = load %complex_type, %complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @load_struct_mem() { -; CHECK-LABEL: define %complex_type @load_struct_mem() { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(32) [[TMP5]], align 4 -; CHECK-NEXT: [[VAL_FCA_0_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> 
[[VAL_FCA_0_INSERT_0]], i32 [[TMP6]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = load half, ptr addrspace(32) [[TMP7]], align 4 -; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP8]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(32) [[TMP9]], align 4 -; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP9]], i32 0, i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(32) [[TMP11]], align 4 -; CHECK-NEXT: [[VAL_FCA_2_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_2_INSERT_0]], i32 [[TMP12]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_2_INSERT_1]], ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load ptr addrspace(1), ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_1_INSERT]], ptr addrspace(1) [[VAL_FCA_2_ALLOCA_LOAD]], 2 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL_FCA_2_INSERT]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 - %addr_type = bitcast i32* %addr to %complex_type* - %val = load %complex_type, %complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @load_struct_both() { -; CHECK-LABEL: define %complex_type @load_struct_both() { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: 
[[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 13 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 52) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_0_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_0_INSERT_0]], i32 [[TMP5]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = load half, ptr addrspace(32) [[TMP6]], align 4 -; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP7]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(32) [[TMP8]], align 4 -; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP8]], i32 0, i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP10]], align 4 -; CHECK-NEXT: 
[[VAL_FCA_2_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_2_INSERT_0]], i32 [[TMP11]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_2_INSERT_1]], ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load ptr addrspace(1), ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_1_INSERT]], ptr addrspace(1) [[VAL_FCA_2_ALLOCA_LOAD]], 2 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL_FCA_2_INSERT]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 13 - %addr_type = bitcast i32* %addr to %complex_type* - %val = load %complex_type, %complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @load_struct_dyn(i32 %i) { -; CHECK-LABEL: define %complex_type @load_struct_dyn( -; CHECK-SAME: i32 [[I:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP10:%.*]] -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) [[TMP4]], align 4 -; CHECK-NEXT: br label [[TMP12:%.*]] -; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(32) [[TMP3]], align 4 
-; CHECK-NEXT: br label [[TMP12]] -; CHECK: 12: -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP11]], [[TMP10]] ] -; CHECK-NEXT: [[VAL_FCA_0_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP13]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_0_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = addrspacecast ptr [[VAL_FCA_0_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr addrspace(20) [[TMP15]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP16]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP18:%.*]] = icmp ult i32 [[TMP17]], 60 -; CHECK-NEXT: br i1 [[TMP18]], label [[TMP19:%.*]], label [[TMP21:%.*]] -; CHECK: 19: -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(20) [[TMP15]], align 4 -; CHECK-NEXT: br label [[TMP23:%.*]] -; CHECK: 21: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(32) [[TMP14]], align 4 -; CHECK-NEXT: br label [[TMP23]] -; CHECK: 23: -; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[TMP20]], [[TMP19]] ], [ [[TMP22]], [[TMP21]] ] -; CHECK-NEXT: [[VAL_FCA_0_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_0_INSERT_0]], i32 [[TMP24]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_0_INSERT_1]], ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load ptr, ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE:%.*]] poison, ptr [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[TMP26:%.*]] = addrspacecast ptr [[VAL_FCA_1_GEP]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(20) [[TMP26]] to i32 -; CHECK-NEXT: 
[[TMP28:%.*]] = sub i32 [[TMP27]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP29:%.*]] = icmp ult i32 [[TMP28]], 60 -; CHECK-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; CHECK: 30: -; CHECK-NEXT: [[TMP31:%.*]] = load half, ptr addrspace(20) [[TMP26]], align 4 -; CHECK-NEXT: br label [[TMP34:%.*]] -; CHECK: 32: -; CHECK-NEXT: [[TMP33:%.*]] = load half, ptr addrspace(32) [[TMP25]], align 4 -; CHECK-NEXT: br label [[TMP34]] -; CHECK: 34: -; CHECK-NEXT: [[TMP35:%.*]] = phi half [ [[TMP31]], [[TMP30]] ], [ [[TMP33]], [[TMP32]] ] -; CHECK-NEXT: [[VAL_FCA_1_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_0_INSERT]], half [[TMP35]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 2 -; CHECK-NEXT: [[TMP37:%.*]] = addrspacecast ptr [[VAL_FCA_2_GEP]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(20) [[TMP37]] to i32 -; CHECK-NEXT: [[TMP39:%.*]] = sub i32 [[TMP38]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP40:%.*]] = icmp ult i32 [[TMP39]], 60 -; CHECK-NEXT: br i1 [[TMP40]], label [[TMP41:%.*]], label [[TMP43:%.*]] -; CHECK: 41: -; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(20) [[TMP37]], align 4 -; CHECK-NEXT: br label [[TMP45:%.*]] -; CHECK: 43: -; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(32) [[TMP36]], align 4 -; CHECK-NEXT: br label [[TMP45]] -; CHECK: 45: -; CHECK-NEXT: [[TMP46:%.*]] = phi i32 [ [[TMP42]], [[TMP41]] ], [ [[TMP44]], [[TMP43]] ] -; CHECK-NEXT: [[VAL_FCA_2_INSERT_0:%.*]] = insertvalue <{ i32, i32 }> poison, i32 [[TMP46]], 0 -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP36]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_2_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[VAL_FCA_2_GEP]], i32 0, i32 1 -; CHECK-NEXT: [[TMP48:%.*]] = 
addrspacecast ptr [[VAL_FCA_2_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(20) [[TMP48]] to i32 -; CHECK-NEXT: [[TMP50:%.*]] = sub i32 [[TMP49]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP51:%.*]] = icmp ult i32 [[TMP50]], 60 -; CHECK-NEXT: br i1 [[TMP51]], label [[TMP52:%.*]], label [[TMP54:%.*]] -; CHECK: 52: -; CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr addrspace(20) [[TMP48]], align 4 -; CHECK-NEXT: br label [[TMP56:%.*]] -; CHECK: 54: -; CHECK-NEXT: [[TMP55:%.*]] = load i32, ptr addrspace(32) [[TMP47]], align 4 -; CHECK-NEXT: br label [[TMP56]] -; CHECK: 56: -; CHECK-NEXT: [[TMP57:%.*]] = phi i32 [ [[TMP53]], [[TMP52]] ], [ [[TMP55]], [[TMP54]] ] -; CHECK-NEXT: [[VAL_FCA_2_INSERT_1:%.*]] = insertvalue <{ i32, i32 }> [[VAL_FCA_2_INSERT_0]], i32 [[TMP57]], 1 -; CHECK-NEXT: store <{ i32, i32 }> [[VAL_FCA_2_INSERT_1]], ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load ptr addrspace(1), ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_INSERT:%.*]] = insertvalue [[COMPLEX_TYPE]] [[VAL_FCA_1_INSERT]], ptr addrspace(1) [[VAL_FCA_2_ALLOCA_LOAD]], 2 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL_FCA_2_INSERT]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - %addr_type = bitcast i32* %addr to %complex_type* - %val = load %complex_type, %complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @store_struct_reg(%complex_type %val) { -; CHECK-LABEL: define %complex_type @store_struct_reg( -; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 -; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr 
[[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 44) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 48) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 52) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %addr_type = bitcast i32* %addr to %complex_type* - store %complex_type %val, 
%complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @store_struct_mem(%complex_type %val) { -; CHECK-LABEL: define %complex_type @store_struct_mem( -; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 15 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 -; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(32) [[TMP4]], align 4 -; CHECK-NEXT: [[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(32) [[TMP5]], align 4 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: 
[[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(32) [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP6]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(32) [[TMP7]], align 4 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 15 - %addr_type = bitcast i32* %addr to %complex_type* - store %complex_type %val, %complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @store_struct_both(%complex_type %val) { -; CHECK-LABEL: define %complex_type @store_struct_both( -; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 13 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 -; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 52) to ptr addrspace(20)), align 
4 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 56) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(32) [[TMP4]], align 4 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 0 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(32) [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP5]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(32) [[TMP6]], align 4 -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 13 - %addr_type = bitcast i32* %addr to %complex_type* - store %complex_type %val, %complex_type* %addr_type - ret %complex_type %val -} - -define %complex_type @store_struct_dyn(%complex_type %val, i32 %i) { -; CHECK-LABEL: define %complex_type @store_struct_dyn( -; CHECK-SAME: [[COMPLEX_TYPE:%.*]] [[VAL:%.*]], i32 [[I:%.*]]) { -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA:%.*]] = alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA:%.*]] = 
alloca <{ i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(32) @registerbuffer.getpointer.a15i32(ptr addrspace(20) @GLOBAL) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr addrspace(32) [[TMP1]], i32 -15 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [20 x i32], ptr addrspace(32) [[TMP2]], i32 0, i32 [[I]] -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr [20 x i32], ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 0 -; CHECK-NEXT: store ptr [[VAL_FCA_0_EXTRACT]], ptr [[VAL_FCA_0_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_0_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_0_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_0:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(20) [[TMP4]] to i32 -; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 60 -; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; CHECK: 8: -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(20) [[TMP4]], align 4 -; CHECK-NEXT: br label [[TMP10:%.*]] -; CHECK: 9: -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_0]], ptr addrspace(32) [[TMP3]], align 4 -; CHECK-NEXT: br label [[TMP10]] -; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_0_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_0_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_0_ALLOCA_LOAD]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[VAL_FCA_0_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(20) [[TMP12]] to i32 -; CHECK-NEXT: [[TMP14:%.*]] = sub i32 
[[TMP13]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], 60 -; CHECK-NEXT: br i1 [[TMP15]], label [[TMP16:%.*]], label [[TMP17:%.*]] -; CHECK: 16: -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(20) [[TMP12]], align 4 -; CHECK-NEXT: br label [[TMP18:%.*]] -; CHECK: 17: -; CHECK-NEXT: store i32 [[VAL_FCA_0_EXTRACT_1]], ptr addrspace(32) [[TMP11]], align 4 -; CHECK-NEXT: br label [[TMP18]] -; CHECK: 18: -; CHECK-NEXT: [[VAL_FCA_1_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_1_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = addrspacecast ptr [[VAL_FCA_1_GEP]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(20) [[TMP20]] to i32 -; CHECK-NEXT: [[TMP22:%.*]] = sub i32 [[TMP21]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP23:%.*]] = icmp ult i32 [[TMP22]], 60 -; CHECK-NEXT: br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP25:%.*]] -; CHECK: 24: -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(20) [[TMP20]], align 4 -; CHECK-NEXT: br label [[TMP26:%.*]] -; CHECK: 25: -; CHECK-NEXT: store half [[VAL_FCA_1_EXTRACT]], ptr addrspace(32) [[TMP19]], align 4 -; CHECK-NEXT: br label [[TMP26]] -; CHECK: 26: -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT:%.*]] = extractvalue [[COMPLEX_TYPE]] [[VAL]], 2 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[COMPLEX_TYPE]], ptr addrspace(32) [[TMP3]], i32 0, i32 2 -; CHECK-NEXT: [[VAL_FCA_2_GEP:%.*]] = getelementptr inbounds [[COMPLEX_TYPE]], ptr [[ADDR]], i32 0, i32 2 -; CHECK-NEXT: store ptr addrspace(1) [[VAL_FCA_2_EXTRACT]], ptr [[VAL_FCA_2_ALLOCA]], align 8 -; CHECK-NEXT: [[VAL_FCA_2_ALLOCA_LOAD:%.*]] = load <{ i32, i32 }>, ptr [[VAL_FCA_2_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_0:%.*]] = extractvalue <{ 
i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = addrspacecast ptr [[VAL_FCA_2_GEP]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(20) [[TMP28]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = sub i32 [[TMP29]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i32 [[TMP30]], 60 -; CHECK-NEXT: br i1 [[TMP31]], label [[TMP32:%.*]], label [[TMP33:%.*]] -; CHECK: 32: -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(20) [[TMP28]], align 4 -; CHECK-NEXT: br label [[TMP34:%.*]] -; CHECK: 33: -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_0]], ptr addrspace(32) [[TMP27]], align 4 -; CHECK-NEXT: br label [[TMP34]] -; CHECK: 34: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr <{ i32, i32 }>, ptr addrspace(32) [[TMP27]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_2_GEP_1:%.*]] = getelementptr inbounds <{ i32, i32 }>, ptr [[VAL_FCA_2_GEP]], i32 0, i32 1 -; CHECK-NEXT: [[VAL_FCA_2_EXTRACT_1:%.*]] = extractvalue <{ i32, i32 }> [[VAL_FCA_2_ALLOCA_LOAD]], 1 -; CHECK-NEXT: [[TMP36:%.*]] = addrspacecast ptr [[VAL_FCA_2_GEP_1]] to ptr addrspace(20) -; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(20) [[TMP36]] to i32 -; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP37]], ptrtoint (ptr addrspace(20) @GLOBAL to i32) -; CHECK-NEXT: [[TMP39:%.*]] = icmp ult i32 [[TMP38]], 60 -; CHECK-NEXT: br i1 [[TMP39]], label [[TMP40:%.*]], label [[TMP41:%.*]] -; CHECK: 40: -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(20) [[TMP36]], align 4 -; CHECK-NEXT: br label [[TMP42:%.*]] -; CHECK: 41: -; CHECK-NEXT: store i32 [[VAL_FCA_2_EXTRACT_1]], ptr addrspace(32) [[TMP35]], align 4 -; CHECK-NEXT: br label [[TMP42]] -; CHECK: 42: -; CHECK-NEXT: ret [[COMPLEX_TYPE]] [[VAL]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 %i - %addr_type = bitcast i32* %addr to %complex_type* - store %complex_type %val, %complex_type* %addr_type - ret %complex_type %val -} - -define <3 x float> 
@load_v3float_reg() { -; CHECK-LABEL: define <3 x float> @load_v3float_reg() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i32, i32 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i32, i32 }> poison, i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 44) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i32, i32, i32 }> [[VAL_FCA_INSERT_0]], i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 48) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_2:%.*]] = insertvalue <{ i32, i32, i32 }> [[VAL_FCA_INSERT_1]], i32 [[TMP3]], 2 -; CHECK-NEXT: store <{ i32, i32, i32 }> [[VAL_FCA_INSERT_2]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load <3 x float>, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret <3 x float> [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %addr_type = bitcast i32* %addr to <3 x float>* - %val = load <3 x float>, <3 x float>* %addr_type - ret <3 x float> %val -} - -define <3 x half> @load_v3half_reg() { -; CHECK-LABEL: define <3 x half> @load_v3half_reg() { -; CHECK-NEXT: [[VAL_FCA_ALLOCA:%.*]] = alloca <{ i32, i16 }>, align 8 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_0:%.*]] = insertvalue <{ i32, i16 }> poison, i32 [[TMP1]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(20) 
addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 44) to ptr addrspace(20)), align 4 -; CHECK-NEXT: [[VAL_FCA_INSERT_1:%.*]] = insertvalue <{ i32, i16 }> [[VAL_FCA_INSERT_0]], i16 [[TMP2]], 1 -; CHECK-NEXT: store <{ i32, i16 }> [[VAL_FCA_INSERT_1]], ptr [[VAL_FCA_ALLOCA]], align 1 -; CHECK-NEXT: [[VAL_FCA_ALLOCA_LOAD:%.*]] = load <3 x half>, ptr [[VAL_FCA_ALLOCA]], align 8 -; CHECK-NEXT: ret <3 x half> [[VAL_FCA_ALLOCA_LOAD]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %addr_type = bitcast i32* %addr to <3 x half>* - %val = load <3 x half>, <3 x half>* %addr_type - ret <3 x half> %val -} - -define <3 x i8> @load_v3i8_reg() { -; CHECK-LABEL: define <3 x i8> @load_v3i8_reg() { -; CHECK-NEXT: [[TMP1:%.*]] = load <3 x i8>, ptr addrspace(20) addrspacecast (ptr getelementptr (i8, ptr addrspacecast (ptr addrspace(20) @GLOBAL to ptr), i64 40) to ptr addrspace(20)), align 4 -; CHECK-NEXT: ret <3 x i8> [[TMP1]] -; - %addr = getelementptr [20 x i32], [20 x i32]* @GLOBAL, i32 0, i32 10 - %addr_type = bitcast i32* %addr to <3 x i8>* - %val = load <3 x i8>, <3 x i8>* %addr_type - ret <3 x i8> %val -} - -define void @with_lifetime_intrinsics() { -; CHECK-LABEL: define void @with_lifetime_intrinsics() { -; CHECK-NEXT: ret void -; - %ptr = bitcast [20 x i32]* @GLOBAL to i8* - call void @llvm.lifetime.start.p0i8(i64 80, i8* %ptr) - call void @llvm.lifetime.end.p0i8(i64 80, i8* %ptr) - ret void -} -;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree norecurse nosync nounwind willreturn memory(read) } -;. 
diff --git a/llvmraytracing/test/dx/remat-intrinsic.ll b/llvmraytracing/test/dx/remat-intrinsic.ll index 51d909f62c..8089dcc374 100644 --- a/llvmraytracing/test/dx/remat-intrinsic.ll +++ b/llvmraytracing/test/dx/remat-intrinsic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata' \ +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' \ ; RUN: -S %s --lint-abort-on-error | FileCheck -check-prefix=POSTPROCESS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -16,27 +16,28 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 -declare i32 @_cont_GetContinuationStackAddr() +; Need _cont_ReportHit to get system data type +declare !pointeetys !25 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare i32 @_cont_GetContinuationStackAddr() declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) declare %struct.DispatchSystemData @_AmdAwaitShader(i64, 
%struct.DispatchSystemData) -declare !types !14 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) +declare !pointeetys !14 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) ; Function Attrs: nounwind memory(none) -declare !types !16 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #0 +declare !pointeetys !16 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #0 ; Function Attrs: nounwind memory(none) -declare !types !18 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #0 +declare !pointeetys !18 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #0 -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !types !19 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) !pointeetys !19 { ret i32 5 } -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types !20 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !pointeetys !20 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -44,7 +45,7 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) !types ret void } -define void @called(%struct.MyParams* %params) !types !21 { +define void @called(%struct.MyParams* %params) !pointeetys !21 { %i = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) %unpacked = call %dx.types.fouri32 @dx.op.unpack4x8.i32(i32 219, i8 1, i32 %i) %params_i = getelementptr %struct.MyParams, %struct.MyParams* %params, i32 0, i32 0 @@ -63,7 +64,7 @@ define void @called(%struct.MyParams* %params) !types !21 { } ; Function Attrs: nounwind -declare !types !23 void 
@dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #1 +declare !pointeetys !23 void @dx.op.callShader.struct.MyParams(i32, i32, %struct.MyParams*) #1 ; Function Attrs: nounwind memory(none) declare i32 @dx.op.dispatchRaysIndex.i32(i32, i8) #0 @@ -107,23 +108,25 @@ attributes #1 = { nounwind } !11 = !{i32 0, i32 9} !12 = !{i32 0, i64 65536} !13 = !{i32 30} -!14 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !15} +!14 = !{%struct.SystemData poison} !15 = !{i32 0, %struct.SystemData poison} -!16 = !{!"function", <3 x i32> poison, !17} +!16 = !{%struct.DispatchSystemData poison} !17 = !{i32 0, %struct.DispatchSystemData poison} -!18 = !{!"function", !"void", !17} -!19 = !{!"function", i32 poison, !17} -!20 = !{!"function", !"void", !17, i32 poison} -!21 = !{!"function", !"void", !22} +!18 = !{%struct.DispatchSystemData poison} +!19 = !{%struct.DispatchSystemData poison} +!20 = !{%struct.DispatchSystemData poison} +!21 = !{%struct.MyParams poison} !22 = !{i32 0, %struct.MyParams poison} -!23 = !{!"function", !"void", i32 poison, i32 poison, !22} +!23 = !{%struct.MyParams poison} +!24 = !{i32 0, %struct.TraversalData poison} +!25 = !{%struct.TraversalData poison} ; POSTPROCESS-LABEL: define i32 @_cont_GetLocalRootIndex( ; POSTPROCESS-SAME: ptr [[DATA:%.*]]) #[[ATTR1:[0-9]+]] { ; POSTPROCESS-NEXT: ret i32 5 ; ; ; POSTPROCESS-LABEL: define void @called( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.registercount [[META14:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] !continuation.state [[META18]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [1 x i32] [[PAYLOAD:%.*]]) !continuation [[META16:![0-9]+]] !lgc.rt.shaderstage [[META17:![0-9]+]] !continuation.stacksize [[META18:![0-9]+]] { ; 
POSTPROCESS-NEXT: AllocaSpillBB: ; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 @@ -135,8 +138,8 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; POSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; POSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 -; POSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[PAYLOAD]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT9:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; POSTPROCESS-NEXT: [[TMP6:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA]]) ; POSTPROCESS-NEXT: [[I:%.*]] = extractelement <3 x i32> [[TMP6]], i8 0 @@ -144,42 +147,45 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: [[HANDLE0:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; POSTPROCESS-NEXT: [[HANDLE1:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[HANDLE0]]) ; POSTPROCESS-NEXT: [[HANDLE2:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[HANDLE1]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POSTPROCESS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; POSTPROCESS-NEXT: store i32 [[TMP5]], ptr addrspace(20) @REGISTERS, align 4 +; POSTPROCESS-NEXT: 
[[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT9]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 ; POSTPROCESS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(ptr @called.resume.0) ; POSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 2, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]]), !continuation.registercount [[META14]], !continuation.returnedRegistercount [[META14]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 2, i32 [[TMP7]], i64 [[TMP8]], [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], [9 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT4]]) ; POSTPROCESS-NEXT: unreachable ; ; ; POSTPROCESS-LABEL: define dso_local void @called.resume.0( -; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] !continuation.registercount [[META14]] { +; POSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [8 x i32], [1 x i32] } [[TMP1:%.*]]) !continuation [[META16]] !lgc.rt.shaderstage [[META17]] { ; POSTPROCESS-NEXT: entryresume.0: -; POSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA1:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; POSTPROCESS-NEXT: [[TMP16:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; POSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; POSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 +; POSTPROCESS-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], ptr [[TMP16]], align 4 ; POSTPROCESS-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP13]], -8 -; POSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; POSTPROCESS-NEXT: [[TMP4:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 2 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [1 x i32] [[TMP4]], 0 +; POSTPROCESS-NEXT: [[TMP15:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [1 x i32] } [[TMP1]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT3:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP15]], 0 ; POSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; POSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) -; POSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 -; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 +; POSTPROCESS-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) +; POSTPROCESS-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP5]], i32 0 +; POSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP6]], align 4 ; POSTPROCESS-NEXT: [[HANDLE011:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; POSTPROCESS-NEXT: [[HANDLE110:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[HANDLE011]]) ; POSTPROCESS-NEXT: [[HANDLE29:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[HANDLE110]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; POSTPROCESS-NEXT: [[TMP6:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr 
[[SYSTEM_DATA_ALLOCA1]]) -; POSTPROCESS-NEXT: [[I8:%.*]] = extractelement <3 x i32> [[TMP6]], i8 0 +; POSTPROCESS-NEXT: [[TMP17:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP16]]) +; POSTPROCESS-NEXT: [[I8:%.*]] = extractelement <3 x i32> [[TMP17]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED7:%.*]] = call [[DX_TYPES_FOURI32:%.*]] [[DX_OP_UNPACK4X8_I32:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 219, i8 1, i32 [[I8]]) -; POSTPROCESS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) +; POSTPROCESS-NEXT: [[TMP7:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP16]]) ; POSTPROCESS-NEXT: [[I6:%.*]] = extractelement <3 x i32> [[TMP7]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED5:%.*]] = call [[DX_TYPES_FOURI32]] [[DX_OP_UNPACK4X8_I32]](i32 219, i8 1, i32 [[I6]]) -; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) +; POSTPROCESS-NEXT: [[TMP8:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP16]]) ; POSTPROCESS-NEXT: [[I4:%.*]] = extractelement <3 x i32> [[TMP8]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED3:%.*]] = call [[DX_TYPES_FOURI32]] [[DX_OP_UNPACK4X8_I32]](i32 219, i8 1, i32 [[I4]]) -; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) +; POSTPROCESS-NEXT: [[TMP9:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP16]]) ; POSTPROCESS-NEXT: [[I2:%.*]] = extractelement <3 x i32> [[TMP9]], i8 0 ; POSTPROCESS-NEXT: [[UNPACKED1:%.*]] = call [[DX_TYPES_FOURI32]] [[DX_OP_UNPACK4X8_I32]](i32 219, i8 1, i32 [[I2]]) ; POSTPROCESS-NEXT: [[A:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED7]], 0 @@ -188,12 +194,12 @@ attributes #1 = { nounwind } ; POSTPROCESS-NEXT: [[D:%.*]] = extractvalue [[DX_TYPES_FOURI32]] [[UNPACKED1]], 3 ; POSTPROCESS-NEXT: [[PACKED:%.*]] = call i32 @dx.op.pack4x8.i32(i32 220, i8 0, i32 [[A]], i32 [[B]], i32 [[C]], i32 [[D]]) ; POSTPROCESS-NEXT: call void 
@dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[HANDLE29]], i32 0, i32 0, i32 undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, i8 15) -; POSTPROCESS-NEXT: store i32 [[PACKED]], ptr addrspace(20) @REGISTERS, align 4 ; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT3]], 0 +; POSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [1 x i32] poison, i32 [[PACKED]], 0 ; POSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 ; POSTPROCESS-NEXT: store i32 [[TMP11]], ptr [[CSP]], align 4 ; POSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; POSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META14]] +; POSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP12]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [8 x i32] poison, [1 x i32] [[DOTFCA_0_INSERT1]]) ; POSTPROCESS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/remove-types-metadata.ll b/llvmraytracing/test/dx/remove-types-metadata.ll index 7694d52e5e..b0d69036da 100644 --- a/llvmraytracing/test/dx/remove-types-metadata.ll +++ b/llvmraytracing/test/dx/remove-types-metadata.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 3 -; RUN: opt --verify-each -passes='remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=METADATA %s +; RUN: opt --verify-each -passes='continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck -check-prefix=METADATA %s target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -20,50 +20,48 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i32 @_cont_GetContinuationStackAddr() #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #0 declare %struct.AnyHitTraversalData @_AmdAwaitAnyHit(i64, %struct.AnyHitTraversalData, float, i32) #0 -declare !types !31 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 +declare !pointeetys !31 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #0 -declare !types !33 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !33 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 -declare !types !35 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !35 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !36 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !36 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !37 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !37 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !39 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !39 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 -declare !types !41 i32 @_cont_HitKind(%struct.SystemData*) #0 +declare !pointeetys !41 i32 
@_cont_HitKind(%struct.SystemData*) #0 ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #1 ; Function Attrs: nounwind -declare !types !42 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !42 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 ; Function Attrs: nounwind -declare !types !43 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 +declare !pointeetys !43 void @_AmdRestoreSystemDataAnyHit(%struct.AnyHitTraversalData*) #1 ; Function Attrs: nounwind -declare !types !42 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !42 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !43 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 +declare !pointeetys !43 void @_cont_AcceptHit(%struct.AnyHitTraversalData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !42 void @_cont_IgnoreHit(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !42 void @_cont_IgnoreHit(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !43 void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* nocapture readnone) #1 +declare !pointeetys !43 void @_AmdAcceptHitAttributes(%struct.AnyHitTraversalData* nocapture readnone) #1 -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !44 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !44 { ; METADATA-LABEL: define void @_cont_TraceRay( ; METADATA-SAME: ptr [[DATA:%.*]], i64 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 
[[TMP3:%.*]], i32 [[TMP4:%.*]], i32 [[TMP5:%.*]], float [[TMP6:%.*]], float [[TMP7:%.*]], float [[TMP8:%.*]], float [[TMP9:%.*]], float [[TMP10:%.*]], float [[TMP11:%.*]], float [[TMP12:%.*]], float [[TMP13:%.*]]) #[[ATTR0:[0-9]+]] { ; METADATA-NEXT: [[DIS_DATA:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], align 4 @@ -87,7 +85,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !45 { +define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !pointeetys !45 { ; METADATA-LABEL: define void @_cont_CallShader( ; METADATA-SAME: ptr [[DATA:%.*]], i32 [[TMP0:%.*]]) #[[ATTR0]] { ; METADATA-NEXT: [[DIS_DATA:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], align 4 @@ -103,7 +101,7 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ ret void } -define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !types !46 { +define i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) #0 !pointeetys !46 { ; METADATA-LABEL: define i1 @_cont_ReportHit( ; METADATA-SAME: ptr [[DATA:%.*]], float [[T:%.*]], i32 [[HITKIND:%.*]]) #[[ATTR0]] { ; METADATA-NEXT: [[ORIGTPTR:%.*]] = getelementptr inbounds [[STRUCT_ANYHITTRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 4 @@ -137,7 +135,7 @@ isEnd: ; preds = %0 ret i1 false } -define i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* %data, i32 %i) !types !47 { +define i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* %data, i32 %i) !pointeetys !47 { ; METADATA-LABEL: define i32 @_cont_DispatchRaysIndex( ; METADATA-SAME: ptr [[DATA:%.*]], i32 [[I:%.*]]) { ; METADATA-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0, i32 [[I]] @@ -149,7 +147,7 @@ define i32 @_cont_DispatchRaysIndex(%struct.DispatchSystemData* %data, i32 %i) 
! ret i32 %res } -define float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData, i32 %i) !types !48 { +define float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData, i32 %i) !pointeetys !48 { ; METADATA-LABEL: define float @_cont_ObjectRayOrigin( ; METADATA-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]], i32 [[I:%.*]]) { ; METADATA-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0, i32 [[I]] @@ -161,7 +159,7 @@ define float @_cont_ObjectRayOrigin(%struct.DispatchSystemData* nocapture readno ret float %res } -define float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData, i32 %i) !types !48 { +define float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData, i32 %i) !pointeetys !48 { ; METADATA-LABEL: define float @_cont_ObjectRayDirection( ; METADATA-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]], i32 [[I:%.*]]) { ; METADATA-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 1, i32 [[I]] @@ -173,7 +171,7 @@ define float @_cont_ObjectRayDirection(%struct.DispatchSystemData* nocapture rea ret float %res } -define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !50 { +define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !50 { ; METADATA-LABEL: define float @_cont_RayTCurrent( ; METADATA-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { ; METADATA-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 2 @@ -238,7 +236,7 @@ define void @MyRayGen() #2 { } ; Function Attrs: nounwind -define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, 
%struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !types !54 { +define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !pointeetys !54 { ; METADATA: Function Attrs: nounwind ; METADATA-LABEL: define void @MyClosestHitShader( ; METADATA-SAME: ptr noalias nocapture [[PAYLOAD:%.*]], ptr nocapture readonly [[ATTR:%.*]]) #[[ATTR2]] { @@ -272,7 +270,7 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, } ; Function Attrs: nounwind -define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #2 !types !54 { +define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #2 !pointeetys !54 { ; METADATA: Function Attrs: nounwind ; METADATA-LABEL: define void @MyAnyHitShader( ; METADATA-SAME: ptr noalias nocapture [[PAYLOAD:%.*]], ptr nocapture readnone [[ATTR:%.*]]) #[[ATTR2]] { @@ -376,7 +374,7 @@ define void @MyIntersectionShader() #2 { } ; Function Attrs: nounwind -define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !types !57 { +define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !pointeetys !57 { ; METADATA: Function Attrs: nounwind ; METADATA-LABEL: define void @MyMissShader( ; METADATA-SAME: ptr noalias nocapture [[PAYLOAD:%.*]]) #[[ATTR2]] { @@ -390,7 +388,7 @@ define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #2 !ty } ; Function Attrs: nounwind -declare !types !58 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 +declare !pointeetys !58 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, 
float, float, float, float, %struct.RayPayload*) #1 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 @@ -412,7 +410,7 @@ declare void @dx.op.acceptHitAndEndSearch(i32) #0 declare void @dx.op.ignoreHit(i32) #0 ; Function Attrs: nounwind -declare !types !59 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 +declare !pointeetys !59 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 @@ -421,10 +419,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !60 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !60 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !60 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !60 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } @@ -472,37 +470,36 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !28 = !{i32 8, i32 11, i32 6, i32 16, i32 5, !22} !29 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !30} !30 = !{i32 
8, i32 7, i32 5, !22} -!31 = !{!"function", %struct.HitData poison, !32} +!31 = !{%struct.AnyHitTraversalData poison} !32 = !{i32 0, %struct.AnyHitTraversalData poison} -!33 = !{!"function", %struct.HitData poison, !34} +!33 = !{%struct.SystemData poison} !34 = !{i32 0, %struct.SystemData poison} -!35 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !34} -!36 = !{!"function", !"void", !34, %struct.BuiltInTriangleIntersectionAttributes poison} -!37 = !{!"function", i32 poison, !38} +!35 = !{%struct.SystemData poison} +!36 = !{%struct.SystemData poison} +!37 = !{%struct.DispatchSystemData poison} !38 = !{i32 0, %struct.DispatchSystemData poison} -!39 = !{!"function", i1 poison, !40} +!39 = !{%struct.TraversalData poison} !40 = !{i32 0, %struct.TraversalData poison} -!41 = !{!"function", i32 poison, !34} -!42 = !{!"function", !"void", !38} -!43 = !{!"function", !"void", !32} -!44 = !{!"function", !"void", !38, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!45 = !{!"function", !"void", !38, i32 poison} -!46 = !{!"function", i1 poison, !32, float poison, i32 poison} -!47 = !{!"function", i32 poison, !38, i32 poison} -!48 = !{!"function", float poison, !38, !49, i32 poison} +!41 = !{%struct.SystemData poison} +!42 = !{%struct.DispatchSystemData poison} +!43 = !{%struct.AnyHitTraversalData poison} +!44 = !{%struct.DispatchSystemData poison} +!45 = !{%struct.DispatchSystemData poison} +!46 = !{%struct.AnyHitTraversalData poison} +!47 = !{%struct.DispatchSystemData poison} +!48 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !49 = !{i32 0, %struct.HitData poison} -!50 = !{!"function", float poison, !38, !49} +!50 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !51 = !{!52, !52, i64 0} !52 = !{!"omnipotent char", !53, i64 0} !53 = !{!"Simple C/C++ TBAA"} -!54 = !{!"function", 
!"void", !55, !56} +!54 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !55 = !{i32 0, %struct.RayPayload poison} !56 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!57 = !{!"function", !"void", !55} -!58 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !55} -!59 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !56} -!60 = !{!"function", !"void", i64 poison, !61} -!61 = !{i32 0, i8 poison} +!57 = !{%struct.RayPayload poison} +!58 = !{%struct.RayPayload poison} +!59 = !{%struct.BuiltInTriangleIntersectionAttributes poison} +!60 = !{i8 poison} ;. ; METADATA: attributes #[[ATTR0]] = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } ; METADATA: attributes #[[ATTR1]] = { nounwind } diff --git a/llvmraytracing/test/dx/remove-unused-declarations.ll b/llvmraytracing/test/dx/remove-unused-declarations.ll index 44a2222201..f53df7b21a 100644 --- a/llvmraytracing/test/dx/remove-unused-declarations.ll +++ b/llvmraytracing/test/dx/remove-unused-declarations.ll @@ -1,5 +1,5 @@ ; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-DECL %s -; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint' -S %s --lint-abort-on-error | 
FileCheck -check-prefix=DXILCONTPOSTPROCESS-DECL %s +; RUN: opt --verify-each -passes='dxil-cont-intrinsic-prepare,lint,dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint' -S %s --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS-DECL %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -11,18 +11,18 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i64 @_cont_GetTraversalAddr() #4 declare i32 @_cont_GetContinuationStackAddr() #4 -declare !types !31 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) #4 -declare !types !32 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #4 +declare !pointeetys !31 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.DispatchSystemData*) #4 +declare !pointeetys !32 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #4 declare %struct.DispatchSystemData @_cont_Traversal(%struct.TraversalData) #4 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #4 -declare !types !33 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #4 -declare !types !34 %struct.HitData @_cont_GetCommittedState(%struct.DispatchSystemData*) #4 +declare !pointeetys !33 %struct.HitData @_cont_GetCandidateState(%struct.AnyHitTraversalData*) #4 +declare !pointeetys !34 %struct.HitData @_cont_GetCommittedState(%struct.DispatchSystemData*) #4 +declare !pointeetys !53 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, 
i32 %hitKind) -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #4 !types !37 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #4 !pointeetys !37 { ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float) #4 !types !38 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float) #4 !pointeetys !38 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -32,21 +32,21 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64, i32, i32, i3 } ; Function Attrs: nounwind readnone -declare !types !40 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #2 -declare !types !40 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone %data) #2 -declare !types !41 <3 x float> @_cont_WorldRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data) #2 -declare !types !41 <3 x float> @_cont_WorldRayDirection3(%struct.DispatchSystemData* nocapture readnone %data) #2 -declare !types !42 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone %data) #2 -declare !types !43 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #1 -declare !types !51 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone %data) #2 -declare !types !52 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !52 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !52 i32 
@_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !46 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !46 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !47 [4 x <3 x float>] @_cont_ObjectToWorld4x3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !47 [4 x <3 x float>] @_cont_WorldToObject4x3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 -declare !types !45 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !40 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone %data) #2 +declare !pointeetys !40 <3 x i32> @_cont_DispatchRaysDimensions3(%struct.DispatchSystemData* nocapture readnone %data) #2 +declare !pointeetys !41 <3 x float> @_cont_WorldRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data) #2 +declare !pointeetys !41 <3 x float> @_cont_WorldRayDirection3(%struct.DispatchSystemData* nocapture readnone %data) #2 +declare !pointeetys !42 float @_cont_RayTMin(%struct.DispatchSystemData* nocapture readnone %data) #2 +declare !pointeetys !43 float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #1 +declare !pointeetys !51 i32 @_cont_RayFlags(%struct.DispatchSystemData* nocapture readnone %data) #2 +declare !pointeetys !52 i32 @_cont_InstanceIndex(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !52 i32 @_cont_InstanceID(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !52 i32 @_cont_PrimitiveIndex(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !46 <3 x float> 
@_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !46 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !47 [4 x <3 x float>] @_cont_ObjectToWorld4x3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !47 [4 x <3 x float>] @_cont_WorldToObject4x3(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData*) #2 +declare !pointeetys !45 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone %data, %struct.HitData*) #2 %dx.types.Handle = type { i8* } %struct.RaytracingAccelerationStructure = type { i32 } @@ -58,7 +58,7 @@ declare !types !45 i32 @_cont_HitKind(%struct.SystemData* nocapture readnone %da @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = external constant %dx.types.Handle, align 4 ; Function Attrs: nounwind -define void @ClosestHit(%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*) #0 !types !48 { +define void @ClosestHit(%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*) #0 !pointeetys !48 { %a = call i32 @dx.op.dispatchRaysIndex.i32(i32 145, i8 0) ; DispatchRaysIndex(col) %b = call i32 @dx.op.dispatchRaysDimensions.i32(i32 146, i8 0) ; DispatchRaysDimensions(col) %c = call float @dx.op.worldRayOrigin.f32(i32 147, i8 0) ; WorldRayOrigin(col) @@ -157,25 +157,26 @@ attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no- !22 = !{i32 0} !29 = !{void (%struct.RayPayload*, %struct.BuiltInTriangleIntersectionAttributes*)* @ClosestHit, !"ClosestHit", null, null, !30} !30 = !{i32 8, i32 10, i32 5, !22} -!31 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !39} -!32 = !{!"function", !"void", !36, %struct.BuiltInTriangleIntersectionAttributes poison} -!33 = !{!"function", %struct.HitData poison, !35} -!34 = !{!"function", %struct.HitData poison, !39} +!31 = 
!{%struct.DispatchSystemData poison} +!32 = !{%struct.SystemData poison} +!33 = !{%struct.AnyHitTraversalData poison} +!34 = !{%struct.DispatchSystemData poison} !35 = !{i32 0, %struct.AnyHitTraversalData poison} !36 = !{i32 0, %struct.SystemData poison} -!37 = !{!"function", i32 poison, !39} -!38 = !{!"function", !"void", !39, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} +!37 = !{%struct.DispatchSystemData poison} +!38 = !{%struct.DispatchSystemData poison} !39 = !{i32 0, %struct.DispatchSystemData poison} -!40 = !{!"function", <3 x i32> poison, !39} -!41 = !{!"function", <3 x float> poison, !39} -!42 = !{!"function", float poison, !39} -!43 = !{!"function", float poison, !39, !44} +!40 = !{%struct.DispatchSystemData poison} +!41 = !{%struct.DispatchSystemData poison} +!42 = !{%struct.DispatchSystemData poison} +!43 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !44 = !{i32 0, %struct.HitData poison} -!45 = !{!"function", i32 poison, !36, !44} -!46 = !{!"function", <3 x float> poison, !39, !44} -!47 = !{!"function", [4 x <3 x float>] poison, !39, !44} -!48 = !{!"function", !"void", !49, !50} +!45 = !{null, %struct.SystemData poison, %struct.HitData poison} +!46 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!47 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!48 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !49 = !{i32 0, %struct.RayPayload poison} !50 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!51 = !{!"function", i32 poison, !39} -!52 = !{!"function", i32 poison, !39, !44} +!51 = !{%struct.DispatchSystemData poison} +!52 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} +!53 = !{%struct.AnyHitTraversalData poison} diff --git 
a/llvmraytracing/test/dx/dxil-cont-post-process-report-sizes.ll b/llvmraytracing/test/dx/stats-report-sizes.ll similarity index 71% rename from llvmraytracing/test/dx/dxil-cont-post-process-report-sizes.ll rename to llvmraytracing/test/dx/stats-report-sizes.ll index 62fc6da8f8..ae343aba9a 100644 --- a/llvmraytracing/test/dx/dxil-cont-post-process-report-sizes.ll +++ b/llvmraytracing/test/dx/stats-report-sizes.ll @@ -1,6 +1,6 @@ -; RUN: opt --report-cont-state-sizes --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix=REPORT-CONT-SIZES -; RUN: opt --report-payload-register-sizes --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix=REPORT-PAYLOAD-SIZES -; RUN: opt --report-system-data-sizes --verify-each -passes='dxil-cont-post-process,lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix=REPORT-SYSTEM-DATA-SIZES +; RUN: opt --report-cont-state-sizes --verify-each -passes='continuations-stats-report,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix=REPORT-CONT-SIZES +; RUN: opt --report-payload-register-sizes --verify-each -passes='continuations-stats-report,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix=REPORT-PAYLOAD-SIZES +; RUN: opt --report-system-data-sizes --verify-each -passes='continuations-stats-report,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s --check-prefix=REPORT-SYSTEM-DATA-SIZES target datalayout = 
"e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -10,8 +10,7 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i32 @continuation.initialContinuationStackPtr() declare i32 @_cont_GetContinuationStackAddr() declare i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare %struct.DispatchSystemData @_cont_SetupRayGen() -declare void @continuation.continue(i64, ...) +declare void @lgc.ilcps.continue(i64, ...) ; REPORT-CONT-SIZES: Continuation state size of "RayGen" (raygeneration): 108 bytes ; REPORT-PAYLOAD-SIZES: Incoming and max outgoing payload VGPR size of "RayGen" (raygeneration): 28 and 24 bytes @@ -19,21 +18,21 @@ define void @RayGen(i64 %dummyRetAddr, %struct.DispatchSystemData %0) !continuat %csp = alloca i32, align 4 %cspInit = call i32 @continuation.initialContinuationStackPtr() store i32 %cspInit, i32* %csp - call void (i64, ...) @continuation.continue(i64 2), !continuation.registercount !6 + call void (i64, ...) @lgc.ilcps.continue(i64 2, i32 poison, i64 poison), !continuation.registercount !6 ret void } ; This is needed as fake continuation of RayGen, because we only report continuation state sizes ; if we find a continuation function using !continuation metadata. 
; REPORT-SYSTEM-DATA-SIZES-DAG: Incoming system data of "RayGen.resume.0" (raygeneration) is "struct.DispatchSystemData", size: 4 bytes -define void @RayGen.resume.0(i64 %0, %struct.DispatchSystemData %1) !continuation !3 !lgc.rt.shaderstage !12 { +define void @RayGen.resume.0(i64 %0, { %struct.DispatchSystemData } %1) !continuation !3 !lgc.rt.shaderstage !12 { ret void } ; REPORT-PAYLOAD-SIZES: Incoming and max outgoing payload VGPR size of "CHS" (closesthit): 32 and 36 bytes ; REPORT-SYSTEM-DATA-SIZES-DAG: Incoming system data of "CHS" (closesthit) is "struct.CHSSystemData", size: 400 bytes define void @CHS(i64 %returnAddr, %struct.CHSSystemData %0) !continuation !14 !continuation.registercount !8 !lgc.rt.shaderstage !13 { - call void (i64, ...) @continuation.continue(i64 2), !continuation.registercount !9 + call void (i64, ...) @lgc.ilcps.continue(i64 2, i32 poison, i64 poison), !continuation.registercount !9 ret void } diff --git a/llvmraytracing/test/dx/traceray.ll b/llvmraytracing/test/dx/traceray.ll index da0acf92a5..41ed6ef32e 100644 --- a/llvmraytracing/test/dx/traceray.ll +++ b/llvmraytracing/test/dx/traceray.ll @@ -1,9 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: grep -v lgc.cps.module %s | grep -v SKIP_GLOBAL_ADDRSPACE | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE %s -; RUN: grep -v lgc.cps.module %s | grep -v SKIP_GLOBAL_ADDRSPACE | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s -; RUN: grep -v lgc.cps.module %s 
| opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS-GLOBAL %s +; RUN: grep -v lgc.cps.module %s | grep -v SKIP_GLOBAL_ADDRSPACE | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS %s +; RUN: grep -v lgc.cps.module %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS-GLOBAL %s ; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=LOWERRAYTRACINGPIPELINE-CPS %s -; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,register-buffer,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS-CPS %s +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each 
-passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=CLEANUP-CPS %s +; RUN: grep -v SKIP_GLOBAL_ADDRSPACE %s | opt --verify-each -passes="dxil-cont-lgc-rt-op-converter,lint,inline,lint,lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,dxil-cont-post-process,lint,remove-types-metadata" -S --lint-abort-on-error | FileCheck -check-prefix=DXILCONTPOSTPROCESS-CPS %s target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" @@ -27,8 +28,6 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i32 @_cont_GetContinuationStackAddr() #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - ; To exercise both waiting and non-waiting Await, we use WaitAwait for Traversal, ; and Await for Callshader. This does not necessarily reflect current choices in GPURT. declare %struct.DispatchSystemData @_AmdWaitAwaitTraversal(i64, i64, %struct.TraversalData) #0 @@ -37,59 +36,59 @@ declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemD declare %struct.TraversalData @_AmdAwaitAnyHit(i64, %struct.TraversalData, float, i32) #0 -declare void @continuation.continue(i64, ...) #0 +declare void @lgc.ilcps.continue(...) 
#0 declare void @_AmdContStackSetPtr(i32) #0 -declare !types !32 i32 @_cont_HitKind(%struct.SystemData*) #0 +declare !pointeetys !32 i32 @_cont_HitKind(%struct.SystemData*) #0 declare i64 @_cont_GetContinuationStackGlobalMemBase() ; SKIP_GLOBAL_ADDRSPACE ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #1 -declare !types !34 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 +declare !pointeetys !34 %struct.HitData @_cont_GetCommittedState(%struct.SystemData*) #0 ; Function Attrs: nounwind -declare !types !35 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 +declare !pointeetys !35 void @_AmdRestoreSystemData(%struct.DispatchSystemData*) #1 ; Function Attrs: nounwind -declare !types !37 void @_AmdRestoreSystemDataAnyHit(%struct.TraversalData*) #1 +declare !pointeetys !37 void @_AmdRestoreSystemDataAnyHit(%struct.TraversalData*) #1 ; Function Attrs: nounwind -declare !types !37 void @_cont_AcceptHit(%struct.TraversalData* nocapture readnone) #1 +declare !pointeetys !37 void @_cont_AcceptHit(%struct.TraversalData* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !37 void @_AmdAcceptHitAttributes(%struct.TraversalData*) #1 +declare !pointeetys !37 void @_AmdAcceptHitAttributes(%struct.TraversalData*) #1 declare i1 @opaqueIsEnd() #0 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !types !39 { +define i1 @_cont_IsEndSearch(%struct.TraversalData* %data) #0 !pointeetys !39 { %isEnd = call i1 @opaqueIsEnd() ret i1 %isEnd } -define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) #0 !types !40 { +define 
%struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData* %data) #0 !pointeetys !40 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 %val = load %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret %struct.BuiltInTriangleIntersectionAttributes %val } -define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) #0 !types !41 { +define void @_cont_SetTriangleHitAttributes(%struct.SystemData* %data, %struct.BuiltInTriangleIntersectionAttributes %val) #0 !pointeetys !41 { %addr = getelementptr %struct.SystemData, %struct.SystemData* %data, i32 0, i32 1 store %struct.BuiltInTriangleIntersectionAttributes %val, %struct.BuiltInTriangleIntersectionAttributes* %addr, align 4 ret void } -define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !types !42 { +define i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData* %data) #0 !pointeetys !42 { ret i32 5 } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !43 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !43 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %sys_data = insertvalue %struct.SystemData undef, %struct.DispatchSystemData %dis_data, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 @@ -101,7 +100,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !types !44 { +define void 
@_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !pointeetys !44 { %dis_data = load %struct.DispatchSystemData, %struct.DispatchSystemData* %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, %struct.DispatchSystemData* %data, align 4 @@ -112,12 +111,12 @@ define void @_cont_CallShader(%struct.DispatchSystemData* %data, i32 %0) #0 !typ define void @_cont_KernelEntry() #0 !lgc.rt.shaderstage !69 { %cspInit = ptrtoint ptr @debug_global to i32 call void @_AmdContStackSetPtr(i32 %cspInit) - call void (i64, ...) @continuation.continue(i64 0, %struct.DispatchSystemData poison) + call void (...) @lgc.ilcps.continue(i64 0, i32 poison, i64 undef, %struct.DispatchSystemData poison) ret void } ; Function Attrs: alwaysinline -define i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) #2 !types !45 { +define i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) #2 !pointeetys !45 { %doanyhit = fcmp fast ogt float %t, 0.000000e+00 br i1 %doanyhit, label %anyhit, label %accepthit @@ -133,29 +132,29 @@ accepthit: ; preds = %0 ret i1 true } -define %struct.HitData @_cont_GetCandidateState(%struct.TraversalData* %data) #0 !types !46 { +define %struct.HitData @_cont_GetCandidateState(%struct.TraversalData* %data) #0 !pointeetys !46 { %resPtr = getelementptr %struct.TraversalData, %struct.TraversalData* %data, i32 0, i32 1 %res = load %struct.HitData, %struct.HitData* %resPtr, align 4 ret %struct.HitData %res } -define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !types !47 { +define float @_cont_RayTCurrent(%struct.DispatchSystemData* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !47 { %resPtr = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0 %res = load float, float* %resPtr, align 4 ret float %res } ; Function 
Attrs: nounwind memory(none) -declare !types !49 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #3 +declare !pointeetys !49 <3 x i32> @_cont_DispatchRaysIndex3(%struct.DispatchSystemData* nocapture readnone) #3 ; Function Attrs: nounwind memory(none) -declare !types !50 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #3 +declare !pointeetys !50 <3 x float> @_cont_ObjectRayOrigin3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #3 ; Function Attrs: nounwind memory(none) -declare !types !50 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #3 +declare !pointeetys !50 <3 x float> @_cont_ObjectRayDirection3(%struct.DispatchSystemData* nocapture readnone, %struct.HitData*) #3 ; Function Attrs: nounwind -declare !types !35 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 +declare !pointeetys !35 void @_cont_AcceptHitAndEndSearch(%struct.DispatchSystemData* nocapture readnone) #1 ; Function Attrs: nounwind define void @MyRayGen() #4 !lgc.rt.shaderstage !64 { @@ -184,7 +183,7 @@ define void @MyRayGen() #4 !lgc.rt.shaderstage !64 { } ; Function Attrs: nounwind -define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #4 !types !54 !lgc.rt.shaderstage !65 { +define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #4 !pointeetys !54 !lgc.rt.shaderstage !65 { %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 %2 = load <2 x float>, <2 x float>* %1, align 4 %3 = extractelement <2 x float> %2, i32 0 @@ -201,7 +200,7 @@ define void @MyClosestHitShader(%struct.RayPayload* noalias nocapture %payload, } ; 
Function Attrs: nounwind -define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #4 !types !54 !lgc.rt.shaderstage !66 { +define void @MyAnyHitShader(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #4 !pointeetys !54 !lgc.rt.shaderstage !66 { %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 %2 = load <4 x float>, <4 x float>* %1, align 4 %3 = call float @dx.op.objectRayOrigin.f32(i32 149, i8 0) @@ -259,14 +258,14 @@ define void @MyIntersectionShaderLargeAttrs() #4 { } ; Function Attrs: nounwind -define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #4 !types !57 !lgc.rt.shaderstage !68 { +define void @MyMissShader(%struct.RayPayload* noalias nocapture %payload) #4 !pointeetys !57 !lgc.rt.shaderstage !68 { %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 store <4 x float> , <4 x float>* %1, align 4 ret void } ; Function Attrs: nounwind -declare !types !58 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 +declare !pointeetys !58 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 @@ -286,10 +285,10 @@ declare float @dx.op.rayTCurrent.f32(i32) #5 declare void @dx.op.acceptHitAndEndSearch(i32) #0 ; Function Attrs: nounwind -declare !types !59 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 +declare !pointeetys !59 i1 
@dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 ; Function Attrs: nounwind -declare !types !60 i1 @dx.op.reportHit.struct.LargeIntersectionAttributes(i32, float, i32, %struct.LargeIntersectionAttributes*) #1 +declare !pointeetys !60 i1 @dx.op.reportHit.struct.LargeIntersectionAttributes(i32, float, i32, %struct.LargeIntersectionAttributes*) #1 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 @@ -298,10 +297,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #5 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !62 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #6 +declare !pointeetys !62 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #6 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !62 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #6 +declare !pointeetys !62 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #6 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } @@ -353,37 +352,37 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !29 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !30} !30 = !{i32 8, i32 7, i32 5, !22} !31 = !{void ()* @MyIntersectionShaderLargeAttrs, !"MyIntersectionShaderLargeAttrs", null, null, !26} -!32 = !{!"function", i32 poison, !33} +!32 = !{%struct.SystemData poison} !33 = !{i32 0, %struct.SystemData poison} -!34 = 
!{!"function", %struct.HitData poison, !33} -!35 = !{!"function", !"void", !36} +!34 = !{%struct.SystemData poison} +!35 = !{%struct.DispatchSystemData poison} !36 = !{i32 0, %struct.DispatchSystemData poison} -!37 = !{!"function", !"void", !38} +!37 = !{%struct.TraversalData poison} !38 = !{i32 0, %struct.TraversalData poison} -!39 = !{!"function", i1 poison, !38} -!40 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !33} -!41 = !{!"function", !"void", !33, %struct.BuiltInTriangleIntersectionAttributes poison} -!42 = !{!"function", i32 poison, !36} -!43 = !{!"function", !"void", !36, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!44 = !{!"function", !"void", !36, i32 poison} -!45 = !{!"function", i1 poison, !38, float poison, i32 poison} -!46 = !{!"function", %struct.HitData poison, !38} -!47 = !{!"function", float poison, !36, !48} +!39 = !{%struct.TraversalData poison} +!40 = !{%struct.SystemData poison} +!41 = !{%struct.SystemData poison} +!42 = !{%struct.DispatchSystemData poison} +!43 = !{%struct.DispatchSystemData poison} +!44 = !{%struct.DispatchSystemData poison} +!45 = !{%struct.TraversalData poison} +!46 = !{%struct.TraversalData poison} +!47 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !48 = !{i32 0, %struct.HitData poison} -!49 = !{!"function", <3 x i32> poison, !36} -!50 = !{!"function", <3 x float> poison, !36, !48} +!49 = !{%struct.DispatchSystemData poison} +!50 = !{null, %struct.DispatchSystemData poison, %struct.HitData poison} !51 = !{!52, !52, i64 0} !52 = !{!"omnipotent char", !53, i64 0} !53 = !{!"Simple C/C++ TBAA"} -!54 = !{!"function", !"void", !55, !56} +!54 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !55 = !{i32 0, %struct.RayPayload poison} !56 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} 
-!57 = !{!"function", !"void", !55} -!58 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !55} -!59 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !56} -!60 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !61} +!57 = !{%struct.RayPayload poison} +!58 = !{%struct.RayPayload poison} +!59 = !{%struct.BuiltInTriangleIntersectionAttributes poison} +!60 = !{%struct.LargeIntersectionAttributes poison} !61 = !{i32 0, %struct.LargeIntersectionAttributes poison} -!62 = !{!"function", !"void", i64 poison, !63} +!62 = !{i8 poison} !63 = !{i32 0, i8 poison} !64 = !{i32 0} !65 = !{i32 3} @@ -422,7 +421,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_AmdContStackSetPtr(i32 [[CSPINIT]]) -; LOWERRAYTRACINGPIPELINE-NEXT: call void (i64, ...) @continuation.continue(i64 0, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.continue(i64 0, i32 poison, i64 undef, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; ; @@ -443,6 +442,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META22]] !continuation.entry [[META13:![0-9]+]] !continuation [[META36:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -462,31 +462,41 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.wait.await [[META13]], !continuation.returnedRegistercount [[META33]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call [[STRUCT_DISPATCHSYSTEMDATA]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP18]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 -1, i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], 
[8 x i32] poison, [10 x i32] [[TMP41]]), !continuation.registercount [[META33:![0-9]+]], !continuation.wait.await [[META13]], !continuation.returnedRegistercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } @await(ptr [[TMP42]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP43]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[TMP24]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP44]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP31]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP39]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP45]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP43]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] @@ -508,24 +518,29 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyClosestHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META41:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META41:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: 
[[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP18]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
i32, ptr [[TMP7]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 @@ -551,24 +566,28 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP31]], ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP33]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP36]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP39]], ptr [[TMP34]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP46]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP45]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP45]], [21 x i32] poison, [10 x i32] [[TMP47]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyAnyHitShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META43:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META43:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -576,33 +595,39 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; 
LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP22]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP37]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds i32, ptr [[TMP13]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP38]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP17]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP19]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[ORIGHITATTRS]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP8]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP42]], ptr [[ORIGHITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 
1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP55]], ptr [[TMP21]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], ptr [[HITATTRSALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0, i32 0 @@ -628,71 +653,81 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = fadd fast float [[TMP31]], [[EXTRACT1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = fcmp fast ogt float [[TMP32]], 0.000000e+00 ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[TMP33]], label [[TMP34:%.*]], label [[TMP51:%.*]] -; LOWERRAYTRACINGPIPELINE: 34: +; LOWERRAYTRACINGPIPELINE: 38: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP25]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP35]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP37]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP45]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = 
load i32, ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP49]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP44]], ptr [[TMP45]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP47]], ptr [[TMP48]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[TMP45]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP50]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP46]], ptr [[TMP7]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP48]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP56]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = 
getelementptr inbounds i32, ptr [[TMP43]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP49]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP56]], ptr [[TMP50]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP57]], ptr [[TMP7]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP52]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP54]], ptr [[TMP53]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP58:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP7]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP59]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP58]], ptr [[ADDR_I1]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP60:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP60]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP60]], [8 x i32] poison, [10 x i32] [[TMP68]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 51: +; LOWERRAYTRACINGPIPELINE: 59: ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP25]], ptr [[TMP24]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP9]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP62]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP65]], ptr addrspace(20) @PAYLOAD, align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP66]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP68]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP64]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP62]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP61]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, ptr [[TMP62]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP64]], ptr [[TMP66]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP65:%.*]] = getelementptr 
inbounds i32, ptr [[TMP66]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP75]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP67]], ptr [[TMP65]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP66]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP72]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP63:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP63]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP61]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP78]], ptr [[TMP77]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[TMP63]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP70]], ptr [[TMP71]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP76:%.*]] = load i32, ptr [[HITATTRSALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP76]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[HITATTRSALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP72]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP74]], ptr [[TMP73]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP79:%.*]] = load 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP80]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP79]], ptr [[ADDR_I2]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP81:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP81]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP78:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP81]], [8 x i32] poison, [10 x i32] [[TMP78]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyIntersectionShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META45:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META45:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], 
align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -708,13 +743,18 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: anyhit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_TRAVERSALDATA]] [[AWAIT_1:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP8]], [20 x i32] poison, [30 x i32] [[TMP9]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = call { 
[[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } @await.1(ptr [[TMP13]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP19]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[TMP25]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP19]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: accepthit.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 @@ -729,23 +769,27 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I]], label [[TMP20:%.*]], label [[TMP22:%.*]] -; LOWERRAYTRACINGPIPELINE: 18: +; LOWERRAYTRACINGPIPELINE: 22: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP21]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP21]], [8 x i32] poison, [30 x i32] [[TMP24]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 20: +; LOWERRAYTRACINGPIPELINE: 25: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP23]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP23]], [8 x i32] poison, [30 x i32] [[TMP27]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.TraversalData @MyIntersectionShaderLargeAttrs( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META32]] !continuation [[META46:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44]] !continuation.registercount [[META32]] !continuation [[META46:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = alloca [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) @@ -775,13 +819,18 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: anyhit.i: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = load [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP4]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = call [[STRUCT_TRAVERSALDATA]] [[AWAIT_2:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP9]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = call ptr inttoptr (i64 3 to ptr)([[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], float [[RES_I1]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[TMP8]], [15 x i32] poison, [30 x i32] [[TMP9]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = call { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } @await.2(ptr [[TMP13]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP34]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [30 x i32] [[TMP35]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP34]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP10]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE: accepthit.i: +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr 
[[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 1 @@ -790,19 +839,23 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 1), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP36]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 3 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 2), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[TMP37]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 3), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP21]], ptr [[TMP39]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 5 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = getelementptr 
inbounds i32, ptr [[TMP36]], i32 3 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 4), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP23]], ptr [[TMP29]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @PAYLOAD, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP40]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP27]], i32 0, i32 1 @@ -811,53 +864,64 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE: _cont_ReportHit.exit: ; LOWERRAYTRACINGPIPELINE-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; LOWERRAYTRACINGPIPELINE-NEXT: br i1 [[ISEND_I]], label [[TMP30:%.*]], label [[TMP32:%.*]] -; LOWERRAYTRACINGPIPELINE: 28: +; LOWERRAYTRACINGPIPELINE: 36: ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP31]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP31]], [8 x i32] poison, [30 x i32] [[TMP38]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable -; LOWERRAYTRACINGPIPELINE: 30: +; LOWERRAYTRACINGPIPELINE: 39: ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP7]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP33]]), !continuation.registercount [[META32]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_TRAVERSALDATA]] [[TMP33]], [8 x i32] poison, [30 x i32] [[TMP41]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %struct.DispatchSystemData @MyMissShader( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META48:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META47:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META48:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_SYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, 
ptr [[TMP4]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP12]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP14]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> , ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP16]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP19]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP20]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP22]], ptr [[TMP18]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP24]], ptr [[TMP21]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP30]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP27]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]]), !continuation.registercount [[META33]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP28]], [21 x i32] poison, [10 x i32] [[TMP29]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; ; @@ -887,12 +951,12 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @_cont_KernelEntry( -; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] !continuation [[META36:![0-9]+]] { +; DXILCONTPOSTPROCESS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation [[META36:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 0, i32 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 0, i32 [[TMP1]], i64 undef, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison) ; DXILCONTPOSTPROCESS-NEXT: ret void ; ; @@ -911,7 +975,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyRayGen( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META22]] !continuation.entry [[META13:![0-9]+]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation [[META37:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -929,49 +993,64 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP6]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP7]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP8]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> 
zeroinitializer, i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP8]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, i64, ...) 
@continuation.waitContinue(i64 4, i64 -1, i32 [[TMP11]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP11]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyRayGen.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META33]] !continuation [[META37]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [21 x i32], [10 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation [[META37]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA1:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP20]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = extractvalue [10 x i32] [[TMP18]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP18]], 1 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP18]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP18]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP18]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP18]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP18]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = extractvalue [10 x i32] [[TMP18]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = extractvalue [10 x i32] [[TMP18]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = extractvalue [10 x i32] [[TMP18]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP12]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 -; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP19]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) ; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP10]], i8 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) ; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP11]], i8 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP18]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP9]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP21]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 ; 
DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 @@ -983,34 +1062,40 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyClosestHitShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation [[META39:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; 
DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP2]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP3]], i32 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP4]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_06_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP10]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_06_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; DXILCONTPOSTPROCESS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP12]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast float 
[[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float +; DXILCONTPOSTPROCESS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP6]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = fsub fast float 1.000000e+00, [[TMP13]] @@ -1022,30 +1107,46 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float 1.000000e+00, i64 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP21]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP22]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; 
DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP23]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP20]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP21]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP22]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP23]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP24]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP25]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP25]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyAnyHitShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META41:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; 
DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_GEP]], align 4 @@ -1071,31 +1172,27 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[DOTFCA_5_EXTRACT]], ptr [[DOTFCA_5_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP6]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-NEXT: 
[[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP10]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP12]], i32 3 -; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP13]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP7]], i32 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; 
DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP9]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_060_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_060_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_060_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = bitcast float [[DOTSROA_060_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0108_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0108_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0108_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0108_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I3:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I4_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I4_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I4_FCA_0_GEP]], align 4 @@ -1109,9 +1206,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_1_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP16]], ptr [[TMP2]]) -; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP17]], i8 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP12]], ptr [[TMP2]]) +; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP13]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = 
getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I_FCA_0_GEP]], align 4 @@ -1125,9 +1222,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_1_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP18]], ptr [[TMP3]]) -; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP19]], i8 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP14]], ptr [[TMP3]]) +; DXILCONTPOSTPROCESS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP15]], i8 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RESPTR_I5:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I6_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I6_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I6_FCA_0_GEP]], align 4 @@ -1137,32 +1234,28 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[RES_I6_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_0_INSERT]], i32 [[RES_I6_FCA_1_LOAD]], 1 ; 
DXILCONTPOSTPROCESS-NEXT: [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I6_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] -; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = fadd fast float [[TMP21]], [[EXTRACT1]] -; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP22]], 0.000000e+00 -; DXILCONTPOSTPROCESS-NEXT: br i1 [[TMP23]], label [[TMP24:%.*]], label [[TMP36:%.*]] -; DXILCONTPOSTPROCESS: 24: -; DXILCONTPOSTPROCESS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP25]]) +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = fadd fast float [[TMP17]], [[EXTRACT1]] +; DXILCONTPOSTPROCESS-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP18]], 0.000000e+00 +; DXILCONTPOSTPROCESS-NEXT: br i1 [[TMP19]], label [[TMP20:%.*]], label [[TMP32:%.*]] +; DXILCONTPOSTPROCESS: 20: +; DXILCONTPOSTPROCESS-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP21]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP26]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x 
float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP27:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP27]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP28]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP30:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP31:%.*]] = bitcast i32 [[TMP30]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_062_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP31]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11]] to i32 -; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +; DXILCONTPOSTPROCESS-NEXT: [[TMP28:%.*]] = bitcast float 
[[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11]] to i32 +; DXILCONTPOSTPROCESS-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP28]] to float ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_062_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_062_0_VEC_INSERT]], float [[TMP33]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_062_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 @@ -1170,17 +1263,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT25:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I1]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT25]], ptr [[DOTFCA_0_GEP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP26]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP70]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr 
[[DOTFCA_0_1_0_GEP27]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP71]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_GEP28:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP28]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_GEP72:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP72]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_GEP29:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP29]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_GEP73:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP73]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_GEP30:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_LOAD:%.*]] = load <3 x float>, ptr 
[[DOTFCA_2_GEP30]], align 4 @@ -1194,23 +1287,29 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_GEP33:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_5_GEP33]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_LOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP22]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT1]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT1]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT1]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT1]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP23]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP24]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP29]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP35:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR]], i32 [[TMP35]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP35]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable -; DXILCONTPOSTPROCESS: 36: +; DXILCONTPOSTPROCESS: 32: ; DXILCONTPOSTPROCESS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP37:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT14]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP37]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP38:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT18]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP38]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT20:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 ; DXILCONTPOSTPROCESS-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT20]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP39]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT23:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT23]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: 
[[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP41:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP42:%.*]] = bitcast i32 [[TMP41]] to float @@ -1249,13 +1348,23 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_GEP57:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_LOAD58:%.*]] = load i64, ptr [[DOTFCA_5_GEP57]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT59:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT56]], i64 [[DOTFCA_5_LOAD58]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT27:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP37]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT30:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT27]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT33:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT30]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT36:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT33]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT39:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT36]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT42:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT39]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT45:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT42]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT48:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT45]], i32 [[TMP38]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT51:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT48]], i32 [[TMP39]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT54:%.*]] = insertvalue 
[10 x i32] [[DOTFCA_8_INSERT51]], i32 [[TMP40]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP46:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP46]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP46]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT54]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyIntersectionShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META32:![0-9]+]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META44:![0-9]+]] !continuation.state [[META44]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META44:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1265,14 +1374,44 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-NEXT: 
[[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT272:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT273:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT274:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT275:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[DOTFCA_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1 @@ -1285,79 +1424,201 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> 
[[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT275]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT4]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT10:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT13:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT10]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT16:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT13]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT19:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT16]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT22:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT19]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT25:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT22]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT28:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT25]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT31:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT28]], i32 
[[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT34:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT31]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT37:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT34]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT40:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT37]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT43:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT40]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT46:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT43]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT49:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT46]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT52:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT49]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT55:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT52]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT58:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT55]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT61:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT58]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT64:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT61]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT67:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT64]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT70:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT67]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT73:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT70]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT76:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT73]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT79:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT76]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT82:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT79]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT85:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT82]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT88:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT85]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader.resume.0) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP5]], i64 [[TMP6]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 3, i32 [[TMP5]], i64 [[TMP6]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], [20 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: accepthit.i: ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0345_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP10]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT64:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_065_4_VEC_INSERT]], 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT64]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0345_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0345_0_VEC_INSERT]], float [[TMP10]], i32 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT344:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0345_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT304:%.*]] = extractvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT344]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP11:%.*]], label [[TMP15:%.*]] ; DXILCONTPOSTPROCESS: 11: -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT307]], <2 x float> [[DOTFCA_0_EXTRACT304]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT310]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_1_1_INSERT316:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT313]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT319:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT316]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT322:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT319]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT325:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT322]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT328:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT325]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT328]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 15: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT304]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT281:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT283:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT281]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT285:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT283]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT287:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT285]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 
[[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP17]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP18:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP18]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP18]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT287]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation.registercount [[META32]] !continuation [[META43]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation [[META43]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP15]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 0, 0 -; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x 
i32] [[TMP16]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 29 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 0, 0 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP3:%.*]], label [[TMP9:%.*]] -; DXILCONTPOSTPROCESS: 4: +; DXILCONTPOSTPROCESS: 6: ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 @@ -1369,13 +1630,43 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 
[[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable -; DXILCONTPOSTPROCESS: 10: +; DXILCONTPOSTPROCESS: 12: ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 @@ -1387,16 +1678,46 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_3_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT1]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT1]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT1]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT1]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyIntersectionShaderLargeAttrs( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42]] !continuation.registercount [[META32]] !continuation [[META45:![0-9]+]] !continuation.stacksize [[META44]] !continuation.state [[META44]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42]] !continuation [[META45:![0-9]+]] !continuation.stacksize [[META44]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1406,14 +1727,44 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP1]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP3]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: store i64 [[RETURNADDR]], ptr addrspace(21) [[TMP4]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-NEXT: 
[[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT272:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT273:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT274:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT275:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; 
DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[DOTFCA_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1 @@ -1426,10 +1777,10 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-NEXT: 
[[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT275]], 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]] poison, i32 100, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_0_INSERT]], i32 101, 0, 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_1_INSERT]], i32 102, 0, 2 @@ -1437,75 +1788,192 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_3_INSERT]], i32 104, 0, 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_4_INSERT]], i32 105, 0, 5 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_5_INSERT]], i32 106, 0, 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT4]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT10:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT13:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT10]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT16:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT13]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT19:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_4_INSERT16]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT22:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT19]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT25:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT22]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT28:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT25]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT31:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT28]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT34:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT31]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT37:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT34]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT40:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT37]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT43:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT40]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT46:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT43]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT49:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT46]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT52:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT49]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT55:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT52]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT58:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT55]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT61:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT58]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 
+; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT64:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT61]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT67:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT64]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT70:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT67]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT73:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT70]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT76:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT73]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT79:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT76]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT82:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT79]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT85:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT82]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT88:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT85]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShaderLargeAttrs.resume.0) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP5]], i64 [[TMP6]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP5]], i64 [[TMP6]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]], [15 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: accepthit.i: ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast i32 100 to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP7]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0350_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP7]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 101 to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP8]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: store i32 102, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 103, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 104, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 3), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 105, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 4), align 4 -; DXILCONTPOSTPROCESS-NEXT: store i32 106, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 5), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0350_0_VEC_INSERT]], float 
[[TMP8]], i32 1 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_070_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP9:%.*]], label [[TMP13:%.*]] ; DXILCONTPOSTPROCESS: 9: -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_0_0_0_INSERT307]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT310]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT316:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT313]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT319:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT316]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT322:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT319]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT325:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT322]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT328:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT325]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 102, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 103, 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 104, 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 105, 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 106, 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP12]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP12]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT328]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; DXILCONTPOSTPROCESS: 13: ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT281:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT283:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT281]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT285:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT283]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT287:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_4_INSERT285]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 102, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 103, 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 104, 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 105, 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 106, 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; 
DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP16]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP16]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT287]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation.registercount [[META32]] !continuation [[META45]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation [[META45]] { ; DXILCONTPOSTPROCESS-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = add i32 [[TMP15]], -8 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 0, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue 
[[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 1 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 2 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 3 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 4 -; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 14 
+; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 29 +; DXILCONTPOSTPROCESS-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 0, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] 
[[TMP17]], 1, 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 5 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-NEXT: br i1 [[ISEND_I]], label [[TMP3:%.*]], label [[TMP9:%.*]] -; DXILCONTPOSTPROCESS: 4: +; DXILCONTPOSTPROCESS: 6: ; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP4]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(21) [[TMP5]], align 4 @@ -1517,13 +1985,43 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[DOTFCA_17_EXTRACT]], 17 +; 
DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP7]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP8]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable -; DXILCONTPOSTPROCESS: 10: +; DXILCONTPOSTPROCESS: 12: ; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(21) ; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(21) [[TMP10]], i32 0 ; DXILCONTPOSTPROCESS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(21) [[TMP11]], align 4 @@ -1535,49 +2033,91 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT1]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT1]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT1:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_4_INSERT1]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT1]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 
[[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-LABEL: define void @MyMissShader( -; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation.registercount [[META33]] !continuation [[META47:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation [[META47:![0-9]+]] { ; DXILCONTPOSTPROCESS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-NEXT: 
[[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP6]], i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP8]], i32 3 +; DXILCONTPOSTPROCESS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DXILCONTPOSTPROCESS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP2]], i32 1 +; 
DXILCONTPOSTPROCESS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP3]], i32 2 +; DXILCONTPOSTPROCESS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP4]], i32 3 ; DXILCONTPOSTPROCESS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; DXILCONTPOSTPROCESS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; DXILCONTPOSTPROCESS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; DXILCONTPOSTPROCESS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 ; DXILCONTPOSTPROCESS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-NEXT: store i32 [[TMP12]], ptr 
addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP5]], 0 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP6]], 7 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP7]], 8 +; DXILCONTPOSTPROCESS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP12]], 9 ; DXILCONTPOSTPROCESS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP13]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-NEXT: unreachable ; ; @@ -1607,14 +2147,14 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @_cont_KernelEntry( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] !continuation [[META36:![0-9]+]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation [[META36:![0-9]+]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 0, i32 [[TMP3]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 0, i32 [[TMP3]], i64 undef, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: ret void ; ; @@ -1633,71 +2173,86 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyRayGen( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META22]] !continuation.entry [[META13:![0-9]+]] !continuation [[META37:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !continuation.entry [[META13:![0-9]+]] !continuation [[META37:![0-9]+]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP5]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP6]]) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyRayGen.resume.0) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP8]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP9]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP10]], ptr addrspace(20) getelementptr inbounds (i32, 
ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP9]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP10]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP11]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP12]], 9 ; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP13]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]]), !continuation.registercount [[META34:![0-9]+]], !continuation.returnedRegistercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 4, i64 -1, i32 [[TMP13]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define dso_local void @MyRayGen.resume.0( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation.registercount [[META34]] !continuation [[META37]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [21 x i32], [10 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META22]] !continuation [[META37]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[SYSTEM_DATA_ALLOCA1:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP2]] to ptr addrspace(22) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP1]], 0 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA1]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = extractvalue [10 x i32] [[TMP20]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP20]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP20]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP20]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP20]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP20]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP20]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = extractvalue [10 x i32] [[TMP20]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = extractvalue [10 x i32] [[TMP20]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = extractvalue [10 x i32] [[TMP20]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast 
i32 [[TMP7]] to float ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT6:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP21]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP12]], i8 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[SYSTEM_DATA_ALLOCA1]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP13]], i8 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] 
[[TMP20]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP23]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 @@ -1709,36 +2264,42 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyClosestHitShader( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META39:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !continuation [[META39:![0-9]+]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) 
+; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 -; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP5]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP6]], i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTFCA_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_06_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_06_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP12]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_06_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_06_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP14]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP10]], i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call 
void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = fsub fast float 1.000000e+00, [[TMP15]] @@ -1750,24 +2311,30 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float 1.000000e+00, i64 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP23]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP24]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 2 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP25]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP22]], i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP26]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> 
[[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP23]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP24]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP25]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP26]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP27]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP27]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyAnyHitShader( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META41:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[TMP1:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META40:![0-9]+]] !continuation [[META41:![0-9]+]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_HITDATA]], align 8 @@ -1776,6 +2343,16 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], ptr [[DOTFCA_0_0_0_GEP]], align 4 @@ -1801,31 +2378,27 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i64 [[DOTFCA_5_EXTRACT]], ptr [[DOTFCA_5_GEP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP8]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP10]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP12]], i32 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP14]], i32 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP15]], i32 0, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP7]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP8]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP11]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_060_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_060_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_060_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_060_4_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0108_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0108_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0108_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 
1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0108_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP1]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR_I3:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I4_FCA_0_GEP]], align 4 @@ -1839,9 +2412,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP2]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_1_GEP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP18]], ptr [[TMP2]]) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP19]], i8 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP14]], ptr 
[[TMP2]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP15]], i8 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I_FCA_0_GEP]], align 4 @@ -1855,9 +2428,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP3]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_1_GEP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP20]], ptr [[TMP3]]) -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP21]], i8 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP16]], ptr [[TMP3]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP17]], i8 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RESPTR_I5:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr 
[[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I6_FCA_0_GEP]], align 4 @@ -1867,32 +2440,28 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_0_INSERT]], i32 [[RES_I6_FCA_1_LOAD]], 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I6_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = fadd fast float [[TMP23]], [[EXTRACT1]] -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP24]], 0.000000e+00 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP38:%.*]] -; DXILCONTPOSTPROCESS-GLOBAL: 26: -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP27]]) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP20:%.*]] = fadd fast float [[TMP19]], [[EXTRACT1]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP21:%.*]] = fcmp fast ogt float [[TMP20]], 0.000000e+00 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[TMP21]], label [[TMP22:%.*]], label [[TMP34:%.*]] +; DXILCONTPOSTPROCESS-GLOBAL: 22: +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP23]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP28:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP28]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP24:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP29]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP25:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP30]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP26:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP31]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP32:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT9]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_062_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP33]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP34:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP30:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT11]] to i32 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP30]] to float ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_062_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_062_0_VEC_INSERT]], float [[TMP35]], i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_062_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 @@ -1900,17 +2469,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT25:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I1]], i32 0, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT25]], ptr [[DOTFCA_0_GEP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTFCA_0_0_0_GEP26:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP26]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_GEP70:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP70]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD]], 0, 0, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP27]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_GEP71:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP71]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP28:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP28]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_GEP72:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP72]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP29:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP29]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_GEP73:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP73]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_GEP30:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP30]], align 4 @@ -1924,23 +2493,29 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_GEP33:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_5_GEP33]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_LOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP24]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[DOTFCA_3_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT1]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT1]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT1:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT1]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT1]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP25]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP26]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP31]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP37:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP37]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP37]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable -; DXILCONTPOSTPROCESS-GLOBAL: 38: +; DXILCONTPOSTPROCESS-GLOBAL: 34: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP39:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT14]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP39]], ptr addrspace(20) @REGISTERS, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP40:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT18]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP40]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT20:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP41:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT20]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT23:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP42:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT23]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP42]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[DOTFCA_0_EXTRACT]], i32 0 ; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP43:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP44:%.*]] = bitcast i32 [[TMP43]] to float @@ -1979,13 +2554,23 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_GEP57:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_LOAD58:%.*]] = load i64, ptr [[DOTFCA_5_GEP57]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT59:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT56]], i64 [[DOTFCA_5_LOAD58]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT27:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP39]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT30:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT27]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT33:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT30]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT36:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT33]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT39:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT36]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT42:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT39]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT45:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT42]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT48:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT45]], i32 [[TMP40]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT51:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT48]], i32 [[TMP41]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT54:%.*]] = insertvalue 
[10 x i32] [[DOTFCA_8_INSERT51]], i32 [[TMP42]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP48:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP48]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP48]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT59]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT54]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyIntersectionShader( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation.registercount [[META33:![0-9]+]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META44:![0-9]+]] !continuation.state [[META44]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42:![0-9]+]] !continuation [[META43:![0-9]+]] !continuation.stacksize [[META44:![0-9]+]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -1996,14 +2581,44 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x 
i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 -; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT272:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT273:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT274:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT275:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[DOTFCA_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1 @@ -2016,62 +2631,152 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT275]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT4]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT10:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT13:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT10]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT16:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT13]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT19:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT16]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT22:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT19]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT25:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT22]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT28:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT25]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT31:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT28]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT34:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT31]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT37:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT34]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT40:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT37]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT43:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT40]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT46:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT43]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT49:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT46]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT52:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT49]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT55:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT52]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT58:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT55]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT61:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_18_INSERT58]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT64:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT61]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT67:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT64]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT70:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT67]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT73:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT70]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT76:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT73]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT79:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT76]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT82:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT79]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT85:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT82]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT88:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT85]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader.resume.0) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) 
@continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], [20 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; DXILCONTPOSTPROCESS-GLOBAL: accepthit.i: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP8]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_065_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0345_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP9]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_065_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_065_0_VEC_INSERT]], float [[TMP11]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT64:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_065_4_VEC_INSERT]], 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT64]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0345_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0345_0_VEC_INSERT]], float [[TMP11]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT344:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0345_4_VEC_INSERT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT304:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT344]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP12:%.*]], label [[TMP16:%.*]] ; DXILCONTPOSTPROCESS-GLOBAL: 12: -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT307]], <2 x float> [[DOTFCA_0_EXTRACT304]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT310]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT316:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT313]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT319:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT316]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT322:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT319]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT325:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT322]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT328:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT325]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT136:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr 
[[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP15]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP15]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT328]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; DXILCONTPOSTPROCESS-GLOBAL: 16: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT304]], 0, 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT281:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT283:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT281]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT285:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT283]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT287:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT285]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP18]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP19]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP19]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT287]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define dso_local void @MyIntersectionShader.resume.0( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation.registercount [[META33]] !continuation [[META43]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation [[META43]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -2079,18 +2784,50 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP15]] to ptr addrspace(22) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 0, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 3 -; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 15 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 29 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = 
extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP10:%.*]] -; DXILCONTPOSTPROCESS-GLOBAL: 6: +; DXILCONTPOSTPROCESS-GLOBAL: 8: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(22) [[TMP6]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 @@ -2101,13 +2838,43 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_0_INSERT124]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_14_INSERT166]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT211:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP9]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP9]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable -; DXILCONTPOSTPROCESS-GLOBAL: 11: +; DXILCONTPOSTPROCESS-GLOBAL: 13: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP11]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 @@ -2118,16 +2885,46 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 
[[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT1]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT1]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT1]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT1]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 
[[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyIntersectionShaderLargeAttrs( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42]] !continuation.registercount [[META33]] !continuation [[META45:![0-9]+]] !continuation.stacksize [[META44]] !continuation.state [[META44]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META42]] !continuation [[META45:![0-9]+]] !continuation.stacksize [[META44]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -2138,14 +2935,44 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP4]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP3]] 
; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i64 [[RETURNADDR]], ptr addrspace(22) [[TMP5]], align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[PAYLOAD]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] 
= extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT272:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT273:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT274:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT275:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[DOTFCA_1_0_EXTRACT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1 @@ -2158,10 +2985,10 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: 
[[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT275]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]] poison, i32 100, 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_0_INSERT]], i32 101, 0, 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_1_INSERT]], i32 102, 0, 2 @@ -2169,58 +2996,143 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_3_INSERT]], i32 104, 0, 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] 
[[DOTFCA_0_4_INSERT]], i32 105, 0, 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_5_INSERT]], i32 106, 0, 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT4:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT7:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT4]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT10:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT7]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT13:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT10]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT16:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT13]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT19:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT16]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT22:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT19]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT25:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT22]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT28:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT25]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT31:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT28]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT34:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT31]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT37:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT34]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT40:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_11_INSERT37]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT43:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT40]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT46:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT43]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT49:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT46]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT52:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT49]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT55:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT52]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT58:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT55]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT61:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT58]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT64:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT61]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT67:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT64]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT70:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT67]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT73:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT70]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT76:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT73]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT79:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT76]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT82:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT79]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT85:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT82]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT88:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT85]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShaderLargeAttrs.resume.0) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]]), !continuation.registercount [[META33]], !continuation.returnedRegistercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 3, i32 [[TMP6]], i64 [[TMP7]], [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_5_INSERT]], float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]], [15 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; DXILCONTPOSTPROCESS-GLOBAL: accepthit.i: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 100 to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_070_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0350_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i32 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast i32 101 to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_070_0_VEC_INSERT]], float [[TMP9]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 102, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 1), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 103, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 2), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 104, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 3), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 105, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 4), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 106, ptr addrspace(20) getelementptr (i32, ptr addrspace(20) @REGISTERS, i32 5), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_070_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0350_0_VEC_INSERT]], float [[TMP9]], i32 1 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_070_4_VEC_INSERT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue 
[[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP10:%.*]], label [[TMP14:%.*]] ; DXILCONTPOSTPROCESS-GLOBAL: 10: -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT31:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT28]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT34:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT31]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT37:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT34]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT40:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT37]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT307]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_0_1_0_INSERT310]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT316:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT313]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT319:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT316]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT322:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT319]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT325:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT322]], float [[DOTFCA_4_EXTRACT274]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT328:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT325]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 102, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 103, 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 104, 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 105, 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 106, 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_7_INSERT145]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT148]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT193:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP13]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP13]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT328]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; DXILCONTPOSTPROCESS-GLOBAL: 14: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT]], 0, 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT281:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT272]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT283:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT281]], <3 x float> [[DOTFCA_3_EXTRACT273]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT285:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT283]], float [[DOTFCA_4_EXTRACT274]], 4 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT287:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT285]], i64 [[DOTFCA_5_EXTRACT275]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT1]], i32 102, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 103, 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 104, 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 105, 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 106, 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 
+; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP16]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP17]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP17]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT287]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation.registercount [[META33]] !continuation [[META45]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[TMP0:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP1:%.*]]) !lgc.rt.shaderstage [[META42]] !continuation [[META45]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: entryresume.0: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 @@ -2228,18 +3140,50 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP15]] to ptr addrspace(22) ; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -8 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 0, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 0, 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 1, 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 3 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP1]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP16:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP16]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 26 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP16]], 29 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP17:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP1]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_EXTRACT10:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 0, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_1_0_EXTRACT12:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 0, 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT14:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 1, 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_EXTRACT18:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_EXTRACT20:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_EXTRACT22:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_EXTRACT24:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP17]], 5 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP10:%.*]] -; DXILCONTPOSTPROCESS-GLOBAL: 6: +; DXILCONTPOSTPROCESS-GLOBAL: 8: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i64, ptr addrspace(22) [[TMP6]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT28:%.*]] = 
insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 @@ -2250,13 +3194,43 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT43:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT40]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT46:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT43]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT49:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT46]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT124:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT127:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT124]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT130:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT127]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT133:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT130]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT136:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT133]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT139:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT136]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT142:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT139]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT145:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT142]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT148:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT145]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT151:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_8_INSERT148]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT154:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT151]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT157:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT154]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT160:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT157]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT163:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT160]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT166:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT163]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT169:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT166]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT172:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT169]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT175:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT172]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT178:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT175]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT181:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT178]], i32 [[DOTFCA_19_EXTRACT]], 19 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT184:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT181]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT187:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT184]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT190:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT187]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT193:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_22_INSERT190]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT196:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT193]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT199:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT196]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT202:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT199]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT205:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT202]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT208:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT205]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT211:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT208]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP8]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP9]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD2]], i32 [[TMP9]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT49]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable -; DXILCONTPOSTPROCESS-GLOBAL: 11: +; DXILCONTPOSTPROCESS-GLOBAL: 13: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr addrspace(22) [[TMP2]], i32 [[TMP4]] ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i64, ptr addrspace(22) [[TMP11]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT10]], 0, 0, 0 @@ -2267,51 +3241,93 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT]], <3 x float> [[DOTFCA_3_EXTRACT20]], 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT]], float [[DOTFCA_4_EXTRACT22]], 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT]], i64 [[DOTFCA_5_EXTRACT24]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT1]], i32 [[DOTFCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT1:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT1]], i32 [[DOTFCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT1:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_4_INSERT1]], i32 [[DOTFCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT1]], i32 [[DOTFCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], -8 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[RETURNADDR_RELOAD]], i32 [[TMP14]], i64 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; ; DXILCONTPOSTPROCESS-GLOBAL-LABEL: define void @MyMissShader( -; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation.registercount [[META34]] !continuation [[META47:![0-9]+]] !continuation.state [[META22]] { +; DXILCONTPOSTPROCESS-GLOBAL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[TMP0:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META46:![0-9]+]] !continuation [[META47:![0-9]+]] { ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP1:%.*]] = call i64 @_cont_GetContinuationStackGlobalMemBase() ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr addrspace(22) +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 0, 0 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[TMP0]], 1, 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(20) @REGISTERS, align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP6]], i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP7]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP9]] to float -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 +; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP4]], i32 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP5]], i32 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP6:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP6]], i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP11]], ptr addrspace(20) @REGISTERS, align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP12]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 7), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 -; 
DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP13]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 8), align 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP14:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: store i32 [[TMP14]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @REGISTERS, i32 9), align 4 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[DOTFCA_0_0_EXTRACT]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_0_INSERT1:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT1]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP8]], 7 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 
x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP14]], 9 ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: [[TMP15:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (i64, ...) @continuation.continue(i64 [[RETURNADDR]], i32 [[TMP15]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]]), !continuation.registercount [[META34]] +; DXILCONTPOSTPROCESS-GLOBAL-NEXT: call void (...) @lgc.ilcps.continue(i64 [[RETURNADDR]], i32 [[TMP15]], i64 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-GLOBAL-NEXT: unreachable ; ; @@ -2344,7 +3360,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_AmdContStackSetPtr(i32 [[CSPINIT]]) -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 0, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 0, i32 poison, i64 undef, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: ret void ; ; @@ -2365,7 +3381,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyRayGen( ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !lgc.cps [[META36:![0-9]+]] !continuation [[META37:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -2385,8 +3401,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP11]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 
+; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP12]], align 4 @@ -2398,16 +3414,15 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa21i32a10i32s(i32 4, i32 8, i64 -1, i32 5, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [6 x i32] poison, [10 x i32] [[TMP21]]), !continuation.registercount [[META33:![0-9]+]], !continuation.wait.await [[META13:![0-9]+]], !continuation.returnedRegistercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP23:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP22]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[TMP23]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP22]], 0 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[TMP23]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP27]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 @@ -2419,6 +3434,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP35]], ptr [[TMP33]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP22]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP24]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[DOTSPLIT:%.*]] @@ -2443,15 +3459,15 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41:![0-9]+]] !lgc.cps [[META42:![0-9]+]] !continuation [[META43:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 @@ -2487,8 +3503,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> [[TMP27]], ptr [[TMP28]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP2]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP30]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP33]], ptr [[TMP31]], align 4 @@ -2502,7 +3518,7 
@@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP39]], ptr [[TMP37]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP40]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP42:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP41]], [21 x i32] poison, [10 x i32] [[TMP42]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -2516,16 +3532,16 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ORIGHITATTRS:%.*]] = alloca [8 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[HITATTRSALLOCA:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 @@ -2541,7 +3557,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP19]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[VAL_I:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I]], ptr [[TMP6]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_ALLOCA]], i32 0, i32 0, i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD_ATTR_MAX_8_I32S_LAYOUT_1_ANYHIT_IN:%.*]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 0, i32 0, i32 1 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP6]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP21]], ptr [[ORIGHITATTRS]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ORIGHITATTRS]], i32 1 @@ -2579,8 +3595,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP36]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP38]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP38]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP41]], ptr [[TMP39]], align 4 @@ -2603,7 +3619,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP53]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP52]], ptr [[ADDR_I1]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP54:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP55:%.*]] = load 
[10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP54]], [8 x i32] poison, [10 x i32] [[TMP55]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 56: @@ -2611,8 +3627,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP7]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP58]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP58]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP57]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP61]], ptr [[TMP59]], align 4 @@ -2635,7 +3651,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP73]], i32 0, i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP72]], ptr [[ADDR_I2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP74:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = 
load [10 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP75:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP74]], [8 x i32] poison, [10 x i32] [[TMP75]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -2646,8 +3662,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 @@ -2663,17 +3679,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } (...) @lgc.cps.await__sl_s_struct.TraversalDatasa8i32a30i32s(i32 3, i32 16, i32 5, float [[RES_I1]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP7]], [20 x i32] poison, [30 x i32] [[TMP8]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: accepthit.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 @@ -2690,13 +3706,13 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP20:%.*]], label [[TMP23:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: 20: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP21:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP22:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP21]], [8 x i32] poison, [30 x i32] [[TMP22]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 23: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP24:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP24]], [8 x i32] poison, [30 x i32] [[TMP25]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -2707,8 +3723,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 @@ -2738,17 +3754,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TRAV_DATA_I:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP7:%.*]] = load [[STRUCT_LARGEINTERSECTIONATTRIBUTES]], ptr [[TMP3]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load 
[30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP8:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP9:%.*]] = call { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } (...) @lgc.cps.await__sl_s_struct.TraversalDatasa8i32a30i32s(i32 3, i32 16, i32 5, float [[RES_I1]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[TMP7]], [15 x i32] poison, [30 x i32] [[TMP8]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 2 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [30 x i32] [[TMP10]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP9]], 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP11]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br label [[_CONT_REPORTHIT_EXIT:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: accepthit.i: -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP3]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP14]], ptr [[TMP2]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 1 @@ -2784,13 +3800,13 @@ attributes #6 = 
{ nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP34:%.*]], label [[TMP37:%.*]] ; LOWERRAYTRACINGPIPELINE-CPS: 34: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP35:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP36:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP35]], [8 x i32] poison, [30 x i32] [[TMP36]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; LOWERRAYTRACINGPIPELINE-CPS: 37: ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP38:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load [30 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP39:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[TMP38]], [8 x i32] poison, [30 x i32] [[TMP39]]), !continuation.registercount [[META32]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; @@ -2798,14 +3814,14 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-LABEL: define void @MyMissShader( ; LOWERRAYTRACINGPIPELINE-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META44]] !lgc.cps [[META42]] !continuation [[META49:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_SYSTEMDATA]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; 
LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 @@ -2822,8 +3838,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store <4 x float> , ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP1]], i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 7 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP15]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 @@ -2837,11 +3853,1142 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: store i32 [[TMP24]], ptr [[TMP22]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_SYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP26:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[TMP25]], align 4 -; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [10 x i32], ptr [[PAYLOAD_ALLOCA]], 
align 4 +; LOWERRAYTRACINGPIPELINE-CPS-NEXT: [[TMP27:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], [21 x i32] poison, [10 x i32] [[TMP27]]), !continuation.registercount [[META33]] ; LOWERRAYTRACINGPIPELINE-CPS-NEXT: unreachable ; ; +; CLEANUP-CPS-LABEL: define i1 @_cont_IsEndSearch( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { +; CLEANUP-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: ret i1 [[ISEND]] +; +; +; CLEANUP-CPS-LABEL: define %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; CLEANUP-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[VAL:%.*]] = load [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], ptr [[ADDR]], align 4 +; CLEANUP-CPS-NEXT: ret [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]] +; +; +; CLEANUP-CPS-LABEL: define void @_cont_SetTriangleHitAttributes( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[VAL:%.*]]) #[[ATTR0]] { +; CLEANUP-CPS-NEXT: [[ADDR:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL]], ptr [[ADDR]], align 4 +; CLEANUP-CPS-NEXT: ret void +; +; +; CLEANUP-CPS-LABEL: define i32 @_cont_GetLocalRootIndex( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; CLEANUP-CPS-NEXT: ret i32 5 +; +; +; CLEANUP-CPS-LABEL: define void @_cont_KernelEntry( +; CLEANUP-CPS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; CLEANUP-CPS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 +; CLEANUP-CPS-NEXT: call void @_AmdContStackSetPtr(i32 
[[CSPINIT]]) +; CLEANUP-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 0, i32 poison, i64 undef, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; CLEANUP-CPS-NEXT: ret void +; +; +; CLEANUP-CPS-LABEL: define %struct.HitData @_cont_GetCandidateState( +; CLEANUP-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0]] { +; CLEANUP-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA:%.*]], ptr [[DATA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES:%.*]] = load [[STRUCT_HITDATA:%.*]], ptr [[RESPTR]], align 4 +; CLEANUP-CPS-NEXT: ret [[STRUCT_HITDATA]] [[RES]] +; +; +; CLEANUP-CPS-LABEL: define float @_cont_RayTCurrent( +; CLEANUP-CPS-SAME: ptr nocapture readnone [[DATA:%.*]], ptr [[HITDATA:%.*]]) { +; CLEANUP-CPS-NEXT: [[RESPTR:%.*]] = getelementptr [[STRUCT_HITDATA:%.*]], ptr [[HITDATA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES:%.*]] = load float, ptr [[RESPTR]], align 4 +; CLEANUP-CPS-NEXT: ret float [[RES]] +; +; +; CLEANUP-CPS-LABEL: define void @MyRayGen( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !lgc.cps [[META36:![0-9]+]] !continuation [[META37:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call [[DX_TYPES_HANDLE]] 
[[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) +; CLEANUP-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 +; CLEANUP-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyRayGen.resume.0) +; CLEANUP-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP6]], 5 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 undef, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP8]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 4, i32 -1, {} poison, i64 -1, i64 [[TMP6]], i32 5, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [6 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33:![0-9]+]], !continuation.wait.await [[META13:![0-9]+]], !continuation.returnedRegistercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @MyRayGen.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_DISPATCHSYSTEMDATA:%.*]], [21 x i32], [10 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META22]] !lgc.cps [[META36]] !continuation [[META37]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = alloca { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, align 8 +; CLEANUP-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP8]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP9]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP10]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 
x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP12]]) +; CLEANUP-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP13]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP14]]) +; CLEANUP-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP15]], i8 1 +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) +; CLEANUP-CPS-NEXT: [[TMP17:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP16]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; CLEANUP-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; CLEANUP-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; CLEANUP-CPS-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; CLEANUP-CPS-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; CLEANUP-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP17]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP18]], float [[TMP19]], float [[TMP20]], float [[TMP21]], i8 15) +; CLEANUP-CPS-NEXT: ret void +; +; +; CLEANUP-CPS-LABEL: define void @MyClosestHitShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META38:![0-9]+]] !lgc.cps [[META39:![0-9]+]] !continuation 
[[META40:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> 
[[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; CLEANUP-CPS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_011_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_011_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; CLEANUP-CPS-NEXT: [[HITATTRS_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_011_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_011_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float +; CLEANUP-CPS-NEXT: [[HITATTRS_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[HITATTRS_SROA_0_0_VEC_INSERT]], float [[TMP7]], i32 1 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = fsub fast float 1.000000e+00, [[TMP8]] +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[HITATTRS_SROA_0_4_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = fsub fast float [[TMP9]], [[TMP10]] +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i64 0 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP8]], i64 1 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i64 2 +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float 
1.000000e+00, i64 3 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP17:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP18:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[TMP15]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT10:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP16]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP17]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP18]], 8 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP19]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyAnyHitShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] [[HIT_ATTRS:%.*]], [6 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META39]] !lgc.cps [[META41:![0-9]+]] !continuation [[META42:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_HITDATA:%.*]], align 8 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_HITDATA]], align 8 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] 
[[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_0_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_0_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: store float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: store i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], ptr [[SYSTEM_DATA_FCA_1_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], ptr [[SYSTEM_DATA_FCA_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_3_EXTRACT:%.*]] = 
extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; CLEANUP-CPS-NEXT: store <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], ptr [[SYSTEM_DATA_FCA_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; CLEANUP-CPS-NEXT: store float [[SYSTEM_DATA_FCA_4_EXTRACT]], ptr [[SYSTEM_DATA_FCA_4_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 5 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_5_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; CLEANUP-CPS-NEXT: store i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], ptr [[SYSTEM_DATA_FCA_5_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP3]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP4]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP5]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; 
CLEANUP-CPS-NEXT: [[ADDR_I:%.*]] = getelementptr [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP6]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[VAL_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[VAL_I_FCA_0_LOAD:%.*]] = load <2 x float>, ptr [[VAL_I_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[VAL_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[VAL_I_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[VAL_I_FCA_0_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_0100_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0100_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0100_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL_I_FCA_0_INSERT_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP8:%.*]] = bitcast float [[DOTSROA_0100_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[HIT_ATTRS_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[HIT_ATTRS]], 0 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RESPTR_I3:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I3]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I4_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I4_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr 
[[RESPTR_I3]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I4_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_0_INSERT]], i32 [[RES_I4_FCA_1_LOAD]], 1 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP0]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store float [[RES_I4_FCA_1_INSERT_FCA_0_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I4_FCA_1_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I4_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP0]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: store i32 [[RES_I4_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I4_FCA_1_INSERT_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[TMP10:%.*]] = call <3 x float> @_cont_ObjectRayOrigin3(ptr [[TMP9]], ptr [[TMP0]]) +; CLEANUP-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x float> [[TMP10]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RESPTR_I:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I_FCA_1_GEP]], align 4 +; 
CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[RES_I_FCA_1_LOAD]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[TMP1]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: store i32 [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT]], ptr [[RES_I_FCA_1_INSERT_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[TMP12:%.*]] = call <3 x float> @_cont_ObjectRayDirection3(ptr [[TMP11]], ptr [[TMP1]]) +; CLEANUP-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x float> [[TMP12]], i8 0 +; CLEANUP-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RESPTR_I5:%.*]] = getelementptr [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_0_LOAD:%.*]] = load float, ptr [[RES_I6_FCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] poison, float [[RES_I6_FCA_0_LOAD]], 0 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_HITDATA]], ptr [[RESPTR_I5]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_1_LOAD:%.*]] = load i32, ptr [[RES_I6_FCA_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_0_INSERT]], i32 
[[RES_I6_FCA_1_LOAD]], 1 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I6_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I6_FCA_1_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[TMP14:%.*]] = fmul fast float [[RES_I6_FCA_1_INSERT_FCA_0_EXTRACT]], [[EXTRACT]] +; CLEANUP-CPS-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP14]], [[EXTRACT1]] +; CLEANUP-CPS-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP15]], 0.000000e+00 +; CLEANUP-CPS-NEXT: br i1 [[TMP16]], label [[TMP17:%.*]], label [[TMP28:%.*]] +; CLEANUP-CPS: 17: +; CLEANUP-CPS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: call void @_cont_AcceptHitAndEndSearch(ptr [[TMP18]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP19:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP20:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP21:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP22:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP23:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float +; CLEANUP-CPS-NEXT: 
[[DOTSROA_0103_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP24]], i32 0 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP25:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0103_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0103_0_VEC_INSERT]], float [[TMP26]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT102:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0103_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[ADDR_I1:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP27]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT102]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I1]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT]], ptr [[DOTFCA_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_LOAD:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_LOAD]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD:%.*]] = load float, ptr [[DOTFCA_1_0_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_LOAD]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_LOAD]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT70:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_LOAD]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_LOAD:%.*]] = load <3 x float>, ptr [[DOTFCA_3_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT71:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT70]], <3 x float> [[DOTFCA_3_LOAD]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_LOAD:%.*]] = load float, ptr [[DOTFCA_4_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT72:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT71]], float [[DOTFCA_4_LOAD]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_GEP:%.*]] = getelementptr 
inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_LOAD:%.*]] = load i64, ptr [[DOTFCA_5_GEP]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT73:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT72]], i64 [[DOTFCA_5_LOAD]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP19]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT73]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 28: +; CLEANUP-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT16:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP29:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT16]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP30:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT19]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT22:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP31:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT22]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT25:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 3 +; CLEANUP-CPS-NEXT: [[TMP32:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT25]] to i32 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT10:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_0_VEC_EXTRACT10]] to i32 +; CLEANUP-CPS-NEXT: [[TMP34:%.*]] = bitcast i32 [[TMP33]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0107_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP34]], i32 0 +; CLEANUP-CPS-NEXT: [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT12:%.*]] = extractelement <2 x float> [[HIT_ATTRS_FCA_0_EXTRACT]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP35:%.*]] = bitcast float [[HITATTRSALLOCA_SROA_0_4_VEC_EXTRACT12]] to i32 +; CLEANUP-CPS-NEXT: [[TMP36:%.*]] = bitcast i32 [[TMP35]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0107_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0107_0_VEC_INSERT]], 
float [[TMP36]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT106:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0107_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[ADDR_I2:%.*]] = getelementptr [[STRUCT_SYSTEMDATA]], ptr [[TMP37]], i32 0, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT74:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT106]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_GEP75:%.*]] = getelementptr inbounds [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], ptr [[ADDR_I2]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store <2 x float> [[DOTFCA_0_EXTRACT74]], ptr [[DOTFCA_0_GEP75]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_GEP76:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_LOAD77:%.*]] = load <3 x i32>, ptr [[DOTFCA_0_0_0_GEP76]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT78:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_LOAD77]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_GEP79:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_LOAD80:%.*]] = load <2 x float>, ptr [[DOTFCA_0_1_0_GEP79]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT81:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT78]], <2 x float> [[DOTFCA_0_1_0_LOAD80]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_GEP82:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_LOAD83:%.*]] = load float, ptr [[DOTFCA_1_0_GEP82]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT84:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT81]], float 
[[DOTFCA_1_0_LOAD83]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_GEP85:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 1, i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_LOAD86:%.*]] = load i32, ptr [[DOTFCA_1_1_GEP85]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT87:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT84]], i32 [[DOTFCA_1_1_LOAD86]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_GEP88:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_LOAD89:%.*]] = load <3 x float>, ptr [[DOTFCA_2_GEP88]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT90:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT87]], <3 x float> [[DOTFCA_2_LOAD89]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_GEP91:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_LOAD92:%.*]] = load <3 x float>, ptr [[DOTFCA_3_GEP91]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT93:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT90]], <3 x float> [[DOTFCA_3_LOAD92]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_GEP94:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_LOAD95:%.*]] = load float, ptr [[DOTFCA_4_GEP94]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT96:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT93]], float [[DOTFCA_4_LOAD95]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_GEP97:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_LOAD98:%.*]] = load i64, ptr [[DOTFCA_5_GEP97]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT99:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT96]], i64 [[DOTFCA_5_LOAD98]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT28:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP29]], 0 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT31:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT28]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT34:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT31]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT37:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT34]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT40:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT37]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT43:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT40]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT46:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT43]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT49:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT46]], i32 [[TMP30]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT52:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT49]], i32 [[TMP31]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT55:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT52]], i32 [[TMP32]], 9 +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 40, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT99]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT55]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyIntersectionShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META36]] !lgc.cps [[META43:![0-9]+]] !continuation [[META44:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-CPS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: 
[[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 0 +; 
CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 5 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 0.000000e+00 +; CLEANUP-CPS-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; CLEANUP-CPS: anyhit.i: +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; 
CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT327:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> undef, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 
[[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShader.resume.0) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 3, i32 16, {} poison, i64 [[TMP0]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT327]], [20 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: accepthit.i: +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0330_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 1 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0330_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0330_0_VEC_INSERT]], float [[TMP4]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT329:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] poison, <2 x float> [[DOTSROA_0330_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT289:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT329]], 0 +; CLEANUP-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; CLEANUP-CPS: 5: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT292:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT292]], <2 x float> [[DOTFCA_0_EXTRACT289]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT298:%.*]] = insertvalue 
[[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT295]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT301:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT298]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT304:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT307]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 
[[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], 
i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT313]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 6: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT289]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_3_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT273]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT274]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT276:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT275]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT276]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @MyIntersectionShader.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META36]] !lgc.cps [[META43]] !continuation [[META44]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = 
extractvalue [30 x i32] [[TMP5]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 0, 1, 0 
+; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT281:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT283:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT285:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT287:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 5 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; CLEANUP-CPS: 7: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR1:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD2:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR1]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT292:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT292]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT298:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT295]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT301:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT298]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT304:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT301]], <3 x float> [[DOTFCA_2_EXTRACT281]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT304]], <3 x float> 
[[DOTFCA_3_EXTRACT283]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT307]], float [[DOTFCA_4_EXTRACT285]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT310]], i64 [[DOTFCA_5_EXTRACT287]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 
x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR_RELOAD2]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT313]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 8: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADER_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT281]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT273]], <3 x float> [[DOTFCA_3_EXTRACT283]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT274]], float [[DOTFCA_4_EXTRACT285]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT276:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT275]], i64 [[DOTFCA_5_EXTRACT287]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] 
poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR_RELOAD]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT276]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyIntersectionShaderLargeAttrs( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META36]] !lgc.cps [[META43]] !continuation [[META45:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[CONT_STATE_STACK_SEGMENT:%.*]] = call ptr addrspace(32) @lgc.cps.alloc(i32 8) +; CLEANUP-CPS-NEXT: [[RETURNADDR_SPILL_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADERLARGEATTRS_FRAME:%.*]], ptr addrspace(32) [[CONT_STATE_STACK_SEGMENT]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: store i32 [[RETURNADDR]], ptr addrspace(32) [[RETURNADDR_SPILL_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; 
CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 24 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 0, 
0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 1, 1 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_2_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 2 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_3_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 3 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_4_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 4 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_5_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[SYSTEM_DATA]], 5 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[RES_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA:%.*]] poison, float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT:%.*]] = insertvalue [[STRUCT_HITDATA]] [[RES_I_FCA_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[RES_I_FCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_HITDATA]] [[RES_I_FCA_1_INSERT]], 1 +; CLEANUP-CPS-NEXT: [[DOANYHIT_I:%.*]] = fcmp fast ogt float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], 0.000000e+00 +; CLEANUP-CPS-NEXT: br i1 [[DOANYHIT_I]], label [[ANYHIT_I:%.*]], label [[ACCEPTHIT_I:%.*]] +; CLEANUP-CPS: anyhit.i: +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_0_0_INSERT]], <2 x float> [[SYSTEM_DATA_FCA_0_1_0_EXTRACT]], 0, 1, 0 +; 
CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_2_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_3_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_2_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_4_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_3_INSERT]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[TRAV_DATA_I_FCA_5_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I_FCA_4_INSERT]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES:%.*]] poison, i32 100, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_0_INSERT]], i32 101, 0, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_1_INSERT]], i32 102, 0, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_2_INSERT]], i32 103, 0, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_4_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_3_INSERT]], i32 104, 0, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_5_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_4_INSERT]], i32 105, 0, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_6_INSERT:%.*]] = insertvalue [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_5_INSERT]], i32 106, 0, 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT5:%.*]] = 
insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT11:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT8]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT14:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT11]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT17:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT14]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT20:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT17]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT23:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT20]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT26:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT23]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT29:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT26]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT32:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT29]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT35:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT32]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT38:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT35]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT41:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT38]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT44:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT41]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT47:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT44]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT50:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT47]], i32 
[[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT53:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT50]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT56:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT53]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT59:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT56]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT62:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT59]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT65:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT62]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT68:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT65]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT71:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT68]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT74:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT71]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT77:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT74]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT80:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT77]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT83:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT80]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT86:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT83]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT89:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT86]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT92:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT89]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = call i64 (...) 
@lgc.cps.as.continuation.reference__i64(ptr @MyIntersectionShaderLargeAttrs.resume.0) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 3, i32 16, {} poison, i64 [[TMP0]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]], [15 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT92]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: accepthit.i: +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 100 to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0335_0_VEC_INSERT:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 101 to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0335_4_VEC_INSERT:%.*]] = insertelement <2 x float> [[DOTSROA_0335_0_VEC_INSERT]], float [[TMP2]], i32 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT334:%.*]] = insertvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]] poison, <2 x float> [[DOTSROA_0335_4_VEC_INSERT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT289:%.*]] = extractvalue [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT334]], 0 +; CLEANUP-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; CLEANUP-CPS: 3: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT292:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT292]], <2 x float> [[DOTFCA_0_EXTRACT289]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT298:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT295]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT301:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT298]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_2_INSERT304:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT301]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT304]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT307]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT310]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 102, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 103, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 104, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 105, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 106, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], 
i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_25_INSERT200]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT313]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 4: +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_EXTRACT289]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[SYSTEM_DATA_FCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[SYSTEM_DATA_FCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[SYSTEM_DATA_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT273]], <3 x float> [[SYSTEM_DATA_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT274]], float [[SYSTEM_DATA_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT276:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] 
[[DOTFCA_4_INSERT275]], i64 [[SYSTEM_DATA_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 102, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 103, 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 104, 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 105, 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 106, 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT276]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define dso_local void @MyIntersectionShaderLargeAttrs.resume.0( +; CLEANUP-CPS-SAME: {} [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], { [[STRUCT_TRAVERSALDATA:%.*]], [8 x i32], [30 x i32] } [[TMP3:%.*]]) !lgc.rt.shaderstage [[META36]] !lgc.cps [[META43]] !continuation [[META45]] { +; CLEANUP-CPS-NEXT: entryresume.0: +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = call ptr addrspace(32) @lgc.cps.peek(i32 8) +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 13 +; CLEANUP-CPS-NEXT: 
[[DOTFCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[TMP5]], 29 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_TRAVERSALDATA]], [8 x i32], [30 x i32] } [[TMP3]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_EXTRACT281:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 2 +; 
CLEANUP-CPS-NEXT: [[DOTFCA_3_EXTRACT283:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_EXTRACT285:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_EXTRACT287:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP6]], 5 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 5) +; CLEANUP-CPS-NEXT: [[ISEND_I:%.*]] = call i1 @opaqueIsEnd() +; CLEANUP-CPS-NEXT: br i1 [[ISEND_I]], label [[TMP7:%.*]], label [[TMP8:%.*]] +; CLEANUP-CPS: 7: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR5:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADERLARGEATTRS_FRAME:%.*]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD6:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR5]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT292:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT295:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT292]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT298:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT295]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT301:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT298]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT304:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT301]], <3 x float> [[DOTFCA_2_EXTRACT281]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT307:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT304]], <3 x float> [[DOTFCA_3_EXTRACT283]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT310:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT307]], float [[DOTFCA_4_EXTRACT285]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT313:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT310]], i64 [[DOTFCA_5_EXTRACT287]], 5 
+; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT125:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT128:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT125]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT131:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT128]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT134:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT131]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT137:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT134]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT140:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT137]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT143:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT140]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT146:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT143]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT149:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT146]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT152:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT149]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT155:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT152]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT158:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT155]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT161:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT158]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT164:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT161]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT167:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT164]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT170:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT167]], i32 
[[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT173:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT170]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT176:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT173]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT179:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT176]], i32 [[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT182:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT179]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT185:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT182]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT188:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT185]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT191:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT188]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT194:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT191]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT197:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT194]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT200:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT197]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT203:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT200]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT206:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT203]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT209:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT206]], i32 [[DOTFCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT212:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT209]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR_RELOAD6]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT313]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT212]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; CLEANUP-CPS: 8: +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD_ADDR:%.*]] = getelementptr inbounds [[MYINTERSECTIONSHADERLARGEATTRS_FRAME]], ptr addrspace(32) [[TMP4]], i32 0, i32 0 +; CLEANUP-CPS-NEXT: [[RETURNADDR_RELOAD:%.*]] = load i32, ptr addrspace(32) [[RETURNADDR_RELOAD_ADDR]], align 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], <2 x float> [[DOTFCA_0_1_0_EXTRACT]], 0, 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_0_INSERT]], float [[DOTFCA_1_0_EXTRACT]], 1, 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_0_INSERT]], i32 [[DOTFCA_1_1_EXTRACT]], 1, 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT273:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_1_1_INSERT]], <3 x float> [[DOTFCA_2_EXTRACT281]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT274:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_2_INSERT273]], <3 x float> [[DOTFCA_3_EXTRACT283]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT275:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_3_INSERT274]], float [[DOTFCA_4_EXTRACT285]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT276:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_4_INSERT275]], i64 [[DOTFCA_5_EXTRACT287]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = 
insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[DOTFCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[DOTFCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[DOTFCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[DOTFCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[DOTFCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[DOTFCA_7_EXTRACT]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[DOTFCA_8_EXTRACT]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[DOTFCA_9_EXTRACT]], 9 +; CLEANUP-CPS-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[DOTFCA_10_EXTRACT]], 10 +; CLEANUP-CPS-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[DOTFCA_11_EXTRACT]], 11 +; CLEANUP-CPS-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[DOTFCA_12_EXTRACT]], 12 +; CLEANUP-CPS-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[DOTFCA_13_EXTRACT]], 13 +; CLEANUP-CPS-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT]], i32 [[DOTFCA_14_EXTRACT]], 14 +; CLEANUP-CPS-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[DOTFCA_15_EXTRACT]], 15 +; CLEANUP-CPS-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[DOTFCA_16_EXTRACT]], 16 +; CLEANUP-CPS-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[DOTFCA_17_EXTRACT]], 17 +; CLEANUP-CPS-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 
[[DOTFCA_18_EXTRACT]], 18 +; CLEANUP-CPS-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[DOTFCA_19_EXTRACT]], 19 +; CLEANUP-CPS-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[DOTFCA_20_EXTRACT]], 20 +; CLEANUP-CPS-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[DOTFCA_21_EXTRACT]], 21 +; CLEANUP-CPS-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[DOTFCA_22_EXTRACT]], 22 +; CLEANUP-CPS-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[DOTFCA_23_EXTRACT]], 23 +; CLEANUP-CPS-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[DOTFCA_24_EXTRACT]], 24 +; CLEANUP-CPS-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[DOTFCA_25_EXTRACT]], 25 +; CLEANUP-CPS-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[DOTFCA_26_EXTRACT]], 26 +; CLEANUP-CPS-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[DOTFCA_27_EXTRACT]], 27 +; CLEANUP-CPS-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[DOTFCA_28_EXTRACT]], 28 +; CLEANUP-CPS-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 [[DOTFCA_29_EXTRACT]], 29 +; CLEANUP-CPS-NEXT: call void @lgc.cps.free(i32 8) +; CLEANUP-CPS-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR_RELOAD]], i32 8, {} poison, i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT276]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; CLEANUP-CPS-NEXT: unreachable +; +; +; CLEANUP-CPS-LABEL: define void @MyMissShader( +; CLEANUP-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_SYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [19 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR3]] !lgc.rt.shaderstage [[META41]] !lgc.cps [[META39]] !continuation [[META46:![0-9]+]] { +; CLEANUP-CPS-NEXT: AllocaSpillBB: +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 0 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 1 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 2 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 3 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 4 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 5 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 6 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 7 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 8 +; CLEANUP-CPS-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[PAYLOAD]], 9 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 0, 0 +; CLEANUP-CPS-NEXT: [[SYSTEM_DATA_FCA_1_0_EXTRACT:%.*]] = extractvalue [[STRUCT_SYSTEMDATA]] [[SYSTEM_DATA]], 1, 0 +; CLEANUP-CPS-NEXT: [[TMP0:%.*]] = bitcast i32 [[PAYLOAD_FCA_0_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> undef, float 
[[TMP0]], i32 0 +; CLEANUP-CPS-NEXT: [[TMP1:%.*]] = bitcast i32 [[PAYLOAD_FCA_7_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_0_VEC_INSERT]], float [[TMP1]], i32 1 +; CLEANUP-CPS-NEXT: [[TMP2:%.*]] = bitcast i32 [[PAYLOAD_FCA_8_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP2]], i32 2 +; CLEANUP-CPS-NEXT: [[TMP3:%.*]] = bitcast i32 [[PAYLOAD_FCA_9_EXTRACT]] to float +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP3]], i32 3 +; CLEANUP-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 [[SHADER_INDEX]]) +; CLEANUP-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 0 +; CLEANUP-CPS-NEXT: [[TMP4:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 1 +; CLEANUP-CPS-NEXT: [[TMP5:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 2 +; CLEANUP-CPS-NEXT: [[TMP6:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> , i32 3 +; CLEANUP-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT9:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_EXTRACT]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP4]], 0 +; CLEANUP-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; CLEANUP-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; CLEANUP-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] 
[[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; CLEANUP-CPS-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; CLEANUP-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; CLEANUP-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; CLEANUP-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 +; CLEANUP-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 +; CLEANUP-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 +; CLEANUP-CPS-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; CLEANUP-CPS-NEXT: unreachable +; +; ; DXILCONTPOSTPROCESS-CPS-LABEL: define i1 @_cont_IsEndSearch( ; DXILCONTPOSTPROCESS-CPS-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] { ; DXILCONTPOSTPROCESS-CPS-NEXT: [[ISEND:%.*]] = call i1 @opaqueIsEnd() @@ -2868,12 +5015,12 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-CPS-LABEL: define void @_cont_KernelEntry( -; DXILCONTPOSTPROCESS-CPS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] !continuation.registercount [[META22:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-SAME: ) #[[ATTR0]] !lgc.rt.shaderstage [[META35:![0-9]+]] { ; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSPINIT:%.*]] = ptrtoint ptr @debug_global to i32 ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 0, i32 [[TMP1]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison), !continuation.registercount [[META22]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 0, i32 [[TMP1]], i64 undef, [[STRUCT_DISPATCHSYSTEMDATA:%.*]] poison) ; DXILCONTPOSTPROCESS-CPS-NEXT: ret void ; ; @@ -2892,31 +5039,31 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; ; ; DXILCONTPOSTPROCESS-CPS-LABEL: define void @MyRayGen( -; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22]] !lgc.cps [[META36:![0-9]+]] !continuation [[META37:![0-9]+]] { +; DXILCONTPOSTPROCESS-CPS-SAME: {} [[CONT_STATE:%.*]], i32 [[CSPINIT:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] !lgc.rt.shaderstage [[META22:![0-9]+]] !lgc.cps [[META36:![0-9]+]] !continuation [[META37:![0-9]+]] { ; DXILCONTPOSTPROCESS-CPS-NEXT: AllocaSpillBB: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT20:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP1:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP1]]) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = call 
[[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP4]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = load [[DX_TYPES_HANDLE]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP3:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP4]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP3]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 16, i32 0 }) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = call i64 @amd.dx.getAccelStructAddr([[DX_TYPES_HANDLE]] [[TMP7]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, <3 x i32> [[DOTFCA_0_EXTRACT20]], 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[SYS_DATA_I:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] undef, [[STRUCT_DISPATCHSYSTEMDATA]] [[DIS_DATA_I_FCA_0_INSERT]], 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA:%.*]] undef, [[STRUCT_SYSTEMDATA]] [[SYS_DATA_I]], 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyRayGen.resume.0) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP8]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyRayGen.resume.0) +; 
DXILCONTPOSTPROCESS-CPS-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA_I]], i64 [[TMP9]], 5 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 1 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = bitcast float [[DOTSROA_0_4_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 2 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = bitcast float [[DOTSROA_0_8_VEC_EXTRACT]] to i32 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x float> zeroinitializer, i32 3 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP7]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = bitcast float [[DOTSROA_0_12_VEC_EXTRACT]] to i32 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [10 x i32] poison, i32 [[TMP10]], 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_0_INSERT]], i32 undef, 1 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_1_INSERT]], i32 undef, 2 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_2_INSERT]], i32 undef, 3 @@ -2924,10 +5071,10 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_INSERT:%.*]] = 
insertvalue [10 x i32] [[DOTFCA_4_INSERT]], i32 undef, 5 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_5_INSERT]], i32 undef, 6 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP11]], 7 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP9]], 8 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP10]], 9 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, i64, ...) @continuation.waitContinue(i64 4, i64 -1, i32 [[TMP13]], i64 [[TMP8]], i32 5, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [6 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33:![0-9]+]], !continuation.returnedRegistercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP12]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP13]], 9 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 4, i32 [[TMP14]], i64 -1, i64 [[TMP9]], i32 5, [[STRUCT_TRAVERSALDATA]] [[TRAV_DATA2_I]], [6 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -2938,18 +5085,17 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[CSP:%.*]] = alloca i32, align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: store { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], ptr [[TMP4]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 2 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 1 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 2 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 3 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 5 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 6 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 7 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 8 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP5]], 9 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = 
extractvalue [10 x i32] [[TMP6]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_2_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_3_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_4_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_5_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 5 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_6_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 6 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 7 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 8 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_EXTRACT:%.*]] = extractvalue [10 x i32] [[TMP6]], 9 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP7:%.*]] = bitcast i32 [[DOTFCA_0_EXTRACT]] to float ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i32 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = bitcast i32 [[DOTFCA_7_EXTRACT]] to float @@ -2958,22 +5104,23 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_4_VEC_INSERT]], float [[TMP9]], i32 2 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = bitcast i32 [[DOTFCA_9_EXTRACT]] to float ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[DOTSROA_0_8_VEC_INSERT]], float [[TMP10]], i32 3 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP6]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] } [[TMP3]], 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_EXTRACT21:%.*]] = 
extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP11]], 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP12]]) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP13]], i8 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP14]]) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP15]], i8 1 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP11]]) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP16]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void @dx.op.textureStore.f32(i32 67, 
[[DX_TYPES_HANDLE]] [[TMP17]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP18]], float [[TMP19]], float [[TMP20]], float [[TMP21]], i8 15) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?RenderTarget@@3V?$RWTexture2D@V?$vector@M$03@@@@A", align 4 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP13]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP14]], i8 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP15:%.*]] = getelementptr inbounds { [[STRUCT_DISPATCHSYSTEMDATA]], [21 x i32], [10 x i32] }, ptr [[TMP4]], i32 0, i32 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = call <3 x i32> @_cont_DispatchRaysIndex3(ptr [[TMP15]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP16]], i8 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 160, [[DX_TYPES_HANDLE]] [[TMP12]]) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](i32 216, [[DX_TYPES_HANDLE]] [[TMP17]], [[DX_TYPES_RESOURCEPROPERTIES:%.*]] { i32 4098, i32 1033 }) +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 0 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 1 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 2 +; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i64 3 +; DXILCONTPOSTPROCESS-CPS-NEXT: call void 
@dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP18]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP19]], float [[TMP20]], float [[TMP21]], float [[TMP22]], i8 15) ; DXILCONTPOSTPROCESS-CPS-NEXT: ret void ; ; @@ -3040,12 +5187,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP17]], 7 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP18]], 8 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP19]], 9 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP21]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP22:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP23:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP22]], i32 [[TMP23]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP22]], i32 [[TMP23]], i64 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT10]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -3216,14 +5360,11 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP20]], 7 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP21]], 8 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP22]], 9 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP28:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP29:%.*]] = add i32 [[TMP28]], 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP29]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP30:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP31:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP30]], i32 [[TMP31]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT72]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP30]], i32 [[TMP31]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT72]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable -; DXILCONTPOSTPROCESS-CPS: 32: +; DXILCONTPOSTPROCESS-CPS: 30: ; DXILCONTPOSTPROCESS-CPS-NEXT: call void @_cont_AcceptHit(ptr [[SYSTEM_DATA_ALLOCA]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT14:%.*]] = extractelement <4 x float> [[DOTSROA_0_12_VEC_INSERT]], i32 0 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP33:%.*]] = bitcast float [[DOTSROA_0_0_VEC_EXTRACT14]] to i32 @@ -3281,12 +5422,9 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT48:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT45]], i32 [[TMP34]], 7 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT51:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT48]], i32 [[TMP35]], 8 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT54:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT51]], i32 [[TMP36]], 9 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP42:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP43]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP44:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP45:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP44]], i32 [[TMP45]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT98]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT54]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP44]], i32 [[TMP45]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT98]], [8 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT54]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -3388,7 +5526,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShader.resume.0) ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP4]], i64 [[TMP5]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT326]], [20 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]), !continuation.registercount [[META32:![0-9]+]], !continuation.returnedRegistercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP4]], i64 [[TMP5]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_INSERT326]], [20 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; DXILCONTPOSTPROCESS-CPS: accepthit.i: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTSROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> undef, i32 0 @@ -3447,7 +5585,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP13]], i32 [[TMP14]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; DXILCONTPOSTPROCESS-CPS: 15: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 @@ -3493,7 +5631,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP17]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP18:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP19:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP18]], i32 [[TMP19]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP18]], i32 [[TMP19]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -3594,7 +5732,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD2]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP13]], i32 [[TMP14]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; DXILCONTPOSTPROCESS-CPS: 15: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) @@ -3643,7 +5781,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP20]], i32 [[TMP21]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -3751,7 +5889,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_29_INSERT91:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT88]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP5:%.*]] = call i64 @continuation.getAddrAndMD(ptr @MyIntersectionShaderLargeAttrs.resume.0) ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 3, i32 [[TMP4]], i64 [[TMP5]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]], [15 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]), !continuation.registercount [[META32]], !continuation.returnedRegistercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 3, i32 [[TMP4]], i64 [[TMP5]], i32 5, float [[RES_I_FCA_1_INSERT_FCA_0_EXTRACT]], i32 0, [[STRUCT_LARGEINTERSECTIONATTRIBUTES]] [[DOTFCA_0_6_INSERT]], [15 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT91]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; DXILCONTPOSTPROCESS-CPS: accepthit.i: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP6:%.*]] = bitcast i32 100 to float @@ -3806,7 +5944,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP10]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP12:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP11]], i32 [[TMP12]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP11]], i32 [[TMP12]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; DXILCONTPOSTPROCESS-CPS: 13: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, <3 x i32> [[SYSTEM_DATA_FCA_0_0_0_EXTRACT]], 0, 0, 0 @@ -3852,7 +5990,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP15]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP17:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP16]], i32 [[TMP17]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP16]], i32 [[TMP17]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -3953,7 +6091,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP12]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP13:%.*]] = zext i32 [[RETURN_ADDR_RELOAD6]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP14:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) 
@continuation.continue(i64 [[TMP13]], i32 [[TMP14]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) @lgc.ilcps.continue(i64 [[TMP13]], i32 [[TMP14]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT312]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT211]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; DXILCONTPOSTPROCESS-CPS: 15: ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP5]] to ptr addrspace(21) @@ -4002,7 +6140,7 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP19]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP20:%.*]] = zext i32 [[RETURN_ADDR_RELOAD]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP21:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP20]], i32 [[TMP21]], i32 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]), !continuation.registercount [[META32]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP20]], i32 [[TMP21]], i64 poison, i32 poison, [[STRUCT_TRAVERSALDATA]] [[DOTFCA_5_INSERT275]], [8 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; ; @@ -4051,11 +6189,8 @@ attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_6_INSERT]], i32 [[TMP5]], 7 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_7_INSERT]], i32 [[TMP6]], 8 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [10 x i32] [[DOTFCA_8_INSERT]], i32 [[TMP7]], 9 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP8:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 0 -; DXILCONTPOSTPROCESS-CPS-NEXT: store i32 [[TMP9]], ptr [[CSP]], align 4 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP10:%.*]] = zext i32 [[RETURNADDR]] to i64 ; DXILCONTPOSTPROCESS-CPS-NEXT: [[TMP11:%.*]] = load i32, ptr [[CSP]], align 4 -; DXILCONTPOSTPROCESS-CPS-NEXT: call void (i64, ...) @continuation.continue(i64 [[TMP10]], i32 [[TMP11]], i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]), !continuation.registercount [[META33]] +; DXILCONTPOSTPROCESS-CPS-NEXT: call void (...) 
@lgc.ilcps.continue(i64 [[TMP10]], i32 [[TMP11]], i64 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT9]], [21 x i32] poison, [10 x i32] [[DOTFCA_9_INSERT]]) ; DXILCONTPOSTPROCESS-CPS-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/traversal-empty-payload.ll b/llvmraytracing/test/dx/traversal-empty-payload.ll new file mode 100644 index 0000000000..2e91b8b695 --- /dev/null +++ b/llvmraytracing/test/dx/traversal-empty-payload.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck --check-prefix=EMPTYPAYLOAD %s +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,dxil-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck --check-prefix=EMPTYPAYLOAD-ALL %s + +; Test that we handle empty payload without creating additional stores and loads. + +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%struct.TraversalData = type { %struct.SystemData, i32 } +%struct.SystemData = type { %struct.DispatchSystemData, float } +%struct.DispatchSystemData = type { i32 } + +!continuation.preservedPayloadRegisterCount = !{!8} ; EMPTY_PAYLOAD + +declare !pointeetys !4 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + +declare !pointeetys !6 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) + +declare void @lgc.ilcps.continue(...) + +declare void @lgc.ilcps.waitContinue(...) 
+ +declare i64 @lgc.cps.as.continuation.reference__i64(...) #3 + +; Function Attrs: alwaysinline nounwind +define void @_cont_Traversal(%struct.TraversalData %data) #1 !lgc.rt.shaderstage !7 { + %1 = alloca %struct.TraversalData, align 8 + store %struct.TraversalData %data, ptr %1, align 4 + %2 = getelementptr inbounds %struct.TraversalData, ptr %1, i32 0, i32 1 + %3 = load i32, ptr %2, align 4 + %4 = icmp eq i32 %3, 0 + %5 = getelementptr inbounds %struct.TraversalData, ptr %1, i32 0, i32 0 + br i1 %4, label %9, label %6 + +6: ; preds = %0 + %7 = load %struct.SystemData, ptr %5, align 4 + %8 = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) + call void (...) @lgc.ilcps.waitContinue(i64 1, i64 -1, i32 0, i64 %8, %struct.SystemData %7) + unreachable + +9: ; preds = %0 + %10 = load %struct.SystemData, ptr %5, align 4 + call void (...) @lgc.ilcps.waitContinue(i64 0, i64 -1, i32 2, i64 poison, %struct.SystemData %10) + unreachable +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!0 = !{!"function", i32 poison, !1} +!1 = !{i32 0, %struct.TraversalData poison} +!2 = !{!"function", i32 poison, !1, i32 poison} +!3 = !{!"function", !"void", !1, i32 poison, i32 poison} +!4 = !{%struct.DispatchSystemData poison} +!5 = !{i32 0, %struct.DispatchSystemData poison} +!6 = !{%struct.TraversalData poison} +!7 = !{i32 6} +!8 = !{i32 0} +; EMPTYPAYLOAD-LABEL: define %struct.TraversalData @_cont_Traversal( +; EMPTYPAYLOAD-SAME: i64 
[[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [0 x i32] [[PADDING:%.*]], [0 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META4:![0-9]+]] { +; EMPTYPAYLOAD-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; EMPTYPAYLOAD-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_TRAVERSALDATA]], align 8 +; EMPTYPAYLOAD-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 +; EMPTYPAYLOAD-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; EMPTYPAYLOAD-NEXT: store [[STRUCT_TRAVERSALDATA]] [[TMP0]], ptr [[TMP2]], align 4 +; EMPTYPAYLOAD-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP2]], i32 0, i32 1 +; EMPTYPAYLOAD-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +; EMPTYPAYLOAD-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; EMPTYPAYLOAD-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_TRAVERSALDATA]], ptr [[TMP2]], i32 0, i32 0 +; EMPTYPAYLOAD-NEXT: br i1 [[TMP5]], label [[TMP12:%.*]], label [[TMP7:%.*]] +; EMPTYPAYLOAD: 7: +; EMPTYPAYLOAD-NEXT: [[TMP8:%.*]] = load [[STRUCT_SYSTEMDATA:%.*]], ptr [[TMP6]], align 4 +; EMPTYPAYLOAD-NEXT: [[TMP9:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) +; EMPTYPAYLOAD-NEXT: [[TMP10:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; EMPTYPAYLOAD-NEXT: call void (...) @lgc.cps.jump(i64 1, i32 -1, {} poison, i64 [[TMP9]], [[STRUCT_SYSTEMDATA]] [[TMP8]]), !continuation.registercount [[META0]], !waitmask [[META5:![0-9]+]] +; EMPTYPAYLOAD-NEXT: unreachable +; EMPTYPAYLOAD: 11: +; EMPTYPAYLOAD-NEXT: [[TMP13:%.*]] = load [[STRUCT_SYSTEMDATA]], ptr [[TMP6]], align 4 +; EMPTYPAYLOAD-NEXT: [[TMP14:%.*]] = load [[STRUCT_TRAVERSALDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 +; EMPTYPAYLOAD-NEXT: call void (...) 
@lgc.cps.jump(i64 0, i32 -1, {} poison, i64 poison, [[STRUCT_SYSTEMDATA]] [[TMP13]]), !continuation.registercount [[META0]], !waitmask [[META5]] +; EMPTYPAYLOAD-NEXT: unreachable +; +; +; EMPTYPAYLOAD-ALL-LABEL: define void @_cont_Traversal( +; EMPTYPAYLOAD-ALL-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [0 x i32] [[PADDING:%.*]], [0 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META4:![0-9]+]] { +; EMPTYPAYLOAD-ALL-NEXT: AllocaSpillBB: +; EMPTYPAYLOAD-ALL-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; EMPTYPAYLOAD-ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_0_0_EXTRACT15:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_1_EXTRACT16:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_1_EXTRACT17:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 +; EMPTYPAYLOAD-ALL-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTFCA_1_EXTRACT17]], 0 +; EMPTYPAYLOAD-ALL-NEXT: br i1 [[TMP1]], label [[TMP7:%.*]], label [[TMP2:%.*]] +; EMPTYPAYLOAD-ALL: 2: +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] poison, i32 [[DOTFCA_0_0_0_EXTRACT15]], 0, 0 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_1_INSERT19:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT16]], 1 +; EMPTYPAYLOAD-ALL-NEXT: [[TMP3:%.*]] = call i64 @continuation.getAddrAndMD(ptr @_cont_Traversal) +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = 
insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; EMPTYPAYLOAD-ALL-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; EMPTYPAYLOAD-ALL-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 1, i64 -1, i32 [[TMP6]], i64 [[TMP3]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT19]]) +; EMPTYPAYLOAD-ALL-NEXT: unreachable +; EMPTYPAYLOAD-ALL: 5: +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_0_INSERT22:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT15]], 0, 0 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_1_INSERT25:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT22]], float [[DOTFCA_0_1_EXTRACT16]], 1 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_0_0_INSERT6:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_0_1_INSERT9:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT6]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; EMPTYPAYLOAD-ALL-NEXT: [[DOTFCA_1_INSERT12:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT9]], i32 [[DOTFCA_1_EXTRACT]], 1 +; EMPTYPAYLOAD-ALL-NEXT: [[TMP10:%.*]] = load i32, ptr [[CSP]], align 4 +; EMPTYPAYLOAD-ALL-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 [[TMP10]], i64 poison, [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT25]]) +; EMPTYPAYLOAD-ALL-NEXT: unreachable +; diff --git a/llvmraytracing/test/dx/traversal-passthrough-payload.ll b/llvmraytracing/test/dx/traversal-passthrough-payload.ll new file mode 100644 index 0000000000..1d11e94765 --- /dev/null +++ b/llvmraytracing/test/dx/traversal-passthrough-payload.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: grep -v PRESERVED_REGCOUNT %s | opt --verify-each -passes='lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S --lint-abort-on-error | FileCheck --check-prefix=MAXPAYLOADSIZE %s +; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint,sroa,lint,lower-await,lint,coro-early,dxil-coro-split,coro-cleanup,lint,legacy-cleanup-continuations,lint,dxil-cont-post-process,lint,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error | FileCheck --check-prefix=PRESERVEDPAYLOADSIZE %s + +; Test that we pass either the maximum or the computed, preserved payload size through _cont_Traversal. 
+ +target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16:16-i32:32-i64:32-f16:16-f32:32-f64:32-v8:8-v16:16-v32:32-v48:32-v64:32-v80:32-v96:32-v112:32-v128:32-v144:32-v160:32-v176:32-v192:32-v208:32-v224:32-v240:32-v256:32-n8:16:32" + +%struct.TraversalData = type { %struct.SystemData, i32 } +%struct.SystemData = type { %struct.DispatchSystemData, float } +%struct.DispatchSystemData = type { i32 } + +!continuation.preservedPayloadRegisterCount = !{!8} ; PRESERVED_REGCOUNT + +declare !pointeetys !4 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) + +declare !pointeetys !6 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) + +declare void @lgc.ilcps.continue(...) + +declare void @lgc.ilcps.waitContinue(...) + +declare i64 @lgc.cps.as.continuation.reference__i64(...) #3 + +; Function Attrs: alwaysinline nounwind +define void @_cont_Traversal(%struct.TraversalData %data) #1 !lgc.rt.shaderstage !7 { + %1 = alloca %struct.TraversalData, align 8 + store %struct.TraversalData %data, ptr %1, align 4 + %2 = getelementptr inbounds %struct.TraversalData, ptr %1, i32 0, i32 1 + %3 = load i32, ptr %2, align 4 + %4 = icmp eq i32 %3, 0 + %5 = getelementptr inbounds %struct.TraversalData, ptr %1, i32 0, i32 0 + br i1 %4, label %9, label %6 + +6: ; preds = %0 + %7 = load %struct.SystemData, ptr %5, align 4 + %8 = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) + call void (...) @lgc.ilcps.waitContinue(i64 1, i64 -1, i32 0, i64 %8, %struct.SystemData %7) + unreachable + +9: ; preds = %0 + %10 = load %struct.SystemData, ptr %5, align 4 + call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 2, i64 poison, %struct.SystemData %10) + unreachable +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!0 = !{!"function", i32 poison, !1} +!1 = !{i32 0, %struct.TraversalData poison} +!2 = !{!"function", i32 poison, !1, i32 poison} +!3 = !{!"function", !"void", !1, i32 poison, i32 poison} +!4 = !{%struct.DispatchSystemData poison} +!5 = !{i32 0, %struct.DispatchSystemData poison} +!6 = !{%struct.TraversalData poison} +!7 = !{i32 6} +!8 = !{i32 4} ; PRESERVED_REGCOUNT +; MAXPAYLOADSIZE-LABEL: define void @_cont_Traversal( +; MAXPAYLOADSIZE-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META2:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META3:![0-9]+]] !continuation.state [[META4:![0-9]+]] { +; MAXPAYLOADSIZE-NEXT: AllocaSpillBB: +; MAXPAYLOADSIZE-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; MAXPAYLOADSIZE-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 0 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 1 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 2 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 3 +; 
MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_4_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 4 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_5_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 5 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_6_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 6 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_7_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 7 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_8_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 8 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_9_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 9 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_10_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 10 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_11_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 11 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_12_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 12 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_13_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 13 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_14_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 14 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_15_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 15 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_16_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 16 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_17_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 17 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_18_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 18 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_19_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 19 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_20_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 20 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_21_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 21 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_22_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 22 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_23_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 23 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_24_EXTRACT:%.*]] = extractvalue [30 
x i32] [[PAYLOAD]], 24 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_25_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 25 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_26_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 26 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_27_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 27 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_28_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 28 +; MAXPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_29_EXTRACT:%.*]] = extractvalue [30 x i32] [[PAYLOAD]], 29 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_EXTRACT136:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_1_EXTRACT137:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_EXTRACT138:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 +; MAXPAYLOADSIZE-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTFCA_1_EXTRACT138]], 0 +; MAXPAYLOADSIZE-NEXT: br i1 [[TMP1]], label [[TMP5:%.*]], label [[TMP2:%.*]] +; MAXPAYLOADSIZE: 2: +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] poison, i32 [[DOTFCA_0_0_0_EXTRACT136]], 0, 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT140:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT137]], 1 +; MAXPAYLOADSIZE-NEXT: [[TMP3:%.*]] = call i64 @continuation.getAddrAndMD(ptr @_cont_Traversal) +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; 
MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT124:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_4_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_5_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_6_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_7_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_8_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_9_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_10_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_11_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_12_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_13_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_14_INSERT:%.*]] = insertvalue [30 x i32] 
[[DOTFCA_13_INSERT]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_15_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_16_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_17_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_18_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_19_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_20_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_21_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT]], i32 [[PAYLOAD_FCA_21_EXTRACT]], 21 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_22_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_23_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_24_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_25_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_26_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_27_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_28_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_29_INSERT:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT]], i32 
[[PAYLOAD_FCA_29_EXTRACT]], 29 +; MAXPAYLOADSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; MAXPAYLOADSIZE-NEXT: call void (...) @lgc.ilcps.waitContinue(i64 1, i64 -1, i32 [[TMP4]], i64 [[TMP3]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT140]], [9 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT]]) +; MAXPAYLOADSIZE-NEXT: unreachable +; MAXPAYLOADSIZE: 5: +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_0_INSERT143:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT136]], 0, 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT146:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT143]], float [[DOTFCA_0_1_EXTRACT137]], 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_INSERT127:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_1_INSERT130:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT127]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT133:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT130]], i32 [[DOTFCA_1_EXTRACT]], 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_0_INSERT3:%.*]] = insertvalue [30 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT6:%.*]] = insertvalue [30 x i32] [[DOTFCA_0_INSERT3]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_2_INSERT9:%.*]] = insertvalue [30 x i32] [[DOTFCA_1_INSERT6]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_3_INSERT12:%.*]] = insertvalue [30 x i32] [[DOTFCA_2_INSERT9]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_4_INSERT15:%.*]] = insertvalue [30 x i32] [[DOTFCA_3_INSERT12]], i32 [[PAYLOAD_FCA_4_EXTRACT]], 4 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_5_INSERT18:%.*]] = insertvalue [30 x i32] [[DOTFCA_4_INSERT15]], i32 [[PAYLOAD_FCA_5_EXTRACT]], 5 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_6_INSERT21:%.*]] = insertvalue [30 x i32] [[DOTFCA_5_INSERT18]], i32 [[PAYLOAD_FCA_6_EXTRACT]], 6 +; 
MAXPAYLOADSIZE-NEXT: [[DOTFCA_7_INSERT24:%.*]] = insertvalue [30 x i32] [[DOTFCA_6_INSERT21]], i32 [[PAYLOAD_FCA_7_EXTRACT]], 7 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_8_INSERT27:%.*]] = insertvalue [30 x i32] [[DOTFCA_7_INSERT24]], i32 [[PAYLOAD_FCA_8_EXTRACT]], 8 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_9_INSERT30:%.*]] = insertvalue [30 x i32] [[DOTFCA_8_INSERT27]], i32 [[PAYLOAD_FCA_9_EXTRACT]], 9 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_10_INSERT33:%.*]] = insertvalue [30 x i32] [[DOTFCA_9_INSERT30]], i32 [[PAYLOAD_FCA_10_EXTRACT]], 10 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_11_INSERT36:%.*]] = insertvalue [30 x i32] [[DOTFCA_10_INSERT33]], i32 [[PAYLOAD_FCA_11_EXTRACT]], 11 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_12_INSERT39:%.*]] = insertvalue [30 x i32] [[DOTFCA_11_INSERT36]], i32 [[PAYLOAD_FCA_12_EXTRACT]], 12 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_13_INSERT42:%.*]] = insertvalue [30 x i32] [[DOTFCA_12_INSERT39]], i32 [[PAYLOAD_FCA_13_EXTRACT]], 13 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_14_INSERT45:%.*]] = insertvalue [30 x i32] [[DOTFCA_13_INSERT42]], i32 [[PAYLOAD_FCA_14_EXTRACT]], 14 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_15_INSERT48:%.*]] = insertvalue [30 x i32] [[DOTFCA_14_INSERT45]], i32 [[PAYLOAD_FCA_15_EXTRACT]], 15 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_16_INSERT51:%.*]] = insertvalue [30 x i32] [[DOTFCA_15_INSERT48]], i32 [[PAYLOAD_FCA_16_EXTRACT]], 16 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_17_INSERT54:%.*]] = insertvalue [30 x i32] [[DOTFCA_16_INSERT51]], i32 [[PAYLOAD_FCA_17_EXTRACT]], 17 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_18_INSERT57:%.*]] = insertvalue [30 x i32] [[DOTFCA_17_INSERT54]], i32 [[PAYLOAD_FCA_18_EXTRACT]], 18 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_19_INSERT60:%.*]] = insertvalue [30 x i32] [[DOTFCA_18_INSERT57]], i32 [[PAYLOAD_FCA_19_EXTRACT]], 19 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_20_INSERT63:%.*]] = insertvalue [30 x i32] [[DOTFCA_19_INSERT60]], i32 [[PAYLOAD_FCA_20_EXTRACT]], 20 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_21_INSERT66:%.*]] = insertvalue [30 x i32] [[DOTFCA_20_INSERT63]], i32 
[[PAYLOAD_FCA_21_EXTRACT]], 21 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_22_INSERT69:%.*]] = insertvalue [30 x i32] [[DOTFCA_21_INSERT66]], i32 [[PAYLOAD_FCA_22_EXTRACT]], 22 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_23_INSERT72:%.*]] = insertvalue [30 x i32] [[DOTFCA_22_INSERT69]], i32 [[PAYLOAD_FCA_23_EXTRACT]], 23 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_24_INSERT75:%.*]] = insertvalue [30 x i32] [[DOTFCA_23_INSERT72]], i32 [[PAYLOAD_FCA_24_EXTRACT]], 24 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_25_INSERT78:%.*]] = insertvalue [30 x i32] [[DOTFCA_24_INSERT75]], i32 [[PAYLOAD_FCA_25_EXTRACT]], 25 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_26_INSERT81:%.*]] = insertvalue [30 x i32] [[DOTFCA_25_INSERT78]], i32 [[PAYLOAD_FCA_26_EXTRACT]], 26 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_27_INSERT84:%.*]] = insertvalue [30 x i32] [[DOTFCA_26_INSERT81]], i32 [[PAYLOAD_FCA_27_EXTRACT]], 27 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_28_INSERT87:%.*]] = insertvalue [30 x i32] [[DOTFCA_27_INSERT84]], i32 [[PAYLOAD_FCA_28_EXTRACT]], 28 +; MAXPAYLOADSIZE-NEXT: [[DOTFCA_29_INSERT90:%.*]] = insertvalue [30 x i32] [[DOTFCA_28_INSERT87]], i32 [[PAYLOAD_FCA_29_EXTRACT]], 29 +; MAXPAYLOADSIZE-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; MAXPAYLOADSIZE-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 [[TMP6]], i64 poison, [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT146]], [9 x i32] poison, [30 x i32] [[DOTFCA_29_INSERT90]]) +; MAXPAYLOADSIZE-NEXT: unreachable +; +; +; PRESERVEDPAYLOADSIZE-LABEL: define void @_cont_Traversal( +; PRESERVEDPAYLOADSIZE-SAME: i32 [[CSPINIT:%.*]], i64 [[RETURNADDR:%.*]], [[STRUCT_TRAVERSALDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [4 x i32] [[PAYLOAD:%.*]]) #[[ATTR0:[0-9]+]] !lgc.rt.shaderstage [[META3:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META4:![0-9]+]] !continuation.state [[META5:![0-9]+]] { +; PRESERVEDPAYLOADSIZE-NEXT: AllocaSpillBB: +; PRESERVEDPAYLOADSIZE-NEXT: [[CSP:%.*]] = alloca i32, align 4 +; PRESERVEDPAYLOADSIZE-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 +; PRESERVEDPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 2 +; PRESERVEDPAYLOADSIZE-NEXT: [[PAYLOAD_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[PAYLOAD]], 3 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_EXTRACT32:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 0, 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_1_EXTRACT33:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 0, 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_EXTRACT34:%.*]] = extractvalue [[STRUCT_TRAVERSALDATA]] [[TMP0]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[TMP1:%.*]] = icmp eq i32 [[DOTFCA_1_EXTRACT34]], 0 +; PRESERVEDPAYLOADSIZE-NEXT: br i1 
[[TMP1]], label [[TMP5:%.*]], label [[TMP2:%.*]] +; PRESERVEDPAYLOADSIZE: 2: +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_SYSTEMDATA:%.*]] poison, i32 [[DOTFCA_0_0_0_EXTRACT32]], 0, 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT36:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT33]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[TMP3:%.*]] = call i64 @continuation.getAddrAndMD(ptr @_cont_Traversal) +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT20:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT]], i32 [[DOTFCA_1_EXTRACT]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_1_INSERT]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x i32] [[DOTFCA_2_INSERT]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; PRESERVEDPAYLOADSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[CSP]], align 4 +; PRESERVEDPAYLOADSIZE-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 1, i64 -1, i32 [[TMP4]], i64 [[TMP3]], [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT36]], [9 x i32] poison, [4 x i32] [[DOTFCA_3_INSERT]]) +; PRESERVEDPAYLOADSIZE-NEXT: unreachable +; PRESERVEDPAYLOADSIZE: 5: +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_0_INSERT39:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT32]], 0, 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT42:%.*]] = insertvalue [[STRUCT_SYSTEMDATA]] [[DOTFCA_0_0_INSERT39]], float [[DOTFCA_0_1_EXTRACT33]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_0_0_INSERT23:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] poison, i32 [[DOTFCA_0_0_0_EXTRACT]], 0, 0, 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_1_INSERT26:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_0_0_INSERT23]], float [[DOTFCA_0_1_EXTRACT]], 0, 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT29:%.*]] = insertvalue [[STRUCT_TRAVERSALDATA]] [[DOTFCA_0_1_INSERT26]], i32 [[DOTFCA_1_EXTRACT]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_0_INSERT3:%.*]] = insertvalue [4 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_1_INSERT6:%.*]] = insertvalue [4 x i32] [[DOTFCA_0_INSERT3]], i32 [[PAYLOAD_FCA_1_EXTRACT]], 1 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_2_INSERT9:%.*]] = insertvalue [4 x i32] [[DOTFCA_1_INSERT6]], i32 [[PAYLOAD_FCA_2_EXTRACT]], 2 +; PRESERVEDPAYLOADSIZE-NEXT: [[DOTFCA_3_INSERT12:%.*]] = insertvalue [4 x i32] [[DOTFCA_2_INSERT9]], i32 [[PAYLOAD_FCA_3_EXTRACT]], 3 +; PRESERVEDPAYLOADSIZE-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSP]], align 4 +; PRESERVEDPAYLOADSIZE-NEXT: call void (...) 
@lgc.ilcps.waitContinue(i64 0, i64 -1, i32 [[TMP6]], i64 poison, [[STRUCT_SYSTEMDATA]] [[DOTFCA_1_INSERT42]], [9 x i32] poison, [4 x i32] [[DOTFCA_3_INSERT12]]) +; PRESERVEDPAYLOADSIZE-NEXT: unreachable +; diff --git a/llvmraytracing/test/dx/unnamed-type-intrinsics.ll b/llvmraytracing/test/dx/unnamed-type-intrinsics.ll index 39271713a9..2af4c8002b 100644 --- a/llvmraytracing/test/dx/unnamed-type-intrinsics.ll +++ b/llvmraytracing/test/dx/unnamed-type-intrinsics.ll @@ -26,54 +26,52 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i32 @_cont_GetContinuationStackAddr() #0 -declare %0 @_cont_SetupRayGen() #0 - declare %0 @_AmdAwaitTraversal(i64, %1) #0 declare %0 @_AmdAwaitShader(i64, %0) #0 declare %3 @_AmdAwaitAnyHit(i64, %3, float, i32) #0 -declare !types !17 %struct.HitData @_cont_GetCandidateState(%3*) #0 +declare !pointeetys !17 %struct.HitData @_cont_GetCandidateState(%3*) #0 -declare !types !19 %struct.HitData @_cont_GetCommittedState(%2*) #0 +declare !pointeetys !19 %struct.HitData @_cont_GetCommittedState(%2*) #0 -declare !types !21 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%2*) #0 +declare !pointeetys !21 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%2*) #0 -declare !types !22 void @_cont_SetTriangleHitAttributes(%2*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !22 void @_cont_SetTriangleHitAttributes(%2*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !23 i32 @_cont_GetLocalRootIndex(%0*) +declare !pointeetys !23 i32 @_cont_GetLocalRootIndex(%0*) -declare !types !25 i1 @_cont_IsEndSearch(%1*) #0 +declare !pointeetys !25 i1 @_cont_IsEndSearch(%1*) #0 -declare !types !27 i32 @_cont_HitKind(%2*) #0 +declare !pointeetys !27 i32 @_cont_HitKind(%2*) #0 ; Function Attrs: nounwind declare i64 @_AmdGetResumePointAddr() #1 ; Function Attrs: nounwind -declare !types !28 void @_AmdRestoreSystemData(%0*) #1 
+declare !pointeetys !28 void @_AmdRestoreSystemData(%0*) #1 ; Function Attrs: nounwind -declare !types !29 void @_AmdRestoreSystemDataAnyHit(%3*) #1 +declare !pointeetys !29 void @_AmdRestoreSystemDataAnyHit(%3*) #1 ; Function Attrs: nounwind -declare !types !28 void @_cont_AcceptHitAndEndSearch(%0* nocapture readnone) #1 +declare !pointeetys !28 void @_cont_AcceptHitAndEndSearch(%0* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !29 void @_cont_AcceptHit(%3* nocapture readnone) #1 +declare !pointeetys !29 void @_cont_AcceptHit(%3* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !28 void @_cont_IgnoreHit(%0* nocapture readnone) #1 +declare !pointeetys !28 void @_cont_IgnoreHit(%0* nocapture readnone) #1 ; Function Attrs: nounwind -declare !types !29 void @_AmdAcceptHitAttributes(%3* nocapture readnone) #1 +declare !pointeetys !29 void @_AmdAcceptHitAttributes(%3* nocapture readnone) #1 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %0 poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%0 poison} { ret void } -define void @_cont_TraceRay(%0* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !30 { +define void @_cont_TraceRay(%0* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !30 { %dis_data = load %0, %0* %data, align 4 %sys_data = insertvalue %2 undef, %0 %dis_data, 0 %trav_data = insertvalue %1 undef, %2 %sys_data, 0 @@ -85,7 +83,7 @@ define void @_cont_TraceRay(%0* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i ret void } -define void @_cont_CallShader(%0* %data, i32 %0) #0 !types !31 { +define void @_cont_CallShader(%0* %data, i32 %0) #0 !pointeetys !31 { %dis_data = load %0, %0* %data, 
align 4 %newdata = call %0 @_AmdAwaitShader(i64 2, %0 %dis_data) store %0 %newdata, %0* %data, align 4 @@ -93,7 +91,7 @@ define void @_cont_CallShader(%0* %data, i32 %0) #0 !types !31 { ret void } -define i1 @_cont_ReportHit(%3* %data, float %t, i32 %hitKind) #0 !types !32 { +define i1 @_cont_ReportHit(%3* %data, float %t, i32 %hitKind) #0 !pointeetys !32 { %origTPtr = getelementptr inbounds %3, %3* %data, i32 0, i32 0, i32 4 %origT = load float, float* %origTPtr, align 4 %isNoHit = fcmp fast uge float %t, %origT @@ -112,7 +110,7 @@ isEnd: ; preds = %0 ret i1 false } -define <3 x i32> @_cont_DispatchRaysIndex3(%0* %data) !types !33 { +define <3 x i32> @_cont_DispatchRaysIndex3(%0* %data) !pointeetys !33 { %resPtr.1 = getelementptr %0, %0* %data, i32 0, i32 0, i32 0 %res.1 = load i32, i32* %resPtr.1, align 4 %resPtr.2 = getelementptr %0, %0* %data, i32 0, i32 0, i32 1 @@ -125,7 +123,7 @@ define <3 x i32> @_cont_DispatchRaysIndex3(%0* %data) !types !33 { ret <3 x i32> %val.2 } -define <3 x float> @_cont_ObjectRayOrigin3(%0* nocapture readnone %data, %struct.HitData* %hitData) !types !34 { +define <3 x float> @_cont_ObjectRayOrigin3(%0* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !34 { %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 0 %res.1 = load float, float* %resPtr.1, align 4 %resPtr.2 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 0, i32 1 @@ -138,7 +136,7 @@ define <3 x float> @_cont_ObjectRayOrigin3(%0* nocapture readnone %data, %struct ret <3 x float> %val.2 } -define <3 x float> @_cont_ObjectRayDirection3(%0* nocapture readnone %data, %struct.HitData* %hitData) !types !34 { +define <3 x float> @_cont_ObjectRayDirection3(%0* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !34 { %resPtr.1 = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 1, i32 0 %res.1 = load float, float* %resPtr.1, align 4 %resPtr.2 = getelementptr %struct.HitData, 
%struct.HitData* %hitData, i32 0, i32 1, i32 1 @@ -151,7 +149,7 @@ define <3 x float> @_cont_ObjectRayDirection3(%0* nocapture readnone %data, %str ret <3 x float> %val.2 } -define float @_cont_RayTCurrent(%0* nocapture readnone %data, %struct.HitData* %hitData) !types !36 { +define float @_cont_RayTCurrent(%0* nocapture readnone %data, %struct.HitData* %hitData) !pointeetys !36 { %resPtr = getelementptr %struct.HitData, %struct.HitData* %hitData, i32 0, i32 2 %res = load float, float* %resPtr, align 4 ret float %res @@ -184,7 +182,7 @@ define void @MyRayGen() #2 { } ; Function Attrs: nounwind -define void @MyClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !types !40 { +define void @MyClosestHit(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #2 !pointeetys !40 { %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 %2 = load <2 x float>, <2 x float>* %1, align 4 %3 = extractelement <2 x float> %2, i32 0 @@ -201,7 +199,7 @@ define void @MyClosestHit(%struct.RayPayload* noalias nocapture %payload, %struc } ; Function Attrs: nounwind -declare !types !43 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 +declare !pointeetys !43 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #1 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #1 @@ -223,7 +221,7 @@ declare void @dx.op.acceptHitAndEndSearch(i32) #0 declare void @dx.op.ignoreHit(i32) #0 ; Function Attrs: nounwind -declare !types !44 i1 
@dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 +declare !pointeetys !44 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #1 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 @@ -232,10 +230,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !45 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !45 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !45 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !45 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } @@ -269,36 +267,35 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !14 = !{i32 0} !15 = !{void ()* @MyRayGen, !"MyRayGen", null, null, !16} !16 = !{i32 8, i32 7, i32 5, !14} -!17 = !{!"function", %struct.HitData poison, !18} +!17 = !{%3 poison} !18 = !{i32 0, %3 poison} -!19 = !{!"function", %struct.HitData poison, !20} +!19 = !{%2 poison} !20 = !{i32 0, %2 poison} -!21 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !20} -!22 = !{!"function", !"void", !20, %struct.BuiltInTriangleIntersectionAttributes 
poison} -!23 = !{!"function", i32 poison, !24} +!21 = !{%2 poison} +!22 = !{%2 poison} +!23 = !{%0 poison} !24 = !{i32 0, %0 poison} -!25 = !{!"function", i1 poison, !26} +!25 = !{%1 poison} !26 = !{i32 0, %1 poison} -!27 = !{!"function", i32 poison, !20} -!28 = !{!"function", !"void", !24} -!29 = !{!"function", !"void", !18} -!30 = !{!"function", !"void", !24, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!31 = !{!"function", !"void", !24, i32 poison} -!32 = !{!"function", i1 poison, !18, float poison, i32 poison} -!33 = !{!"function", <3 x i32> poison, !24} -!34 = !{!"function", <3 x float> poison, !24, !35} +!27 = !{%2 poison} +!28 = !{%0 poison} +!29 = !{%3 poison} +!30 = !{%0 poison} +!31 = !{%0 poison} +!32 = !{%3 poison} +!33 = !{%0 poison} +!34 = !{null, %0 poison, %struct.HitData poison} !35 = !{i32 0, %struct.HitData poison} -!36 = !{!"function", float poison, !24, !35} +!36 = !{null, %0 poison, %struct.HitData poison} !37 = !{!38, !38, i64 0} !38 = !{!"omnipotent char", !39, i64 0} !39 = !{!"Simple C/C++ TBAA"} -!40 = !{!"function", !"void", !41, !42} +!40 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !41 = !{i32 0, %struct.RayPayload poison} !42 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!43 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !41} -!44 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !42} -!45 = !{!"function", !"void", i64 poison, !46} -!46 = !{i32 0, i8 poison} +!43 = !{%struct.RayPayload poison} +!44 = !{%struct.BuiltInTriangleIntersectionAttributes poison} +!45 = !{i8 poison} ; LOWERRAYTRACINGPIPELINE-LABEL: define <3 x i32> 
@_cont_DispatchRaysIndex3( ; LOWERRAYTRACINGPIPELINE-SAME: ptr [[DATA:%.*]]) { ; LOWERRAYTRACINGPIPELINE-NEXT: [[RESPTR_1:%.*]] = getelementptr [[TMP0:%.*]], ptr [[DATA]], i32 0, i32 0, i32 0 @@ -351,6 +348,7 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-LABEL: define void @MyRayGen( ; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[TMP0:%.*]] [[TMP0]]) #[[ATTR2:[0-9]+]] !lgc.rt.shaderstage [[META14:![0-9]+]] !continuation.entry [[META20:![0-9]+]] !continuation.registercount [[META14]] !continuation [[META21:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[TMP0]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP0]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2:%.*]] = load [[DX_TYPES_HANDLE:%.*]], ptr @"\01?Scene@@3URaytracingAccelerationStructure@@A", align 4 @@ -370,71 +368,86 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: [[TRAV_DATA2_I:%.*]] = insertvalue [[TMP1]] [[TRAV_DATA_I]], i64 [[ADDR_I]], 5 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP10]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP13]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP14]], align 4 -; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP12]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = call ptr inttoptr (i64 4 to ptr)([[TMP1]] [[TRAV_DATA2_I]], i64 poison), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount [[META18]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = call [[TMP0]] [[AWAIT:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[TMP21]]) +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP25]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP21:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP28:%.*]] = call ptr inttoptr (i64 4 to ptr)(i64 poison, [[TMP1]] [[TRAV_DATA2_I]], [16 x i32] poison, [10 x i32] [[TMP21]]), !continuation.registercount [[META18:![0-9]+]], !continuation.returnedRegistercount [[META18]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = 
call { [[TMP0]], [33 x i32], [10 x i32] } @await(ptr [[TMP28]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP24:%.*]] = extractvalue { [[TMP0]], [33 x i32], [10 x i32] } [[TMP35]], 2 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[TMP24]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_RAYPAYLOAD]] poison, ptr [[TMP4]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP4]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP26:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP26]], ptr [[TMP23]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP36]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP37]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr 
addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP34]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP22:%.*]] = extractvalue { [[TMP0]], [33 x i32], [10 x i32] } [[TMP35]], 0 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP0]] [[TMP22]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; LOWERRAYTRACINGPIPELINE-NEXT: br label [[DOTSPLIT:%.*]] ; LOWERRAYTRACINGPIPELINE: .split: -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP34:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA22]] -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP35]], i8 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() -; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP36]], i8 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP37:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP37]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = extractelement <4 x float> [[TMP34]], i64 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[TMP34]], i64 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = extractelement <4 x float> [[TMP34]], i64 3 -; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP38]], i32 [[EXTRACT]], 
i32 [[EXTRACT1]], i32 undef, float [[TMP39]], float [[TMP40]], float [[TMP41]], float [[TMP42]], i8 15) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !tbaa [[TBAA22]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP50:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT:%.*]] = extractelement <3 x i32> [[TMP50]], i8 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP51:%.*]] = call <3 x i32> @lgc.rt.dispatch.rays.index() +; LOWERRAYTRACINGPIPELINE-NEXT: [[EXTRACT1:%.*]] = extractelement <3 x i32> [[TMP51]], i8 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_CREATEHANDLEFORLIB_DX_TYPES_HANDLE]](i32 160, [[DX_TYPES_HANDLE]] [[TMP3]]) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP53:%.*]] = call [[DX_TYPES_HANDLE]] [[DX_OP_ANNOTATEHANDLE]](i32 216, [[DX_TYPES_HANDLE]] [[TMP40]], [[DX_TYPES_RESOURCEPROPERTIES]] { i32 4098, i32 1033 }) +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP49]], i64 0 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP55:%.*]] = extractelement <4 x float> [[TMP49]], i64 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP56:%.*]] = extractelement <4 x float> [[TMP49]], i64 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP57:%.*]] = extractelement <4 x float> [[TMP49]], i64 3 +; LOWERRAYTRACINGPIPELINE-NEXT: call void @dx.op.textureStore.f32(i32 67, [[DX_TYPES_HANDLE]] [[TMP53]], i32 [[EXTRACT]], i32 [[EXTRACT1]], i32 undef, float [[TMP54]], float [[TMP55]], float [[TMP56]], float [[TMP57]], i8 15) ; LOWERRAYTRACINGPIPELINE-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP5]]) #[[ATTR1]] ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; ; ; LOWERRAYTRACINGPIPELINE-LABEL: define %0 @MyClosestHit( -; LOWERRAYTRACINGPIPELINE-SAME: i64 [[RETURNADDR:%.*]], [[TMP2:%.*]] [[TMP0:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META26:![0-9]+]] { +; LOWERRAYTRACINGPIPELINE-SAME: i64 
[[RETURNADDR:%.*]], [[TMP2:%.*]] [[TMP0:%.*]], [33 x i32] [[PADDING:%.*]], [10 x i32] [[PAYLOAD:%.*]]) #[[ATTR2]] !lgc.rt.shaderstage [[META25:![0-9]+]] !continuation.registercount [[META18]] !continuation [[META26:![0-9]+]] { ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP2]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[TMP2]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [10 x i32], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP3:%.*]] = alloca [[STRUCT_RAYPAYLOAD:%.*]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: [[HITATTRS:%.*]] = alloca [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]], align 8 +; LOWERRAYTRACINGPIPELINE-NEXT: store [10 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[TMP2]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP2]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP5:%.*]] = call i32 @_cont_GetLocalRootIndex(ptr [[TMP4]]) ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP9:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP9]], ptr [[TMP6]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4 ; 
LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 1 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 2 -; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 2 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP20]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP17:%.*]] = call [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[_CONT_GETTRIANGLEHITATTRIBUTES:@[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]](ptr [[SYSTEM_DATA_ALLOCA]]) ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_BUILTINTRIANGLEINTERSECTIONATTRIBUTES]] [[TMP17]], ptr [[TMP2]], align 4 @@ -459,18 +472,22 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re ; LOWERRAYTRACINGPIPELINE-NEXT: store <4 x float> [[TMP33]], ptr [[TMP34]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_RAYPAYLOAD]], ptr [[TMP3]], i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP35]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr addrspace(20) @PAYLOAD, align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP38]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; 
LOWERRAYTRACINGPIPELINE-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 7 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP39]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 7), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP41]], ptr [[TMP36]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 1 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 8), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP43]], ptr [[TMP40]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 2 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr addrspace(20) getelementptr inbounds (i32, ptr addrspace(20) @PAYLOAD, i32 9), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[TMP45]], ptr [[TMP48]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[TMP2]], ptr [[SYSTEM_DATA_ALLOCA]], i32 0, i32 0 ; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP47:%.*]] = load [[TMP0]], ptr [[TMP46]], align 4 -; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[TMP0]] [[TMP47]]), !continuation.registercount [[META18]] +; LOWERRAYTRACINGPIPELINE-NEXT: [[TMP49:%.*]] = load [10 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[TMP0]] [[TMP47]], [33 x i32] poison, [10 x i32] [[TMP49]]), !continuation.registercount [[META18]] ; LOWERRAYTRACINGPIPELINE-NEXT: unreachable ; diff --git a/llvmraytracing/test/dx/wrong-system-data.ll b/llvmraytracing/test/dx/wrong-system-data.ll index a9b04e6555..f988c02ec9 100644 --- a/llvmraytracing/test/dx/wrong-system-data.ll +++ b/llvmraytracing/test/dx/wrong-system-data.ll @@ -1,4 +1,4 @@ -; RUN: not --crash opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s +; RUN: not --crash opt --verify-each -passes='dxil-cont-lgc-rt-op-converter,lint,lower-raytracing-pipeline,continuations-lint,remove-types-metadata' -S %s --lint-abort-on-error 2>&1 | FileCheck %s ; CHECK: Invalid system data struct: Did not contain the needed struct type @@ -19,35 +19,33 @@ target datalayout = "e-m:e-p:64:32-p20:32:32-p21:32:32-p32:32:32-i1:32-i8:8-i16: declare i64 @_AmdGetTraversalAddr() #0 -declare !types !31 %struct.TraversalData @_AmdAnyHit(i64, %struct.TraversalData*) #0 +declare !pointeetys !31 %struct.TraversalData @_AmdAnyHit(i64, %struct.TraversalData*) #0 declare i32 @_cont_GetContinuationStackAddr() #0 -declare !types !33 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) #0 +declare !pointeetys !33 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) #0 declare %struct.DispatchSystemData @_AmdTraversal(%struct.TraversalData) #0 -declare %struct.DispatchSystemData @_cont_SetupRayGen() #0 - declare void @_AmdEnqueue(i64, %struct.SystemData) #0 declare void @_AmdWaitEnqueue(i64, i64, %struct.SystemData) #0 declare void @_AmdEnqueueAnyHit(i64, %struct.TraversalData) 
#0 -declare !types !35 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 +declare !pointeetys !35 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 -declare !types !37 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 +declare !pointeetys !37 void @_cont_SetTriangleHitAttributes(%struct.SystemData*, %struct.BuiltInTriangleIntersectionAttributes) #0 -declare !types !38 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 +declare !pointeetys !38 i1 @_cont_IsEndSearch(%struct.TraversalData*) #0 -declare !types !39 i32 @_cont_HitKind(%struct.SystemData*) #0 +declare !pointeetys !39 i32 @_cont_HitKind(%struct.SystemData*) #0 -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !{%struct.DispatchSystemData poison} { ret void } -define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !types !40 { +define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13) #0 !pointeetys !40 { %sys_data = insertvalue %struct.SystemData undef, i32 1, 0 %trav_data = insertvalue %struct.TraversalData undef, %struct.SystemData %sys_data, 0 %newdata = call %struct.DispatchSystemData @_AmdTraversal(%struct.TraversalData %trav_data) @@ -55,7 +53,7 @@ define void @_cont_TraceRay(%struct.DispatchSystemData* %data, i64 %0, i32 %1, i ret void } -define i1 @_cont_ReportHit(%struct.TraversalData* %data, float %0, i32 %1) #0 !types !41 { +define i1 
@_cont_ReportHit(%struct.TraversalData* %data, float %0, i32 %1) #0 !pointeetys !41 { ret i1 true } @@ -74,7 +72,7 @@ define void @"\01?MyRaygenShader@@YAXXZ"() #1 { } ; Function Attrs: nounwind -define void @"\01?MyClosestHitShader@@YAXURayPayload@@UBuiltInTriangleIntersectionAttributes@@@Z"(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #1 !types !45 { +define void @"\01?MyClosestHitShader@@YAXURayPayload@@UBuiltInTriangleIntersectionAttributes@@@Z"(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readonly %attr) #1 !pointeetys !45 { %1 = getelementptr inbounds %struct.BuiltInTriangleIntersectionAttributes, %struct.BuiltInTriangleIntersectionAttributes* %attr, i32 0, i32 0 %2 = load <2 x float>, <2 x float>* %1, align 4 %3 = extractelement <2 x float> %2, i32 0 @@ -91,7 +89,7 @@ define void @"\01?MyClosestHitShader@@YAXURayPayload@@UBuiltInTriangleIntersecti } ; Function Attrs: nounwind -define void @"\01?MyAnyHitShader@@YAXURayPayload@@UBuiltInTriangleIntersectionAttributes@@@Z"(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #1 !types !45 { +define void @"\01?MyAnyHitShader@@YAXURayPayload@@UBuiltInTriangleIntersectionAttributes@@@Z"(%struct.RayPayload* noalias nocapture %payload, %struct.BuiltInTriangleIntersectionAttributes* nocapture readnone %attr) #1 !pointeetys !45 { %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 %2 = load <4 x float>, <4 x float>* %1, align 4 %3 = call float @dx.op.objectRayOrigin.f32(i32 149, i8 0) @@ -124,14 +122,14 @@ define void @"\01?MyIntersectionShader@@YAXXZ"() #1 { } ; Function Attrs: nounwind -define void @"\01?MyMissShader@@YAXURayPayload@@@Z"(%struct.RayPayload* noalias nocapture %payload) #1 !types !48 { +define void @"\01?MyMissShader@@YAXURayPayload@@@Z"(%struct.RayPayload* 
noalias nocapture %payload) #1 !pointeetys !48 { %1 = getelementptr inbounds %struct.RayPayload, %struct.RayPayload* %payload, i32 0, i32 0 store <4 x float> , <4 x float>* %1, align 4 ret void } ; Function Attrs: nounwind -declare !types !49 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 +declare !pointeetys !49 void @dx.op.traceRay.struct.RayPayload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.RayPayload*) #2 ; Function Attrs: nounwind declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #2 @@ -151,7 +149,7 @@ declare float @dx.op.rayTCurrent.f32(i32) #4 declare void @dx.op.acceptHitAndEndSearch(i32) #0 ; Function Attrs: nounwind -declare !types !50 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #2 +declare !pointeetys !50 i1 @dx.op.reportHit.struct.BuiltInTriangleIntersectionAttributes(i32, float, i32, %struct.BuiltInTriangleIntersectionAttributes*) #2 ; Function Attrs: nounwind memory(none) declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #3 @@ -160,10 +158,10 @@ declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types. 
declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #4 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !51 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !51 void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare !types !51 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 +declare !pointeetys !51 void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5 attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } @@ -211,25 +209,24 @@ attributes #5 = { nocallback nofree nosync nounwind willreturn memory(argmem: re !28 = !{i32 8, i32 11, i32 6, i32 16, i32 5, !22} !29 = !{void ()* @"\01?MyRaygenShader@@YAXXZ", !"\01?MyRaygenShader@@YAXXZ", null, null, !30} !30 = !{i32 8, i32 7, i32 5, !22} -!31 = !{!"function", %struct.TraversalData poison, i64 poison, !32} +!31 = !{%struct.TraversalData poison} !32 = !{i32 0, %struct.TraversalData poison} -!33 = !{!"function", i32 poison, !34} +!33 = !{%struct.DispatchSystemData poison} !34 = !{i32 0, %struct.DispatchSystemData poison} -!35 = !{!"function", %struct.BuiltInTriangleIntersectionAttributes poison, !36} +!35 = !{%struct.SystemData poison} !36 = !{i32 0, %struct.SystemData poison} -!37 = !{!"function", !"void", !36, %struct.BuiltInTriangleIntersectionAttributes poison} -!38 = !{!"function", i1 poison, !32} -!39 
= !{!"function", i32 poison, !36} -!40 = !{!"function", !"void", !34, i64 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison} -!41 = !{!"function", i1 poison, !32, float poison, i32 poison} +!37 = !{%struct.SystemData poison} +!38 = !{%struct.TraversalData poison} +!39 = !{%struct.SystemData poison} +!40 = !{%struct.DispatchSystemData poison} +!41 = !{%struct.TraversalData poison} !42 = !{!43, !43, i64 0} !43 = !{!"omnipotent char", !44, i64 0} !44 = !{!"Simple C/C++ TBAA"} -!45 = !{!"function", !"void", !46, !47} +!45 = !{null, %struct.RayPayload poison, %struct.BuiltInTriangleIntersectionAttributes poison} !46 = !{i32 0, %struct.RayPayload poison} !47 = !{i32 0, %struct.BuiltInTriangleIntersectionAttributes poison} -!48 = !{!"function", !"void", !46} -!49 = !{!"function", !"void", i32 poison, %dx.types.Handle poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, !46} -!50 = !{!"function", i1 poison, i32 poison, float poison, i32 poison, !47} -!51 = !{!"function", !"void", i64 poison, !52} -!52 = !{i32 0, i8 poison} +!48 = !{%struct.RayPayload poison} +!49 = !{%struct.RayPayload poison} +!50 = !{%struct.BuiltInTriangleIntersectionAttributes poison} +!51 = !{i8 poison} diff --git a/llvmraytracing/test/intrinsics/discard-values.ll b/llvmraytracing/test/intrinsics/discard-values.ll index eee9c4c7d9..4c8fea3732 100644 --- a/llvmraytracing/test/intrinsics/discard-values.ll +++ b/llvmraytracing/test/intrinsics/discard-values.ll @@ -8,9 +8,7 @@ declare float @_AmdGetUninitializedF32() declare i32 @_AmdGetUninitializedI32() declare %struct.AnyHitData @_AmdGetUninitializedStruct() -declare %struct.DispatchSystemData @_cont_SetupRayGen() - -declare !types !10 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare 
!pointeetys !10 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) define float @discard_f32() { ; CHECK-LABEL: define float @discard_f32() { @@ -58,5 +56,4 @@ entry: !7 = !{i32 0} !8 = !{i32 0, i64 65536} !9 = !{i32 21} -!10 = !{!"function", i32 poison, !11} -!11 = !{i32 0, %struct.DispatchSystemData poison} +!10 = !{%struct.DispatchSystemData poison} diff --git a/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll b/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll index d9dc80eff3..e2ff1964d3 100644 --- a/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll +++ b/llvmraytracing/test/intrinsics/get-func-addr-not-found.ll @@ -3,14 +3,15 @@ ; CHECK: ERROR: Did not find function '' requested by _AmdGetFuncAddr %struct.DispatchSystemData = type { i32 } +%struct.TraversalData = type { } declare i64 @_AmdGetFuncAddr() -declare %struct.DispatchSystemData @_cont_SetupRayGen() +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !11 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !8 { ret void } @@ -31,5 +32,7 @@ entry: !5 = !{i32 0} !6 = !{i32 0, i64 65536} !7 = !{i32 21} -!8 = !{!"function", i32 poison, !9} +!8 = !{%struct.DispatchSystemData poison} !9 = !{i32 0, %struct.DispatchSystemData poison} +!10 = !{i32 0, %struct.TraversalData poison} +!11 = !{%struct.TraversalData poison} diff --git a/llvmraytracing/test/intrinsics/get-func-addr.ll b/llvmraytracing/test/intrinsics/get-func-addr.ll index 1a4d83707a..2a7fc4294e 100644 --- a/llvmraytracing/test/intrinsics/get-func-addr.ll +++ b/llvmraytracing/test/intrinsics/get-func-addr.ll @@ -4,13 
+4,13 @@ %struct.DispatchSystemData = type { i32 } declare i64 @_AmdGetFuncAddrMyFunc() -declare i32 @_AmdGetFuncAddrMyFunc2() -declare %struct.DispatchSystemData @_cont_SetupRayGen() +%struct.TraversalData = type { } -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !12 i1 @_cont_ReportHit(%struct.TraversalData* %data, float %t, i32 %hitKind) -define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !types !{!"function", !"void", !{i32 0, %struct.DispatchSystemData poison}} { +define void @_cont_ExitRayGen(ptr nocapture readonly %data) alwaysinline nounwind !pointeetys !8 { ret void } @@ -19,20 +19,17 @@ define { i64, i32 } @main() !lgc.rt.shaderstage !10 { ; CHECK-SAME: (i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.entry [[META11:![0-9]+]] !continuation.registercount [[META5]] !continuation [[META12:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [0 x i32], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) ; CHECK-NEXT: [[TMP1:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @MyFunc) -; CHECK-NEXT: [[TMP2:%.*]] = call i32 (...) 
@lgc.cps.as.continuation.reference__i32(ptr @MyFunc2) ; CHECK-NEXT: [[V0:%.*]] = insertvalue { i64, i32 } undef, i64 [[TMP1]], 0 -; CHECK-NEXT: [[V1:%.*]] = insertvalue { i64, i32 } undef, i32 [[TMP2]], 1 ; CHECK-NEXT: ret void ; entry: %val = call i64 @_AmdGetFuncAddrMyFunc() - %val2 = call i32 @_AmdGetFuncAddrMyFunc2() %v0 = insertvalue { i64, i32 } undef, i64 %val, 0 - %v1 = insertvalue { i64, i32 } undef, i32 %val2, 1 - ret { i64, i32 } %v1 + ret { i64, i32 } %v0 } define i32 @MyFunc() { @@ -42,13 +39,6 @@ define i32 @MyFunc() { ret i32 5 } -define i32 @MyFunc2() { -; CHECK-LABEL: define i32 @MyFunc2() { -; CHECK-NEXT: ret i32 5 -; - ret i32 5 -} - !dx.entryPoints = !{!0, !3} !continuation.stackAddrspace = !{!7} @@ -60,6 +50,9 @@ define i32 @MyFunc2() { !5 = !{i32 0} !6 = !{i32 0, i64 65536} !7 = !{i32 21} -!8 = !{!"function", i32 poison, !9} +!8 = !{%struct.DispatchSystemData poison} !9 = !{i32 0, %struct.DispatchSystemData poison} !10 = !{i32 0} +!11 = !{i32 0, %struct.TraversalData poison} +!12 = !{%struct.TraversalData poison} + diff --git a/llvmraytracing/test/intrinsics/shader-start.ll b/llvmraytracing/test/intrinsics/shader-start.ll index cb0aef05a0..d2f50b0b25 100644 --- a/llvmraytracing/test/intrinsics/shader-start.ll +++ b/llvmraytracing/test/intrinsics/shader-start.ll @@ -2,29 +2,35 @@ ; RUN: opt --verify-each -passes='lower-raytracing-pipeline,lint' -S %s --lint-abort-on-error | FileCheck %s %struct.DispatchSystemData = type { i32 } +%struct.SystemData = type { %struct.DispatchSystemData } %struct.HitData = type { float, i32 } +%struct.BuiltInTriangleIntersectionAttributes = type { <2 x float> } -declare !types !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) -declare !types !13 i1 @_cont_ReportHit(%struct.DispatchSystemData* %data, float %t, i32 %hitKind) +declare !pointeetys !8 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !13 i1 @_cont_ReportHit(%struct.DispatchSystemData* %data, float %t, i32 
%hitKind) +declare !pointeetys !15 %struct.BuiltInTriangleIntersectionAttributes @_cont_GetTriangleHitAttributes(%struct.SystemData*) #0 define void @main() !lgc.rt.shaderstage !10 { ; CHECK-LABEL: define %struct.DispatchSystemData @main( -; CHECK-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META6:![0-9]+]] { +; CHECK-SAME: i64 [[RETURNADDR:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[TMP0:%.*]], [8 x i32] [[PADDING:%.*]], [30 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !continuation.registercount [[META0:![0-9]+]] !continuation [[META6:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 +; CHECK-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; CHECK-NEXT: store [30 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; CHECK-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: store i32 123, ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; CHECK-NEXT: call void (...) @lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]]), !continuation.registercount [[META0]] +; CHECK-NEXT: [[TMP2:%.*]] = load [30 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; CHECK-NEXT: call void (...) 
@lgc.ilcps.return(i64 [[RETURNADDR]], [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP1]], [8 x i32] poison, [30 x i32] [[TMP2]]), !continuation.registercount [[META0]] ; CHECK-NEXT: unreachable ; entry: ret void } -define void @_cont_ShaderStart(%struct.DispatchSystemData* %data) !types !11 { +define void @_cont_ShaderStart(%struct.DispatchSystemData* %data) !pointeetys !11 { ; CHECK-LABEL: define void @_cont_ShaderStart( -; CHECK-SAME: ptr [[DATA:%.*]]) !types [[META7:![0-9]+]] { +; CHECK-SAME: ptr [[DATA:%.*]]) !pointeetys [[META3:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_DISPATCHSYSTEMDATA:%.*]], ptr [[DATA]], i32 0, i32 0 ; CHECK-NEXT: store i32 123, ptr [[TMP0]], align 4 @@ -44,16 +50,17 @@ entry: !5 = !{i32 0} !6 = !{i32 0, i64 65536} !7 = !{i32 21} -!8 = !{!"function", i32 poison, !9} +!8 = !{%struct.DispatchSystemData poison} !9 = !{i32 0, %struct.DispatchSystemData poison} !10 = !{i32 1} -!11 = !{!"function", !"void", !9} +!11 = !{%struct.DispatchSystemData poison} !12 = !{i32 0, %struct.DispatchSystemData poison} -!13 = !{!"function", <3 x i32> poison, !12} +!13 = !{%struct.DispatchSystemData poison} +!14 = !{i32 0, %struct.SystemData poison} +!15 = !{%struct.SystemData poison} ;. ; CHECK: [[META0]] = !{i32 30} -; CHECK: [[META4:![0-9]+]] = !{i32 0, %struct.DispatchSystemData poison} +; CHECK: [[META3]] = !{%struct.DispatchSystemData poison} ; CHECK: [[META5]] = !{i32 1} ; CHECK: [[META6]] = !{ptr @main} -; CHECK: [[META7]] = !{!"function", !"void", [[META4]]} ;. 
diff --git a/llvmraytracing/test/lgccps/call-shader-i1-payload.ll b/llvmraytracing/test/lgccps/call-shader-i1-payload.ll index e21043d994..6af154dc4d 100644 --- a/llvmraytracing/test/lgccps/call-shader-i1-payload.ll +++ b/llvmraytracing/test/lgccps/call-shader-i1-payload.ll @@ -12,7 +12,7 @@ %struct.AnyHitTraversalData = type { { { i32, i32 }, { i64, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i32, i64 } }, { float, i32, i32, i32, i32 } } ; Need _cont_ReportHit to get anyhit traversal system data type -declare !types !8 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) +declare !pointeetys !8 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) ; Function Attrs: alwaysinline declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalData) #0 @@ -21,19 +21,19 @@ declare %struct.DispatchSystemData @_AmdAwaitTraversal(i64, %struct.TraversalDat declare %struct.DispatchSystemData @_AmdAwaitShader(i64, %struct.DispatchSystemData) #0 ; Function Attrs: alwaysinline -define i32 @_cont_GetLocalRootIndex(ptr %data) #0 !types !1 { +define i32 @_cont_GetLocalRootIndex(ptr %data) #0 !pointeetys !1 { ret i32 5 } ; Function Attrs: alwaysinline -define void @_cont_CallShader(ptr %data, i32 %0) #0 !types !2 { +define void @_cont_CallShader(ptr %data, i32 %0) #0 !pointeetys !2 { %dis_data = load %struct.DispatchSystemData, ptr %data, align 4 %newdata = call %struct.DispatchSystemData @_AmdAwaitShader(i64 2, %struct.DispatchSystemData %dis_data) store %struct.DispatchSystemData %newdata, ptr %data, align 4 ret void } -define void @called(ptr %params) !types !3 !cont.payload.type !4 !lgc.rt.shaderstage !5 { +define void @called(ptr %params) !pointeetys !3 !cont.payload.type !4 !lgc.rt.shaderstage !5 { call void (...) 
@lgc.rt.call.callable.shader(i32 2, ptr %params, i32 4), !cont.payload.type !4 ret void } @@ -47,9 +47,9 @@ attributes #1 = { nounwind willreturn memory(argmem: readwrite, inaccessiblemem: !lgc.cps.module = !{} !0 = !{i32 0, %struct.DispatchSystemData poison} -!1 = !{!"function", i32 poison, !0} -!2 = !{!"function", !"void", !0, i32 poison} -!3 = !{!"function", !"void", !6} +!1 = !{%struct.DispatchSystemData poison} +!2 = !{%struct.DispatchSystemData poison} +!3 = !{%struct.MyParams poison} !4 = !{%struct.MyParams poison} !5 = !{i32 5} !6 = !{i32 0, %struct.MyParams poison} @@ -57,107 +57,107 @@ attributes #1 = { nounwind willreturn memory(argmem: readwrite, inaccessiblemem: !8 = !{!"function", i1 poison, !7, float poison, i32 poison} ; LOWER-RAYTRACING-PIPELINE-LABEL: define i32 @_cont_GetLocalRootIndex( -; LOWER-RAYTRACING-PIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] !types [[META3:![0-9]+]] { +; LOWER-RAYTRACING-PIPELINE-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] !pointeetys [[META3:![0-9]+]] { ; LOWER-RAYTRACING-PIPELINE-NEXT: ret i32 5 ; ; ; LOWER-RAYTRACING-PIPELINE-LABEL: define void @called( -; LOWER-RAYTRACING-PIPELINE-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation [[META6:![0-9]+]] { +; LOWER-RAYTRACING-PIPELINE-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META4:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation [[META5:![0-9]+]] { ; LOWER-RAYTRACING-PIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[PAYLOAD_ALLOCA:%.*]] = alloca [30 x i32], align 4 +; 
LOWER-RAYTRACING-PIPELINE-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA:%.*]] = alloca [2 x i32], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP1:%.*]] = alloca [[STRUCT_MYPARAMS:%.*]], align 8 -; LOWER-RAYTRACING-PIPELINE-NEXT: store [2 x i32] [[PAYLOAD]], ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: store [2 x i32] [[PAYLOAD]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP3:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[DIS_DATA_I:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP8]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP8]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], 
align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 1 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP23]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP12:%.*]] = load [2 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP13:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [41 x i32], [2 x i32] } (...) @lgc.cps.await__sl_s_struct.DispatchSystemDatasa41i32a2i32s(i32 2, i32 4, i32 5, [42 x i32] poison, [2 x i32] [[TMP12]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP14:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [41 x i32], [2 x i32] } [[TMP13]], 2 -; LOWER-RAYTRACING-PIPELINE-NEXT: store [2 x i32] [[TMP14]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP15:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [41 x i32], [2 x i32] } [[TMP13]], 0 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP12:%.*]] = load [2 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP13:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [2 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa8i32a2i32s(i32 2, i32 4, i32 5, [9 x i32] poison, [2 x i32] [[TMP12]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP14:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [2 x i32] } [[TMP13]], 2 +; LOWER-RAYTRACING-PIPELINE-NEXT: store [2 x i32] [[TMP14]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store [[STRUCT_MYPARAMS]] poison, ptr [[TMP1]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[PAYLOAD_ALLOCA]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP17:%.*]] = load i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP20]], ptr [[TMP18]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP15:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [2 x i32] } [[TMP13]], 0 ; LOWER-RAYTRACING-PIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP15]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_MYPARAMS]], ptr [[TMP1]], i32 0 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP22]], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: 
[[TMP24:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_ALLOCA]], i32 1 +; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP22]], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], i32 1 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 1 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP28]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 ; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP26:%.*]] = load [[STRUCT_DISPATCHSYSTEMDATA]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP27:%.*]] = load [2 x i32], ptr [[PAYLOAD_ALLOCA]], align 4 -; LOWER-RAYTRACING-PIPELINE-NEXT: call void (...) @lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], [41 x i32] poison, [2 x i32] [[TMP27]]), !continuation.registercount [[META1]] +; LOWER-RAYTRACING-PIPELINE-NEXT: [[TMP27:%.*]] = load [2 x i32], ptr [[PAYLOAD_SERIALIZATION_ALLOCA]], align 4 +; LOWER-RAYTRACING-PIPELINE-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP26]], [8 x i32] poison, [2 x i32] [[TMP27]]), !continuation.registercount [[META1]] ; LOWER-RAYTRACING-PIPELINE-NEXT: unreachable ; ; ; SROA-LABEL: define i32 @_cont_GetLocalRootIndex( -; SROA-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] !types [[META3:![0-9]+]] { +; SROA-SAME: ptr [[DATA:%.*]]) #[[ATTR0:[0-9]+]] !pointeetys [[META3:![0-9]+]] { ; SROA-NEXT: ret i32 5 ; ; ; SROA-LABEL: define void @called( -; SROA-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [41 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation [[META6:![0-9]+]] { +; SROA-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], [[STRUCT_DISPATCHSYSTEMDATA:%.*]] [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [8 x i32] [[PADDING:%.*]], [2 x i32] [[PAYLOAD:%.*]]) !lgc.rt.shaderstage [[META4:![0-9]+]] !lgc.cps [[META1:![0-9]+]] !continuation [[META5:![0-9]+]] { ; SROA-NEXT: [[DOTSROA_5:%.*]] = alloca i8, align 4 ; SROA-NEXT: [[PAYLOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[PAYLOAD]], 0 ; SROA-NEXT: [[PAYLOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[PAYLOAD]], 1 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_EXTRACT_TRUNC:%.*]] = trunc i32 [[PAYLOAD_FCA_1_EXTRACT]] to i8 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_SHIFT:%.*]] = lshr i32 [[PAYLOAD_FCA_1_EXTRACT]], 8 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_TRUNC:%.*]] = trunc i32 [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_SHIFT]] to i24 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_EXTRACT_TRUNC:%.*]] = trunc i32 [[PAYLOAD_FCA_1_EXTRACT]] to i8 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_SHIFT:%.*]] = lshr i32 [[PAYLOAD_FCA_1_EXTRACT]], 8 +; SROA-NEXT: 
[[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_TRUNC:%.*]] = trunc i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_SHIFT]] to i24 ; SROA-NEXT: [[SYSTEM_DATA_FCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[SYSTEM_DATA]], 0 -; SROA-NEXT: store i8 [[PAYLOAD_ALLOCA_SROA_8_4_EXTRACT_TRUNC]], ptr [[DOTSROA_5]], align 4 +; SROA-NEXT: store i8 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_EXTRACT_TRUNC]], ptr [[DOTSROA_5]], align 4 ; SROA-NEXT: [[DIS_DATA_I_FCA_0_INSERT:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[SYSTEM_DATA_FCA_0_EXTRACT]], 0 ; SROA-NEXT: [[DOTSROA_5_0__SROA_5_4_2:%.*]] = load i8, ptr [[DOTSROA_5]], align 4 ; SROA-NEXT: [[DOTFCA_0_INSERT5:%.*]] = insertvalue [2 x i32] poison, i32 [[PAYLOAD_FCA_0_EXTRACT]], 0 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_EXT19:%.*]] = zext i24 [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_TRUNC]] to i32 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_SHIFT20:%.*]] = shl i32 [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_EXT19]], 8 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_MASK21:%.*]] = and i32 undef, 255 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_INSERT22:%.*]] = or i32 [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_MASK21]], [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_SHIFT20]] -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_EXT15:%.*]] = zext i8 [[DOTSROA_5_0__SROA_5_4_2]] to i32 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_MASK16:%.*]] = and i32 [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_INSERT22]], -256 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_INSERT17:%.*]] = or i32 [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_MASK16]], [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_EXT15]] -; SROA-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_INSERT17]], 1 -; SROA-NEXT: [[TMP1:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [41 x i32], [2 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa41i32a2i32s(i32 2, i32 4, i32 5, [42 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT8]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] -; SROA-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [41 x i32], [2 x i32] } [[TMP1]], 2 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_EXT19:%.*]] = zext i24 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_TRUNC]] to i32 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_SHIFT20:%.*]] = shl i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_EXT19]], 8 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_MASK21:%.*]] = and i32 undef, 255 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_INSERT22:%.*]] = or i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_MASK21]], [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_SHIFT20]] +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_EXT15:%.*]] = zext i8 [[DOTSROA_5_0__SROA_5_4_2]] to i32 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_MASK16:%.*]] = and i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_INSERT22]], -256 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_INSERT17:%.*]] = or i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_MASK16]], [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_EXT15]] +; SROA-NEXT: [[DOTFCA_1_INSERT8:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT5]], i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_INSERT17]], 1 +; SROA-NEXT: [[TMP1:%.*]] = call { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [2 x i32] } (...) 
@lgc.cps.await__sl_s_struct.DispatchSystemDatasa8i32a2i32s(i32 2, i32 4, i32 5, [9 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT8]]), !continuation.registercount [[META1]], !continuation.returnedRegistercount [[META1]] +; SROA-NEXT: [[TMP2:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [2 x i32] } [[TMP1]], 2 ; SROA-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[TMP2]], 0 ; SROA-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[TMP2]], 1 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_EXTRACT_TRUNC18:%.*]] = trunc i32 [[DOTFCA_1_EXTRACT]] to i8 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_SHIFT23:%.*]] = lshr i32 [[DOTFCA_1_EXTRACT]], 8 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_TRUNC24:%.*]] = trunc i32 [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_SHIFT23]] to i24 -; SROA-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [41 x i32], [2 x i32] } [[TMP1]], 0 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_EXTRACT_TRUNC18:%.*]] = trunc i32 [[DOTFCA_1_EXTRACT]] to i8 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_SHIFT23:%.*]] = lshr i32 [[DOTFCA_1_EXTRACT]], 8 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_TRUNC24:%.*]] = trunc i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_SHIFT23]] to i24 ; SROA-NEXT: store i1 poison, ptr [[DOTSROA_5]], align 4 -; SROA-NEXT: store i8 [[PAYLOAD_ALLOCA_SROA_8_4_EXTRACT_TRUNC18]], ptr [[DOTSROA_5]], align 4 +; SROA-NEXT: store i8 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_EXTRACT_TRUNC18]], ptr [[DOTSROA_5]], align 4 +; SROA-NEXT: [[TMP3:%.*]] = extractvalue { [[STRUCT_DISPATCHSYSTEMDATA]], [8 x i32], [2 x i32] } [[TMP1]], 0 ; SROA-NEXT: [[DOTFCA_0_EXTRACT27:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP3]], 0 ; SROA-NEXT: [[DOTSROA_5_0__SROA_5_4_:%.*]] = load i8, ptr [[DOTSROA_5]], align 4 ; SROA-NEXT: [[DOTFCA_0_INSERT26:%.*]] = insertvalue [[STRUCT_DISPATCHSYSTEMDATA]] poison, i32 [[DOTFCA_0_EXTRACT27]], 0 ; SROA-NEXT: 
[[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[DOTFCA_0_EXTRACT]], 0 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_EXT:%.*]] = zext i24 [[PAYLOAD_ALLOCA_SROA_16_4_EXTRACT_TRUNC24]] to i32 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_SHIFT:%.*]] = shl i32 [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_EXT]], 8 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_MASK:%.*]] = and i32 undef, 255 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_INSERT:%.*]] = or i32 [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_MASK]], [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_SHIFT]] -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_EXT:%.*]] = zext i8 [[DOTSROA_5_0__SROA_5_4_]] to i32 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_MASK:%.*]] = and i32 [[PAYLOAD_ALLOCA_SROA_16_4_INSERT_INSERT]], -256 -; SROA-NEXT: [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_INSERT:%.*]] = or i32 [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_MASK]], [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_EXT]] -; SROA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_ALLOCA_SROA_8_4_INSERT_INSERT]], 1 -; SROA-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT26]], [41 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META1]] +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_EXT:%.*]] = zext i24 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_EXTRACT_TRUNC24]] to i32 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_SHIFT:%.*]] = shl i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_EXT]], 8 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_MASK:%.*]] = and i32 undef, 255 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_INSERT:%.*]] = or i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_MASK]], [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_SHIFT]] +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_EXT:%.*]] = zext i8 [[DOTSROA_5_0__SROA_5_4_]] to i32 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_MASK:%.*]] = and i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_16_4_INSERT_INSERT]], -256 +; SROA-NEXT: [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_INSERT:%.*]] = or i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_MASK]], [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_EXT]] +; SROA-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT]], i32 [[PAYLOAD_SERIALIZATION_ALLOCA_SROA_8_4_INSERT_INSERT]], 1 +; SROA-NEXT: call void (...) 
@lgc.cps.jump(i32 [[RETURNADDR]], i32 6, {} poison, i32 poison, i32 poison, [[STRUCT_DISPATCHSYSTEMDATA]] [[DOTFCA_0_INSERT26]], [8 x i32] poison, [2 x i32] [[DOTFCA_1_INSERT]]), !continuation.registercount [[META1]] ; SROA-NEXT: unreachable ; diff --git a/llvmraytracing/test/lgccps/lower-traversal.ll b/llvmraytracing/test/lgccps/lower-traversal.ll index 395bf3bf1c..0614d85f58 100644 --- a/llvmraytracing/test/lgccps/lower-traversal.ll +++ b/llvmraytracing/test/lgccps/lower-traversal.ll @@ -7,15 +7,15 @@ %struct.DispatchSystemData = type { i32 } ; Need _cont_ReportHit to get system data type -declare !types !6 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) +declare !pointeetys !6 i1 @_cont_ReportHit(%struct.AnyHitTraversalData* %data, float %t, i32 %hitKind) -declare !types !10 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) +declare !pointeetys !10 i32 @_cont_GetLocalRootIndex(%struct.DispatchSystemData*) declare i64 @_AmdGetCurrentFuncAddr() -define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal(ptr addrspace(5) %0) local_unnamed_addr !lgc.shaderstage !0 !types !1 !lgc.rt.shaderstage !3 { +define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @_cont_Traversal(ptr addrspace(5) %0) local_unnamed_addr !lgc.shaderstage !0 !pointeetys !1 !lgc.rt.shaderstage !3 { ; CHECK-ATTRSIZE-16-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-ATTRSIZE-16-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [7 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META6:![0-9]+]] !lgc.rt.shaderstage [[META7:![0-9]+]] !lgc.cps [[META8:![0-9]+]] !continuation [[META9:![0-9]+]] { +; 
CHECK-ATTRSIZE-16-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [7 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META5:![0-9]+]] !lgc.rt.shaderstage [[META6:![0-9]+]] !lgc.cps [[META7:![0-9]+]] !continuation [[META8:![0-9]+]] { ; CHECK-ATTRSIZE-16-NEXT: .entry: ; CHECK-ATTRSIZE-16-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, align 16, addrspace(5) ; CHECK-ATTRSIZE-16-NEXT: store { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA]], ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], align 16 @@ -95,7 +95,7 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-16-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP57]], [[TMP56]] ], [ undef, [[TMP46]] ] ; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_0128_0_EXTRACT_TRUNC:%.*]] = phi i32 [ [[TMP65]], [[TMP56]] ], [ 0, [[TMP46]] ] ; CHECK-ATTRSIZE-16-NEXT: [[DOTNOT542:%.*]] = icmp eq i32 [[DOTSROA_0128_0_EXTRACT_TRUNC]], 0 -; CHECK-ATTRSIZE-16-NEXT: br i1 [[DOTNOT542]], label [[TMP106:%.*]], label [[TMP66:%.*]] +; CHECK-ATTRSIZE-16-NEXT: br i1 [[DOTNOT542]], label [[TMP107:%.*]], label [[TMP66:%.*]] ; CHECK-ATTRSIZE-16: 66: ; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_0130_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP44]] to i32 ; CHECK-ATTRSIZE-16-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } 
poison, <3 x i32> [[TMP2]], 0, 0 @@ -169,10 +169,11 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_0150_0_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[DOTSROA_0501_0]], i64 0 ; CHECK-ATTRSIZE-16-NEXT: [[DOTNOT540:%.*]] = icmp eq i32 [[DOTSROA_0150_0_VEC_EXTRACT]], 0 ; CHECK-ATTRSIZE-16-NEXT: [[OR_COND:%.*]] = or i1 [[TMP103]], [[DOTNOT540]] -; CHECK-ATTRSIZE-16-NEXT: br i1 [[OR_COND]], label [[TMP106]], label [[TMP104:%.*]] +; CHECK-ATTRSIZE-16-NEXT: br i1 [[OR_COND]], label [[TMP107]], label [[TMP104:%.*]] ; CHECK-ATTRSIZE-16: 104: -; CHECK-ATTRSIZE-16-NEXT: [[TMP105:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) -; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_0320_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP105]] to i32 +; CHECK-ATTRSIZE-16-NEXT: [[TMP105:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @_cont_Traversal) +; CHECK-ATTRSIZE-16-NEXT: [[TMP106:%.*]] = zext i32 [[TMP105]] to i64 +; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_0320_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP106]] to i32 ; CHECK-ATTRSIZE-16-NEXT: [[DOTFCA_0_0_INSERT322:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> [[TMP2]], 0, 0 ; CHECK-ATTRSIZE-16-NEXT: [[DOTFCA_0_1_INSERT323:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_0_INSERT322]], i32 [[TMP83]], 0, 1 ; CHECK-ATTRSIZE-16-NEXT: [[DOTFCA_1_0_INSERT324:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_1_INSERT323]], i64 [[TMP6]], 1, 0 @@ -197,7 +198,7 @@ define dso_local spir_func { { float, 
i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-16-NEXT: [[DOTFCA_2_8_INSERT343:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_7_INSERT342]], i64 [[TMP44]], 2, 8 ; CHECK-ATTRSIZE-16-NEXT: call void (...) @lgc.cps.jump(i32 [[DOTSROA_0150_0_VEC_EXTRACT]], i32 -1, {} poison, i32 [[DOTSROA_0320_0_EXTRACT_TRUNC]], i32 [[TMP83]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_8_INSERT343]], [7 x i32] poison, [8 x i32] [[PAYLOAD]]) ; CHECK-ATTRSIZE-16-NEXT: unreachable -; CHECK-ATTRSIZE-16: 106: +; CHECK-ATTRSIZE-16: 107: ; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_7_0:%.*]] = phi i32 [ [[TMP4]], [[DOTEXIT2]] ], [ [[TMP83]], [[DOTEXIT5]] ] ; CHECK-ATTRSIZE-16-NEXT: [[DOTSROA_0373_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP44]] to i32 ; CHECK-ATTRSIZE-16-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <3 x i32>, i32 } poison, <3 x i32> [[TMP2]], 0 @@ -206,7 +207,7 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-16-NEXT: unreachable ; ; CHECK-ATTRSIZE-8-LABEL: define dso_local spir_func void @_cont_Traversal( -; CHECK-ATTRSIZE-8-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [5 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META5:![0-9]+]] !lgc.rt.shaderstage [[META6:![0-9]+]] !lgc.cps [[META7:![0-9]+]] !continuation [[META8:![0-9]+]] { +; CHECK-ATTRSIZE-8-SAME: {} [[CONT_STATE:%.*]], i32 [[RETURNADDR:%.*]], i32 [[SHADER_INDEX:%.*]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 
x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA:%.*]], {} [[HIT_ATTRS:%.*]], [5 x i32] [[PADDING:%.*]], [8 x i32] [[PAYLOAD:%.*]]) local_unnamed_addr !lgc.shaderstage [[META4:![0-9]+]] !lgc.rt.shaderstage [[META5:![0-9]+]] !lgc.cps [[META6:![0-9]+]] !continuation [[META7:![0-9]+]] { ; CHECK-ATTRSIZE-8-NEXT: .entry: ; CHECK-ATTRSIZE-8-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } }, align 16, addrspace(5) ; CHECK-ATTRSIZE-8-NEXT: store { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[SYSTEM_DATA]], ptr addrspace(5) [[SYSTEM_DATA_ALLOCA]], align 16 @@ -286,7 +287,7 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-8-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP57]], [[TMP56]] ], [ undef, [[TMP46]] ] ; CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_0128_0_EXTRACT_TRUNC:%.*]] = phi i32 [ [[TMP65]], [[TMP56]] ], [ 0, [[TMP46]] ] ; CHECK-ATTRSIZE-8-NEXT: [[DOTNOT542:%.*]] = icmp eq i32 [[DOTSROA_0128_0_EXTRACT_TRUNC]], 0 -; CHECK-ATTRSIZE-8-NEXT: br i1 [[DOTNOT542]], label [[TMP106:%.*]], label [[TMP66:%.*]] +; CHECK-ATTRSIZE-8-NEXT: br i1 [[DOTNOT542]], label [[TMP107:%.*]], label [[TMP66:%.*]] ; CHECK-ATTRSIZE-8: 66: ; CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_0130_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP44]] to i32 ; CHECK-ATTRSIZE-8-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> [[TMP2]], 0, 0 @@ -360,10 +361,11 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; 
CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_0150_0_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[DOTSROA_0501_0]], i64 0 ; CHECK-ATTRSIZE-8-NEXT: [[DOTNOT540:%.*]] = icmp eq i32 [[DOTSROA_0150_0_VEC_EXTRACT]], 0 ; CHECK-ATTRSIZE-8-NEXT: [[OR_COND:%.*]] = or i1 [[TMP103]], [[DOTNOT540]] -; CHECK-ATTRSIZE-8-NEXT: br i1 [[OR_COND]], label [[TMP106]], label [[TMP104:%.*]] +; CHECK-ATTRSIZE-8-NEXT: br i1 [[OR_COND]], label [[TMP107]], label [[TMP104:%.*]] ; CHECK-ATTRSIZE-8: 104: -; CHECK-ATTRSIZE-8-NEXT: [[TMP105:%.*]] = call i64 (...) @lgc.cps.as.continuation.reference__i64(ptr @_cont_Traversal) -; CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_0320_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP105]] to i32 +; CHECK-ATTRSIZE-8-NEXT: [[TMP105:%.*]] = call i32 (...) @lgc.cps.as.continuation.reference__i32(ptr @_cont_Traversal) +; CHECK-ATTRSIZE-8-NEXT: [[TMP106:%.*]] = zext i32 [[TMP105]] to i64 +; CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_0320_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP106]] to i32 ; CHECK-ATTRSIZE-8-NEXT: [[DOTFCA_0_0_INSERT322:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison, <3 x i32> [[TMP2]], 0, 0 ; CHECK-ATTRSIZE-8-NEXT: [[DOTFCA_0_1_INSERT323:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_0_INSERT322]], i32 [[TMP83]], 0, 1 ; CHECK-ATTRSIZE-8-NEXT: [[DOTFCA_1_0_INSERT324:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_0_1_INSERT323]], i64 [[TMP6]], 1, 0 @@ -388,7 +390,7 @@ define dso_local spir_func { { float, i32, i32, i32, i32 }, <2 x float>, i32 } @ ; CHECK-ATTRSIZE-8-NEXT: [[DOTFCA_2_8_INSERT343:%.*]] = insertvalue { { <3 x i32>, i32 }, { i64, i32, i32, <3 
x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_7_INSERT342]], i64 [[TMP44]], 2, 8 ; CHECK-ATTRSIZE-8-NEXT: call void (...) @lgc.cps.jump(i32 [[DOTSROA_0150_0_VEC_EXTRACT]], i32 -1, {} poison, i32 [[DOTSROA_0320_0_EXTRACT_TRUNC]], i32 [[TMP83]], { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } [[DOTFCA_2_8_INSERT343]], [5 x i32] poison, [8 x i32] [[PAYLOAD]]) ; CHECK-ATTRSIZE-8-NEXT: unreachable -; CHECK-ATTRSIZE-8: 106: +; CHECK-ATTRSIZE-8: 107: ; CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_7_0:%.*]] = phi i32 [ [[TMP4]], [[DOTEXIT2]] ], [ [[TMP83]], [[DOTEXIT5]] ] ; CHECK-ATTRSIZE-8-NEXT: [[DOTSROA_0373_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP44]] to i32 ; CHECK-ATTRSIZE-8-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <3 x i32>, i32 } poison, <3 x i32> [[TMP2]], 0 @@ -602,13 +604,13 @@ declare ptr @llvm.invariant.start.p7(i64 immarg %0, ptr addrspace(7) nocapture % !lgc.rt.max.attribute.size = !{!4} !0 = !{i32 7} -!1 = !{!"function", { { float, i32, i32, i32, i32 }, <2 x float>, i32 } poison, !2} +!1 = !{ { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison} !2 = !{i32 5, { { <3 x i32>, i32 }, { i64, i32, i32, <3 x float>, <3 x float>, float, float }, { { float, i32, i32, i32, i32 }, <2 x float>, i32, i32, i32, i32, i32, i32, i64 } } poison} !3 = !{i32 6} !4 = !{i32 16} ; HITATTR_SIZE_16 !4 = !{i32 8} ; HITATTR_SIZE_8 !5 = !{i32 0, %struct.AnyHitTraversalData poison} -!6 = !{!"function", i1 poison, !5, float poison, i32 poison} +!6 = !{ %struct.AnyHitTraversalData poison} !7 = !{i32 8} !9 = !{i32 0, %struct.DispatchSystemData poison} -!10 = !{!"function", i32 poison, !9} +!10 = !{%struct.DispatchSystemData poison} diff --git 
a/llvmraytracing/unittests/RemainingArgumentDwordTests.cpp b/llvmraytracing/unittests/RemainingArgumentDwordTests.cpp index 4e06b1c1ee..85e22ad31a 100644 --- a/llvmraytracing/unittests/RemainingArgumentDwordTests.cpp +++ b/llvmraytracing/unittests/RemainingArgumentDwordTests.cpp @@ -10,8 +10,8 @@ * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * - * The above copyright notice and this permission notice shall be included in - *all copies or substantial portions of the Software. + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, @@ -41,16 +41,14 @@ using namespace llvm; // Add a small DSL to add tests. // Define local context and DL per test. -#define DECLARE_LLVM_LOCALS(TestName) \ - LLVMContext context_##TestName; \ - DataLayout DL_##TestName( \ - "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:" \ - "32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:" \ - "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" \ - "v2048:2048-n32:64-S32-A5-G1-ni:7:8"); +#define DECLARE_LLVM_LOCALS(TestName) \ + LLVMContext context_##TestName; \ + DataLayout DL_##TestName("e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:" \ + "32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:" \ + "32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" \ + "v2048:2048-n32:64-S32-A5-G1-ni:7:8"); -#define DECLARE_LLVM_LOCALS_SIMPLE(ExpectedCount, TypeName) \ - DECLARE_LLVM_LOCALS(ExpectedCount##TypeName) +#define DECLARE_LLVM_LOCALS_SIMPLE(ExpectedCount, TypeName) DECLARE_LLVM_LOCALS(ExpectedCount##TypeName) // Get the context based on a test name. 
#define LLVM_CONTEXT(TestName) context_##TestName @@ -59,47 +57,40 @@ using namespace llvm; #define LLVM_DL(TestName) DL_##TestName // Get a type based on a context. -#define GET_TYPE_INITIALIZER(TestName, TypeName) \ - Type::get##TypeName##Ty(LLVM_CONTEXT(TestName)) +#define GET_TYPE_INITIALIZER(TestName, TypeName) Type::get##TypeName##Ty(LLVM_CONTEXT(TestName)) // Test the simple case where we are checking the size of a single type. -#define TEST_DWORD_COUNT(TypeName, ExpectedCount) \ - TEST(LgcCpsUnitTests, ExpectedCount##TypeName) { \ - DECLARE_LLVM_LOCALS_SIMPLE(ExpectedCount, TypeName) \ - unsigned dwordCount = lgc::cps::getArgumentDwordCount( \ - LLVM_DL(ExpectedCount##TypeName), \ - GET_TYPE_INITIALIZER(ExpectedCount##TypeName, TypeName)); \ - EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ +#define TEST_DWORD_COUNT(TypeName, ExpectedCount) \ + TEST(LgcCpsUnitTests, ExpectedCount##TypeName) { \ + DECLARE_LLVM_LOCALS_SIMPLE(ExpectedCount, TypeName) \ + unsigned dwordCount = lgc::cps::getArgumentDwordCount(LLVM_DL(ExpectedCount##TypeName), \ + GET_TYPE_INITIALIZER(ExpectedCount##TypeName, TypeName)); \ + EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ } // Test the case where we are checking the size of a vector of elements. 
-#define TEST_DWORD_COUNT_VECTOR(TestName, TypeName, NumElements, \ - ExpectedCount) \ - TEST(LgcCpsUnitTests, TestName) { \ - DECLARE_LLVM_LOCALS(TestName) \ - unsigned dwordCount = lgc::cps::getArgumentDwordCount( \ - LLVM_DL(TestName), \ - FixedVectorType::get(GET_TYPE_INITIALIZER(TestName, TypeName), \ - NumElements)); \ - EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ +#define TEST_DWORD_COUNT_VECTOR(TestName, TypeName, NumElements, ExpectedCount) \ + TEST(LgcCpsUnitTests, TestName) { \ + DECLARE_LLVM_LOCALS(TestName) \ + unsigned dwordCount = lgc::cps::getArgumentDwordCount( \ + LLVM_DL(TestName), FixedVectorType::get(GET_TYPE_INITIALIZER(TestName, TypeName), NumElements)); \ + EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ } // Test the case where we are checking the size of struct of arbitrary elements. -#define TEST_DWORD_COUNT_STRUCT(TestName, ExpectedCount, ...) \ - TEST(LgcCpsUnitTests, TestName) { \ - DECLARE_LLVM_LOCALS(TestName) \ - unsigned dwordCount = lgc::cps::getArgumentDwordCount( \ - LLVM_DL(TestName), StructType::get(__VA_ARGS__)); \ - EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ +#define TEST_DWORD_COUNT_STRUCT(TestName, ExpectedCount, ...) \ + TEST(LgcCpsUnitTests, TestName) { \ + DECLARE_LLVM_LOCALS(TestName) \ + unsigned dwordCount = lgc::cps::getArgumentDwordCount(LLVM_DL(TestName), StructType::get(__VA_ARGS__)); \ + EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ } // Test the case where we are checking a list of arbitrary elements. -#define TEST_DWORD_COUNT_LIST(TestName, ExpectedCount, ...) \ - TEST(LgcCpsUnitTests, TestName) { \ - DECLARE_LLVM_LOCALS(TestName) \ - unsigned dwordCount = \ - lgc::cps::getArgumentDwordCount(LLVM_DL(TestName), {__VA_ARGS__}); \ - EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ +#define TEST_DWORD_COUNT_LIST(TestName, ExpectedCount, ...) 
\ + TEST(LgcCpsUnitTests, TestName) { \ + DECLARE_LLVM_LOCALS(TestName) \ + unsigned dwordCount = lgc::cps::getArgumentDwordCount(LLVM_DL(TestName), {__VA_ARGS__}); \ + EXPECT_EQ(dwordCount, static_cast(ExpectedCount)); \ } TEST_DWORD_COUNT(Int1, 1) @@ -111,17 +102,12 @@ TEST_DWORD_COUNT(Float, 1) TEST_DWORD_COUNT(Double, 2) TEST_DWORD_COUNT(FP128, 4) TEST_DWORD_COUNT_VECTOR(VecI64Test, Int64, 3, 6) -TEST_DWORD_COUNT_STRUCT(StructFPTest, 6, - Type::getDoubleTy(LLVM_CONTEXT(StructFPTest)), +TEST_DWORD_COUNT_STRUCT(StructFPTest, 6, Type::getDoubleTy(LLVM_CONTEXT(StructFPTest)), Type::getFP128Ty(LLVM_CONTEXT(StructFPTest))) -TEST_DWORD_COUNT_STRUCT( - StructPtrTest, 2, - LLVM_DL(StructPtrTest).getIntPtrType(LLVM_CONTEXT(StructPtrTest), 32)) -TEST_DWORD_COUNT_LIST( - ListFloatStructTest, 6, - ArrayType::get(Type::getFloatTy(LLVM_CONTEXT(ListFloatStructTest)), 4), - StructType::get(Type::getInt32Ty(LLVM_CONTEXT(ListFloatStructTest)), - Type::getInt16Ty(LLVM_CONTEXT(ListFloatStructTest)))) +TEST_DWORD_COUNT_STRUCT(StructPtrTest, 2, LLVM_DL(StructPtrTest).getIntPtrType(LLVM_CONTEXT(StructPtrTest), 32)) +TEST_DWORD_COUNT_LIST(ListFloatStructTest, 6, ArrayType::get(Type::getFloatTy(LLVM_CONTEXT(ListFloatStructTest)), 4), + StructType::get(Type::getInt32Ty(LLVM_CONTEXT(ListFloatStructTest)), + Type::getInt16Ty(LLVM_CONTEXT(ListFloatStructTest)))) #undef DECLARE_LLVM_LOCALS #undef DECLARE_LLVM_LOCALS_SIMPLE diff --git a/tool/dumper/CMakeLists.txt b/tool/dumper/CMakeLists.txt index 5cd8b18acf..03c0c75372 100644 --- a/tool/dumper/CMakeLists.txt +++ b/tool/dumper/CMakeLists.txt @@ -75,4 +75,5 @@ if(ICD_BUILD_LLPC) target_compile_definitions(dumper PRIVATE ICD_BUILD_LLPC) endif() target_link_libraries(dumper PUBLIC dumper_base) +target_compile_definitions(dumper PRIVATE NOMINMAX) diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index 534fce5bc2..921438cd41 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp 
@@ -711,6 +711,7 @@ void PipelineDumper::dumpPipelineShaderInfo(const PipelineShaderInfo *shaderInfo dumpFile << "options.backwardPropagateNoContract = " << shaderInfo->options.backwardPropagateNoContract << "\n"; dumpFile << "options.forwardPropagateNoContract = " << shaderInfo->options.forwardPropagateNoContract << "\n"; dumpFile << "options.constantBufferBindingOffset = " << shaderInfo->options.constantBufferBindingOffset << "\n"; + dumpFile << "options.imageSampleDrefReturnsRgba = " << shaderInfo->options.imageSampleDrefReturnsRgba << "\n"; dumpFile << "\n"; // clang-format on } @@ -936,17 +937,29 @@ void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::os dumpFile << "options.internalRtShaders = " << options->internalRtShaders << "\n"; dumpFile << "options.forceNonUniformResourceIndexStageMask = " << options->forceNonUniformResourceIndexStageMask << "\n"; - dumpFile << "options.replaceSetWithResourceType = " << options->getGlState().replaceSetWithResourceType << "\n"; - dumpFile << "options.disableSampleMask = " << options->getGlState().disableSampleMask << "\n"; - dumpFile << "options.buildResourcesDataForShaderModule = " << options->getGlState().buildResourcesDataForShaderModule +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 73 + const char *glStatePrefix = "options."; +#else + const char *glStatePrefix = "options.glState."; +#endif + dumpFile << glStatePrefix << "replaceSetWithResourceType = " << options->getGlState().replaceSetWithResourceType + << "\n"; + dumpFile << glStatePrefix << "disableSampleMask = " << options->getGlState().disableSampleMask << "\n"; + dumpFile << glStatePrefix + << "buildResourcesDataForShaderModule = " << options->getGlState().buildResourcesDataForShaderModule << "\n"; + dumpFile << glStatePrefix << "disableTruncCoordForGather = " << options->getGlState().disableTruncCoordForGather + << "\n"; + dumpFile << glStatePrefix << "enableCombinedTexture = " << options->getGlState().enableCombinedTexture << "\n"; + 
dumpFile << glStatePrefix << "vertex64BitsAttribSingleLoc = " << options->getGlState().vertex64BitsAttribSingleLoc << "\n"; - dumpFile << "options.disableTruncCoordForGather = " << options->getGlState().disableTruncCoordForGather << "\n"; - dumpFile << "options.enableCombinedTexture = " << options->getGlState().enableCombinedTexture << "\n"; - dumpFile << "options.vertex64BitsAttribSingleLoc = " << options->getGlState().vertex64BitsAttribSingleLoc << "\n"; - dumpFile << "options.enableFragColor = " << options->getGlState().enableFragColor << "\n"; - dumpFile << "options.disableBaseVertex = " << options->getGlState().disableBaseVertex << "\n"; - dumpFile << "options.enablePrimGeneratedQuery = " << options->enablePrimGeneratedQuery << "\n"; - dumpFile << "options.disablePerCompFetch = " << options->disablePerCompFetch << "\n"; + dumpFile << glStatePrefix << "enableFragColor = " << options->getGlState().enableFragColor << "\n"; + dumpFile << glStatePrefix << "disableBaseVertex = " << options->getGlState().disableBaseVertex << "\n"; + dumpFile << glStatePrefix << "enablePrimGeneratedQuery = " << options->enablePrimGeneratedQuery << "\n"; + dumpFile << glStatePrefix << "disablePerCompFetch = " << options->disablePerCompFetch << "\n"; + dumpFile << glStatePrefix << "enablePolygonStipple = " << options->getGlState().enablePolygonStipple << "\n"; + dumpFile << glStatePrefix << "enableLineSmooth = " << options->getGlState().enableLineSmooth << "\n"; + dumpFile << glStatePrefix << "emulateWideLineStipple = " << options->getGlState().emulateWideLineStipple << "\n"; + dumpFile << glStatePrefix << "enablePointSmooth = " << options->getGlState().enablePointSmooth << "\n"; } // ===================================================================================================================== @@ -1897,6 +1910,10 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options hasher->Update(options->getGlState().enableFragColor); 
hasher->Update(options->getGlState().disableBaseVertex); hasher->Update(options->enablePrimGeneratedQuery); + hasher->Update(options->getGlState().enablePolygonStipple); + hasher->Update(options->getGlState().enableLineSmooth); + hasher->Update(options->getGlState().emulateWideLineStipple); + hasher->Update(options->getGlState().enablePointSmooth); // disablePerCompFetch has been handled in updateHashForNonFragmentState } @@ -1986,6 +2003,7 @@ void PipelineDumper::updateHashForPipelineShaderInfo(ShaderStage stage, const Pi hasher->Update(options.constantBufferBindingOffset); hasher->Update(options.backwardPropagateNoContract); hasher->Update(options.forwardPropagateNoContract); + hasher->Update(options.imageSampleDrefReturnsRgba); } } } diff --git a/tool/vfx/vfxPipelineDoc.cpp b/tool/vfx/vfxPipelineDoc.cpp index f3e9ed993a..caad47db8e 100644 --- a/tool/vfx/vfxPipelineDoc.cpp +++ b/tool/vfx/vfxPipelineDoc.cpp @@ -413,6 +413,9 @@ bool PipelineDocument::getPtrOfSubSection(Section *section, unsigned lineNum, co #endif CASE_SUBSECTION(MemberTypeExtendedRobustness, SectionExtendedRobustness) CASE_SUBSECTION(MemberTypeAdvancedBlendInfo, SectionAdvancedBlendInfo) +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 73 + CASE_SUBSECTION(MemberTypeGlState, SectionGlState) +#endif default: result = Document::getPtrOfSubSection(section, lineNum, memberName, memberType, isWriteAccess, arrayIndex, ptrOut, errorMsg); diff --git a/tool/vfx/vfxSection.h b/tool/vfx/vfxSection.h index 5a3f348fa6..7a817eb03f 100644 --- a/tool/vfx/vfxSection.h +++ b/tool/vfx/vfxSection.h @@ -132,15 +132,18 @@ enum MemberType : unsigned { MemberTypeGpurtOption, // VFX member type: SectionGpurtOption MemberTypeExtendedRobustness, // VFX member type: SectionExtendedRobustness MemberTypeAdvancedBlendInfo, // VFX member type: SectionAdvancedBlendInfo - MemberTypeGlAttribLocation, // GL vertex attribute location - MemberTypeGlShaderInfo, // GL SPIRV parameters - MemberTypeGlVertexAttrib, // GL vertex input attribute 
- MemberTypeGlVertexBinding, // GL vertex input binding - MemberTypeGlVertexFormat, // GL vertex attribute format - MemberTypeGlSpirvPipelineLayout, // GL SPIRV explicit pipeline layout - MemberTypeGlPatchParameter, // GL program patch parameter - MemberTypeGlSpeicalizeUniformDesc, // GL program specialized uniform - MemberTypeGlFfxTexturekey, // GL FFX texture key +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 73 + MemberTypeGlState, // VFX member type: SectionGlState +#endif + MemberTypeGlAttribLocation, // GL vertex attribute location + MemberTypeGlShaderInfo, // GL SPIRV parameters + MemberTypeGlVertexAttrib, // GL vertex input attribute + MemberTypeGlVertexBinding, // GL vertex input binding + MemberTypeGlVertexFormat, // GL vertex attribute format + MemberTypeGlSpirvPipelineLayout, // GL SPIRV explicit pipeline layout + MemberTypeGlPatchParameter, // GL program patch parameter + MemberTypeGlSpeicalizeUniformDesc, // GL program specialized uniform + MemberTypeGlFfxTexturekey, // GL FFX texture key }; // ===================================================================================================================== diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index ce4cc75742..6c65f74e5c 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -256,6 +256,7 @@ class SectionShaderOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, backwardPropagateNoContract, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, forwardPropagateNoContract, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, constantBufferBindingOffset, MemberTypeInt, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionShaderOption, imageSampleDrefReturnsRgba, MemberTypeBool, false); return addrTableInitializer; }(); return {addrTable.data(), addrTable.size()}; @@ -426,6 +427,45 @@ class SectionExtendedRobustness : public Section { SubState m_state; }; +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 
73 +// ===================================================================================================================== +// Represents the sub section GLState +class SectionGlState : public Section { +public: + typedef Vkgc::PipelineOptions::GLState SubState; + + SectionGlState() : Section(getAddrTable(), SectionTypeUnset, "glState") { memset(&m_state, 0, sizeof(m_state)); } + + void getSubState(SubState &state) { state = m_state; }; + SubState &getSubStateRef() { return m_state; }; + +private: + static StrToMemberAddrArrayRef getAddrTable() { + static std::vector addrTable = []() { + std::vector addrTableInitializer; + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, replaceSetWithResourceType, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, disableSampleMask, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, buildResourcesDataForShaderModule, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, disableTruncCoordForGather, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enableCombinedTexture, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, vertex64BitsAttribSingleLoc, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enableFragColor, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, disableBaseVertex, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, bindlessTextureMode, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, bindlessImageMode, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enablePolygonStipple, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enableLineSmooth, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, emulateWideLineStipple, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGlState, enablePointSmooth, MemberTypeBool, false); + return 
addrTableInitializer; + }(); + return {addrTable.data(), addrTable.size()}; + } + + SubState m_state; +}; +#endif + // ===================================================================================================================== // Represents the sub section pipeline option class SectionPipelineOption : public Section { @@ -438,6 +478,9 @@ class SectionPipelineOption : public Section { void getSubState(SubState &state) { m_extendedRobustness.getSubState(m_state.extendedRobustness); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 73 + m_glState.getSubState(m_state.glState); +#endif state = m_state; }; SubState &getSubStateRef() { return m_state; }; @@ -474,6 +517,7 @@ class SectionPipelineOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, optimizeTessFactor, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableInterpModePatch, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, pageMigrationEnabled, MemberTypeBool, false); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 73 INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, replaceSetWithResourceType, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disableSampleMask, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, buildResourcesDataForShaderModule, MemberTypeBool, false); @@ -482,6 +526,15 @@ class SectionPipelineOption : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, vertex64BitsAttribSingleLoc, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableFragColor, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disableBaseVertex, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, bindlessTextureMode, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, bindlessImageMode, MemberTypeBool, false); + 
INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enablePolygonStipple, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enableLineSmooth, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, emulateWideLineStipple, MemberTypeBool, false); + INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enablePointSmooth, MemberTypeBool, false); +#else + INIT_MEMBER_NAME_TO_ADDR(SectionPipelineOption, m_glState, MemberTypeGlState, true); +#endif INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, enablePrimGeneratedQuery, MemberTypeBool, false); INIT_STATE_MEMBER_NAME_TO_ADDR(SectionPipelineOption, disablePerCompFetch, MemberTypeBool, false); return addrTableInitializer; @@ -491,6 +544,9 @@ class SectionPipelineOption : public Section { SubState m_state; SectionExtendedRobustness m_extendedRobustness; +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 73 + SectionGlState m_glState; +#endif }; // ===================================================================================================================== diff --git a/version/include/llpc/GpurtIntrinsics.h b/version/include/llpc/GpurtIntrinsics.h index be1ebbfdc5..90072cc393 100644 --- a/version/include/llpc/GpurtIntrinsics.h +++ b/version/include/llpc/GpurtIntrinsics.h @@ -63,7 +63,6 @@ #endif #define CONTINUATIONS_LGC_STACK_LOWERING 1 -#define CONTINUATIONS_USE_DUMMY_RET_ADDR 1 //===================================================================================================================== // Continuation intrinsics diff --git a/version/include/llpcVersion.h.in b/version/include/llpcVersion.h.in index e8d33c7a96..8fe424cf37 100644 --- a/version/include/llpcVersion.h.in +++ b/version/include/llpcVersion.h.in @@ -37,6 +37,9 @@ // %Version History // | %Version | Change Description | // | -------- | ----------------------------------------------------------------------------------------------------- | +// | 74.0 | Replace LlpcRaytracingMode::None with 
LlpcRaytracingMode::Auto. Now LLPC can choose continuations mode| +// | | automatically. Add isCps to RayTracingPipelineBuildOut to notify client continuations mode is chosen. | +// | 73.2 | Add imageSampleDrefReturnsRgba to ShaderOptions | // | 73.0 | Add all the ogl specific pipeline options in a new structure GLState | // | 72.4 | Add enableFlatShade to GraphicsPipelineBuildInfo. | // | 72.3 | Add enableColorClampVs and enableColorClampFs to GraphicsPipelineBuildInfo. | @@ -183,10 +186,10 @@ #pragma once /// LLPC major interface version. -#define LLPC_INTERFACE_MAJOR_VERSION 72 +#define LLPC_INTERFACE_MAJOR_VERSION 74 /// LLPC minor interface version. -#define LLPC_INTERFACE_MINOR_VERSION 2 +#define LLPC_INTERFACE_MINOR_VERSION 0 /// The client's LLPC major interface version #ifndef LLPC_CLIENT_INTERFACE_MAJOR_VERSION