From c1abb9cde984316d041b4fc4c2286d0d2009c540 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 11 Oct 2024 19:23:33 +0300 Subject: [PATCH] Add AMDGCNSPIRV specific (and mostly temporary) delta. --- lib/SPIRV/OCLToSPIRV.cpp | 3 + lib/SPIRV/OCLUtil.cpp | 4 +- lib/SPIRV/OCLUtil.h | 37 ++ lib/SPIRV/SPIRVReader.cpp | 556 ++++++++++++++++-- lib/SPIRV/SPIRVToLLVMDbgTran.cpp | 4 +- lib/SPIRV/SPIRVToOCL.cpp | 3 + lib/SPIRV/SPIRVToOCL20.cpp | 136 ++++- lib/SPIRV/SPIRVUtil.cpp | 13 +- lib/SPIRV/SPIRVWriter.cpp | 77 ++- lib/SPIRV/libSPIRV/SPIRVInstruction.h | 16 +- .../group-instructions.ll | 2 +- .../OpenCL/atomic_syncscope_test.ll | 6 +- 12 files changed, 767 insertions(+), 90 deletions(-) diff --git a/lib/SPIRV/OCLToSPIRV.cpp b/lib/SPIRV/OCLToSPIRV.cpp index a8cc6d231..19c15c7de 100644 --- a/lib/SPIRV/OCLToSPIRV.cpp +++ b/lib/SPIRV/OCLToSPIRV.cpp @@ -315,6 +315,9 @@ void OCLToSPIRVBase::visitCallInst(CallInst &CI) { if (DemangledName == kOCLBuiltinName::WorkGroupBarrier || DemangledName == kOCLBuiltinName::Barrier || DemangledName == kOCLBuiltinName::SubGroupBarrier) { + if (F->arg_size() != 1 && F->arg_size() != 2 && + F->getParent()->getTargetTriple() == "spirv64-amd-amdhsa") + return; // Somebody used the name. visitCallBarrier(&CI); return; } diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp index 146bdaa3e..59618fa6d 100644 --- a/lib/SPIRV/OCLUtil.cpp +++ b/lib/SPIRV/OCLUtil.cpp @@ -147,7 +147,7 @@ template <> void SPIRVMap::init() { } template <> void SPIRVMap::init() { - add("work_item", ScopeInvocation); + add("singlethread", ScopeInvocation); add("workgroup", ScopeWorkgroup); add("device", ScopeDevice); add("all_svm_devices", ScopeCrossDevice); @@ -658,6 +658,8 @@ template <> void LLVMSPIRVAtomicRmwOpCodeMap::init() { add(llvm::AtomicRMWInst::FAdd, OpAtomicFAddEXT); add(llvm::AtomicRMWInst::FMin, OpAtomicFMinEXT); add(llvm::AtomicRMWInst::FMax, OpAtomicFMaxEXT); + add(llvm::AtomicRMWInst::UIncWrap, OpAtomicIIncrement); + add(llvm::AtomicRMWInst::UDecWrap, OpAtomicIDecrement); } } // namespace SPIRV diff --git a/lib/SPIRV/OCLUtil.h b/lib/SPIRV/OCLUtil.h index 35cf6ddff..dba82a47b 100644 --- a/lib/SPIRV/OCLUtil.h +++ b/lib/SPIRV/OCLUtil.h @@ -491,6 +491,43 @@ inline OCLMemOrderKind mapSPIRVMemOrderToOCL(unsigned Sema) { return OCLMemOrderMap::rmap(extractSPIRVMemOrderSemantic(Sema)); } +inline unsigned int mapAMDGCNAddrSpaceToSPIRV(unsigned int AS) { + switch (AS) { + case 0: + return SPIRAS_Generic; + case 1: + return SPIRAS_Global; + case 3: + return SPIRAS_Local; + case 4: + return SPIRAS_Constant; + case 5: + return SPIRAS_Private; + default: + llvm_unreachable("Unexpected AMDGCN Address Space"); + return UINT_MAX; + } +} + +inline SPIRAddressSpace mapSPIRVAddrSpaceToAMDGPU(SPIRVStorageClassKind SPVAS) { + switch (SPVAS) { + case StorageClassCrossWorkgroup: + return static_cast(1); + case StorageClassUniformConstant: + return static_cast(4); + case StorageClassWorkgroup: + return static_cast(3); + case StorageClassPrivate: + case StorageClassFunction: + return static_cast(5); + case StorageClassGeneric: + return static_cast(0); + default: + llvm_unreachable("Unexpected StorageClass"); + return static_cast(UINT_MAX); + } +} + bool isPipeOrAddressSpaceCastBI(const StringRef MangledName); bool isEnqueueKernelBI(const StringRef MangledName); bool isKernelQueryBI(const StringRef MangledName); diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index 6b9e49ed0..a2680039a 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -74,6 +74,7 @@ 
#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/Type.h" #include "llvm/IR/TypedPointerType.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -328,6 +329,7 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) { SPIRVDBG(spvdbgs() << "[transType] " << *T << " -> ";) T->validate(); + auto IsAMDGCN = M->getTargetTriple() == "amdgcn-amd-amdhsa"; switch (static_cast(T->getOpCode())) { case OpTypeVoid: return mapType(T, Type::getVoidTy(*Context)); @@ -342,27 +344,36 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) { // and evaluated before the LLVM ArrayType can be constructed. auto *LenExpr = static_cast(T)->getLength(); auto *LenValue = cast(transValue(LenExpr, nullptr, nullptr)); + if (LenValue->getZExtValue() == UINT32_MAX && IsAMDGCN) + return mapType(T, ArrayType::get(transType(T->getArrayElementType()), 0)); return mapType(T, ArrayType::get(transType(T->getArrayElementType()), LenValue->getZExtValue())); } case internal::OpTypeTokenINTEL: return mapType(T, Type::getTokenTy(*Context)); case OpTypePointer: { - unsigned AS = SPIRSPIRVAddrSpaceMap::rmap(T->getPointerStorageClass()); + unsigned AS = + IsAMDGCN ? mapSPIRVAddrSpaceToAMDGPU(T->getPointerStorageClass()) : + SPIRSPIRVAddrSpaceMap::rmap(T->getPointerStorageClass()); if (AS == SPIRAS_CodeSectionINTEL && !BM->shouldEmitFunctionPtrAddrSpace()) - AS = SPIRAS_Private; + AS = IsAMDGCN ? + M->getDataLayout().getProgramAddressSpace() : SPIRAS_Private; if (BM->shouldEmitFunctionPtrAddrSpace() && T->getPointerElementType()->getOpCode() == OpTypeFunction) - AS = SPIRAS_CodeSectionINTEL; + AS = IsAMDGCN ? + M->getDataLayout().getProgramAddressSpace() :SPIRAS_CodeSectionINTEL; Type *ElementTy = transType(T->getPointerElementType(), UseTPT); if (UseTPT) return TypedPointerType::get(ElementTy, AS); return mapType(T, PointerType::get(ElementTy, AS)); } case OpTypeUntypedPointerKHR: { - unsigned AS = SPIRSPIRVAddrSpaceMap::rmap(T->getPointerStorageClass()); + unsigned AS = IsAMDGCN ? + mapSPIRVAddrSpaceToAMDGPU(T->getPointerStorageClass()) : + SPIRSPIRVAddrSpaceMap::rmap(T->getPointerStorageClass()); if (AS == SPIRAS_CodeSectionINTEL && !BM->shouldEmitFunctionPtrAddrSpace()) - AS = SPIRAS_Private; + AS = IsAMDGCN ? + M->getDataLayout().getProgramAddressSpace() : SPIRAS_Private; return mapType(T, PointerType::get(*Context, AS)); } case OpTypeVector: @@ -1063,6 +1074,12 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *BV, Function *F, case OpFConvert: CO = IsExt ? 
Instruction::FPExt : Instruction::FPTrunc; break; + case OpBitcast: + if (Src->getType()->isPointerTy() && Dst->isPointerTy() && + Src->getType()->getPointerAddressSpace() != Dst->getPointerAddressSpace() && + M->getTargetTriple() == "amdgcn-amd-amdhsa") + CO = Instruction::AddrSpaceCast; + break; default: CO = static_cast(OpCodeMap::rmap(BC->getOpCode())); } @@ -1158,7 +1175,7 @@ Value *SPIRVToLLVM::transCmpInst(SPIRVValue *BV, BasicBlock *BB, Function *F) { OP = OpFOrdNotEqual; if (BT->isTypeVectorOrScalarInt() || BT->isTypeVectorOrScalarBool() || - BT->isTypePointer()) + BT->isTypePointer() || BT->isTypeVectorPointer()) Inst = Builder.CreateICmp(CmpMap::rmap(OP), Op0, Op1); else if (BT->isTypeVectorOrScalarFloat()) Inst = Builder.CreateFCmp(CmpMap::rmap(OP), Op0, Op1); @@ -1491,8 +1508,9 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, auto *BCCTy = cast(transType(BCC->getType())); auto Members = BCCTy->getNumElements(); auto Constants = CV.size(); - // if we try to initialize constant TypeStruct, add bitcasts - // if src and dst types are both pointers but to different types + // if we try to initialize constant TypeStruct, add addrspacecasts + // if src and dst types are both pointers but to different address spaces; + // with opaque pointers the only possible mismatch can be in the AS. if (Members == Constants) { for (unsigned I = 0; I < Members; ++I) { if (CV[I]->getType() == BCCTy->getElementType(I)) @@ -1542,9 +1560,10 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, static_cast(BV); SPIRVFunction *F = BC->getFunction(); BV->setName(F->getName()); - const unsigned AS = BM->shouldEmitFunctionPtrAddrSpace() - ? SPIRAS_CodeSectionINTEL - : SPIRAS_Private; + const unsigned AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + M->getDataLayout().getProgramAddressSpace() : + (BM->shouldEmitFunctionPtrAddrSpace() ? SPIRAS_CodeSectionINTEL + : SPIRAS_Private); return mapValue(BV, transFunction(F, AS)); } @@ -1579,7 +1598,10 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, if (BS == StorageClassFunction && !Init) { assert(BB && "Invalid BB"); - return mapValue(BV, new AllocaInst(Ty, 0, BV->getName(), BB)); + return mapValue(BV, + new AllocaInst(Ty, + M->getDataLayout().getAllocaAddrSpace(), + BV->getName(), BB)); } SPIRAddressSpace AddrSpace; @@ -1590,7 +1612,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, AddrSpace = VectorComputeUtil::getVCGlobalVarAddressSpace(BS); Initializer = UndefValue::get(Ty); } else - AddrSpace = SPIRSPIRVAddrSpaceMap::rmap(BS); + AddrSpace = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(BS) : SPIRSPIRVAddrSpaceMap::rmap(BS); // Force SPIRV BuiltIn variable's name to be __spirv_BuiltInXXXX. // No matter what BV's linkage name is. SPIRVBuiltinVariableKind BVKind; @@ -1600,7 +1623,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, /*Initializer=*/nullptr, BV->getName(), 0, GlobalVariable::NotThreadLocal, AddrSpace); auto *Res = mapValue(BV, LVar); - if (Init) + if (Init && BS != SPIRVStorageClassKind::StorageClassWorkgroup) Initializer = dyn_cast(transValue(Init, F, BB, false)); else if (LinkageTy == GlobalValue::CommonLinkage) // In LLVM, variables with common linkage type must be initialized to 0. @@ -1617,6 +1640,12 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, ? 
GlobalValue::UnnamedAddr::Global : GlobalValue::UnnamedAddr::None); LVar->setInitializer(Initializer); + if (BVar->hasDecorate(DecorationUserTypeGOOGLE) && + M->getTargetTriple() == "amdgcn-amd-amdhsa") { + const auto Dec = BM->get( + *BVar->getDecorate(DecorationUserTypeGOOGLE).cbegin()); + LVar->setExternallyInitialized(Dec->getStr() == "externally_initialized"); + } if (IsVectorCompute) { LVar->addAttribute(kVCMetadata::VCGlobalVariable); @@ -1630,6 +1659,11 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, LVar->addAttribute(SEVAttr.value().getKindAsString(), SEVAttr.value().getValueAsString()); } + if (M->getTargetTriple() == "amdgcn-amd-amdhsa" && + LVar->hasAppendingLinkage() && + (LVar->getName() == "llvm.compiler.used" || + LVar->getName() == "llvm.used")) + LVar->setSection("llvm.metadata"); return Res; } @@ -1687,8 +1721,9 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, auto *VLA = static_cast(BV); llvm::Type *Ty = transType(BV->getType()->getPointerElementType()); llvm::Value *ArrSize = transValue(VLA->getOperand(0), F, BB); - return mapValue( - BV, new AllocaInst(Ty, SPIRAS_Private, ArrSize, BV->getName(), BB)); + return mapValue(BV, new AllocaInst(Ty, + M->getDataLayout().getAllocaAddrSpace(), + ArrSize, BV->getName(), BB)); } case OpRestoreMemoryINTEL: { @@ -1731,6 +1766,13 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, Phi->foreachPair([&](SPIRVValue *IncomingV, SPIRVBasicBlock *IncomingBB, size_t Index) { auto *Translated = transValue(IncomingV, F, BB); + if (LPhi->getType() != Translated->getType() && + LPhi->getType()->isPointerTy() && + F->getParent()->getTargetTriple() == "amdgcn-amd-amdhsa") + // TODO: AMDSPV - due to reverse translating const globals to AS4, + // these mismatches might occur; find a better way to handle it. + Translated = ConstantExpr::getAddrSpaceCast(cast(Translated), + LPhi->getType()); LPhi->addIncoming(Translated, dyn_cast(transValue(IncomingBB, F, BB))); }); @@ -1841,11 +1883,18 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, if (BB) { Builder.SetInsertPoint(BB); } - return mapValue(BV, - Builder.CreateSelect(transValue(BS->getCondition(), F, BB), - transValue(BS->getTrueValue(), F, BB), - transValue(BS->getFalseValue(), F, BB), - BV->getName())); + auto Cond = transValue(BS->getCondition(), F, BB); + auto True = transValue(BS->getTrueValue(), F, BB); + auto False = transValue(BS->getFalseValue(), F, BB); + if (True->getType() != False->getType() && True->getType()->isPointerTy()) { + if (isa(True)) + True = True->stripPointerCasts(); + if (isa(False)) + False = False->stripPointerCasts(); + if (True->getType() != False->getType()) + llvm_unreachable("Ill-formed Select"); + } + return mapValue(BV, Builder.CreateSelect(Cond, True, False, BV->getName())); } case OpLine: @@ -2139,7 +2188,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, case OpCopyObject: { SPIRVCopyObject *CO = static_cast(BV); auto *Ty = transType(CO->getOperand()->getType()); - AllocaInst *AI = new AllocaInst(Ty, 0, "", BB); + AllocaInst *AI = + new AllocaInst(Ty, M->getDataLayout().getAllocaAddrSpace(), "", BB); new StoreInst(transValue(CO->getOperand(), F, BB), AI, BB); LoadInst *LI = new LoadInst(Ty, AI, "", BB); return mapValue(BV, LI); @@ -2168,7 +2218,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, BaseSPVTy->isTypeVector() ? 
transType( BaseSPVTy->getVectorComponentType()->getPointerElementType()) - : transType(BaseSPVTy->getPointerElementType()); + : transType(AC->isUntyped() ? AC->getBaseType() + : BaseSPVTy->getPointerElementType()); auto Index = transValue(AC->getIndices(), F, BB); if (!AC->hasPtrIndex()) Index.insert(Index.begin(), getInt32(M, 0)); @@ -2203,6 +2254,10 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, V = GEP; } else { auto *CT = cast(Base); + if (auto CE = dyn_cast(CT)) + if (CE->isCast() && CE->getOpcode() == Instruction::AddrSpaceCast) + if (auto GV = dyn_cast(CE->getOperand(0))) + BaseTy = GV->getValueType(); V = ConstantExpr::getGetElementPtr(BaseTy, CT, Index, IsInbound); } return mapValue(BV, V); @@ -2264,7 +2319,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, if (!HasRtValues) return mapValue(BV, ConstantArray::get(AT, CV)); - AllocaInst *Alloca = new AllocaInst(AT, SPIRAS_Private, "", BB); + AllocaInst *Alloca = + new AllocaInst(AT, M->getDataLayout().getAllocaAddrSpace(), "", BB); // get pointer to the element of the array // store the result of argument @@ -2283,7 +2339,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, if (!HasRtValues) return mapValue(BV, ConstantStruct::get(ST, CV)); - AllocaInst *Alloca = new AllocaInst(ST, SPIRAS_Private, "", BB); + AllocaInst *Alloca = + new AllocaInst(ST, M->getDataLayout().getAllocaAddrSpace(), "", BB); // get pointer to the element of structure // store the result of argument @@ -2434,6 +2491,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, BC->getName(), BB); setCallingConv(Call); setAttrByCalledFunc(Call); + applyFPFastMathModeDecorations(BV, Call); return mapValue(BV, Call); } @@ -2445,14 +2503,18 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, SPIRVFunctionPointerCallINTEL *BC = static_cast(BV); auto *V = transValue(BC->getCalledValue(), F, BB); - auto *SpirvFnTy = BC->getCalledValue()->getType()->getPointerElementType(); - auto *FnTy = cast(transType(SpirvFnTy)); + auto *RetTy = transType(BC->getType()); + auto ArgsTy = transTypeVector(BC->getArgumentValueTypes()); + auto *FnTy = FunctionType::get(RetTy, ArgsTy, false); auto *Call = CallInst::Create( FnTy, V, transValue(BC->getArgumentValues(), F, BB), BC->getName(), BB); - transFunctionPointerCallArgumentAttributes( - BV, Call, static_cast(SpirvFnTy)); + if (!BC->getCalledValue()->getType()->getPointerElementType()->isTypeUntypedPointerKHR()) + transFunctionPointerCallArgumentAttributes( + BV, Call, + static_cast(BC->getCalledValue()->getType()->getPointerElementType())); // Assuming we are calling a regular device function - Call->setCallingConv(CallingConv::SPIR_FUNC); + Call->setCallingConv(M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + CallingConv::C : CallingConv::SPIR_FUNC); // Don't set attributes, because at translation time we don't know which // function exactly we are calling. return mapValue(BV, Call); @@ -2825,8 +2887,12 @@ Value *SPIRVToLLVM::transFixedPointInst(SPIRVInstruction *BI, BasicBlock *BB) { std::vector Args; Args.reserve(8); if (RetTy->getIntegerBitWidth() > 64) { - llvm::PointerType *RetPtrTy = llvm::PointerType::get(RetTy, SPIRAS_Generic); - Value *Alloca = new AllocaInst(RetTy, SPIRAS_Private, "", BB); + llvm::PointerType *RetPtrTy = llvm::PointerType::get( + RetTy, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? 
+ mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic); + Value *Alloca = + new AllocaInst(RetTy, M->getDataLayout().getAllocaAddrSpace(), "", BB); Value *RetValPtr = new AddrSpaceCastInst(Alloca, RetPtrTy, "", BB); ArgTys.emplace_back(RetPtrTy); Args.emplace_back(RetValPtr); @@ -2852,7 +2918,8 @@ Value *SPIRVToLLVM::transFixedPointInst(SPIRVInstruction *BI, BasicBlock *BB) { FunctionCallee FCallee = M->getOrInsertFunction(FuncName, FT); auto *Func = cast(FCallee.getCallee()); - Func->setCallingConv(CallingConv::SPIR_FUNC); + Func->setCallingConv(M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + CallingConv::C : CallingConv::SPIR_FUNC); if (isFuncNoUnwind()) Func->addFnAttr(Attribute::NoUnwind); @@ -2948,9 +3015,13 @@ Value *SPIRVToLLVM::transArbFloatInst(SPIRVInstruction *BI, BasicBlock *BB, std::vector Args; if (RetTy->getIntegerBitWidth() > 64) { - llvm::PointerType *RetPtrTy = llvm::PointerType::get(RetTy, SPIRAS_Generic); + llvm::PointerType *RetPtrTy = llvm::PointerType::get( + RetTy, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic); ArgTys.push_back(RetPtrTy); - Value *Alloca = new AllocaInst(RetTy, SPIRAS_Private, "", BB); + Value *Alloca = + new AllocaInst(RetTy, M->getDataLayout().getAllocaAddrSpace(), "", BB); Value *RetValPtr = new AddrSpaceCastInst(Alloca, RetPtrTy, "", BB); Args.push_back(RetValPtr); } @@ -2995,7 +3066,8 @@ Value *SPIRVToLLVM::transArbFloatInst(SPIRVInstruction *BI, BasicBlock *BB, FunctionCallee FCallee = M->getOrInsertFunction(FuncName, FT); auto *Func = cast(FCallee.getCallee()); - Func->setCallingConv(CallingConv::SPIR_FUNC); + Func->setCallingConv(M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + CallingConv::C : CallingConv::SPIR_FUNC); if (isFuncNoUnwind()) Func->addFnAttr(Attribute::NoUnwind); @@ -3128,7 +3200,8 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { auto BFName = I.getFirst()->getName(); if (BF->getName() == BFName) { auto *F = I.getSecond(); - F->setCallingConv(CallingConv::SPIR_KERNEL); + F->setCallingConv(M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + CallingConv::AMDGPU_KERNEL : CallingConv::SPIR_KERNEL); F->setLinkage(GlobalValue::ExternalLinkage); F->setDSOLocal(false); F = cast(mapValue(BF, F)); @@ -3142,6 +3215,21 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { auto Linkage = IsKernel ? GlobalValue::ExternalLinkage : transLinkageType(BF); FunctionType *FT = cast(transType(BF->getFunctionType())); std::string FuncName = BF->getName(); + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") { + if (FuncName == "fprintf" || FuncName == "sprintf" || + FuncName == "snprintf" || FuncName == "__isoc23_fscanf" || + FuncName == "_ZSt24__throw_out_of_range_fmtPKcz" || + FuncName.find("_ZN9__gnu_cxx12__to_xstring") == 0 || + FuncName == "die" || FuncName == "print_and_log" || + FuncName == "print_to_log" || FuncName == "syscall") + FT = FunctionType::get(FT->getReturnType(), FT->params(), true); + if (FuncName == "llvm.threadlocal.address.p4") + FuncName = "llvm.threadlocal.address.p0"; + if (FuncName == "llvm.va_start.p4") + FuncName = "llvm.va_start.p0"; + if (FuncName == "llvm.va_end.p4") + FuncName = "llvm.va_end.p0"; + } StringRef FuncNameRef(FuncName); // Transform "@spirv.llvm_memset_p0i8_i32.volatile" to @llvm.memset.p0i8.i32 // assuming llvm.memset is supported by the device compiler. 
If this @@ -3164,11 +3252,12 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { if (!F) F = Function::Create(FT, Linkage, AS, FuncName, M); F = cast(mapValue(BF, F)); - mapFunction(BF, F); if (F->isIntrinsic()) { - if (F->getIntrinsicID() != Intrinsic::umul_with_overflow) + if (F->getIntrinsicID() != Intrinsic::umul_with_overflow) { + mapFunction(BF, F); return F; + } std::string Name = F->getName().str(); auto *ST = cast(F->getReturnType()); auto *FT = F->getFunctionType(); @@ -3177,11 +3266,17 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { F->setName("old_" + Name); auto *NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(), Name, F->getParent()); + mapFunction(BF, NewFn); return NewFn; } - F->setCallingConv(IsKernel ? CallingConv::SPIR_KERNEL - : CallingConv::SPIR_FUNC); + mapFunction(BF, F); + + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + F->setCallingConv(IsKernel ? CallingConv::AMDGPU_KERNEL : CallingConv::C); + else + F->setCallingConv(IsKernel ? CallingConv::SPIR_KERNEL + : CallingConv::SPIR_FUNC); transFunctionAttrs(BF, F); // Creating all basic blocks before creating instructions. @@ -3198,7 +3293,10 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF, unsigned AS) { } } - validatePhiPredecessors(F); + // TODO: this is temporarily disabled as it breaks some more complex code + // patterns that are otherwise correctly(-ish) handled + if (M->getTargetTriple() != "amdgcn-amd-amdhsa") + validatePhiPredecessors(F); transLLVMLoopMetadata(F); return F; @@ -3265,7 +3363,10 @@ SPIRVToLLVM::transOCLBuiltinPostproc(SPIRVInstruction *BI, CallInst *CI, Value *SPIRVToLLVM::transBlockInvoke(SPIRVValue *Invoke, BasicBlock *BB) { auto *TranslatedInvoke = transFunction(static_cast(Invoke)); - auto *Int8PtrTyGen = PointerType::get(*Context, SPIRAS_Generic); + auto *Int8PtrTyGen = PointerType::get( + *Context, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic); return CastInst::CreatePointerBitCastOrAddrSpaceCast(TranslatedInvoke, Int8PtrTyGen, "", BB); } @@ -3279,7 +3380,10 @@ Instruction *SPIRVToLLVM::transWGSizeQueryBI(SPIRVInstruction *BI, Function *F = M->getFunction(FName); if (!F) { - auto *Int8PtrTyGen = PointerType::get(*Context, SPIRAS_Generic); + auto *Int8PtrTyGen = PointerType::get( + *Context, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic); FunctionType *FT = FunctionType::get(Type::getInt32Ty(*Context), {Int8PtrTyGen, Int8PtrTyGen}, false); F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M); @@ -3304,7 +3408,10 @@ Instruction *SPIRVToLLVM::transSGSizeQueryBI(SPIRVInstruction *BI, auto Ops = BI->getOperands(); Function *F = M->getFunction(FName); if (!F) { - auto *Int8PtrTyGen = PointerType::get(*Context, SPIRAS_Generic); + auto *Int8PtrTyGen = PointerType::get( + *Context, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? 
+ mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic); SmallVector Tys = { transType(Ops[0]->getType()), // ndrange Int8PtrTyGen, // block_invoke @@ -3331,6 +3438,11 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, BasicBlock *BB) { std::string MangledName; auto Ops = BI->getOperands(); + if ((FuncName == "__spirv_AtomicIIncrement" || + FuncName == "__spirv_AtomicIDecrement") && + M->getTargetTriple() == "amdgcn-amd-amdhsa") + Ops.insert(Ops.end(), + BM->getValue(*BI->getDecorate(DecorationMaxByteOffsetId).cbegin())); Type *RetTy = BI->hasType() ? transType(BI->getType()) : Type::getVoidTy(*Context); transOCLBuiltinFromInstPreproc(BI, RetTy, Ops); @@ -3338,12 +3450,28 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, transTypeVector(SPIRVInstruction::getOperandTypes(Ops), true); for (auto &I : ArgTys) { if (isa(I)) { - I = TypedPointerType::get(I, SPIRAS_Private); + I = TypedPointerType::get(I, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + M->getDataLayout().getProgramAddressSpace() + : SPIRAS_Private); } } if (BM->getDesiredBIsRepresentation() != BIsRepresentation::SPIRVFriendlyIR) - mangleOpenClBuiltin(FuncName, ArgTys, MangledName); + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") { + auto TmpTys = ArgTys; + for (auto &&Ty : TmpTys) { + if (auto TPT = dyn_cast(Ty)) + Ty = TypedPointerType::get(TPT->getElementType(), + mapAMDGCNAddrSpaceToSPIRV(TPT->getAddressSpace())); + else if (isa(Ty)) + Ty = PointerType::get(Ty->getContext(), + mapAMDGCNAddrSpaceToSPIRV(Ty->getPointerAddressSpace())); + } + mangleOpenClBuiltin(FuncName, TmpTys, MangledName); + } else { + mangleOpenClBuiltin(FuncName, ArgTys, MangledName); + } else MangledName = getSPIRVFriendlyIRFunctionName(FuncName, BI->getOpCode(), ArgTys, Ops); @@ -3366,7 +3494,9 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, if (!Func || Func->getFunctionType() != FT) { LLVM_DEBUG(for (auto &I : ArgTys) { dbgs() << *I << '\n'; }); Func = Function::Create(FT, GlobalValue::ExternalLinkage, MangledName, M); - Func->setCallingConv(CallingConv::SPIR_FUNC); + Func->setCallingConv(M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + CallingConv::C : + CallingConv::SPIR_FUNC); if (isFuncNoUnwind()) Func->addFnAttr(Attribute::NoUnwind); auto OC = BI->getOpCode(); @@ -3390,6 +3520,7 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, SPIRVDBG(spvdbgs() << "[transInstToBuiltinCall] " << *BI << " -> "; dbgs() << *Call << '\n';) Instruction *Inst = transOCLBuiltinPostproc(BI, Call, BB, FuncName); + applyFPFastMathModeDecorations(BI, Call); return Inst; } @@ -3616,10 +3747,49 @@ bool SPIRVToLLVM::translate() { DbgTran->addDbgInfoVersion(); DbgTran->finalize(); + if (M->getTargetTriple() != "amdgcn-amd-amdhsa") + return true; + // TODO: this is temporary hardcoding, but will ultimately get handled in the + // FE. 
+ M->addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", 500); + M->addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind", + llvm::MDString::get(M->getContext(), "hostcall")); + StringRef Name = "__oclc_ABI_version"; + llvm::GlobalVariable *OriginalGV = M->getNamedGlobal(Name); + if (OriginalGV && + !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage())) + return true; + + auto *Type = llvm::IntegerType::getIntNTy(M->getContext(), 32); + llvm::Constant *COV = llvm::ConstantInt::get(Type, 500); + + auto *GV = new llvm::GlobalVariable( + *M, Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name, + nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, + mapSPIRVAddrSpaceToAMDGPU(StorageClassUniformConstant)); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); + GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility); + + if (OriginalGV) { + OriginalGV->replaceAllUsesWith(GV); + GV->takeName(OriginalGV); + OriginalGV->eraseFromParent(); + } + return true; } bool SPIRVToLLVM::transAddressingModel() { + if (BM->getGeneratorVer() == UINT16_MAX) { + // TODO: we should use the Target registry here instead of hardcoding + M->setTargetTriple("amdgcn-amd-amdhsa"); + M->setDataLayout( + "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-" + "p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-" + "v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" + "v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"); + return true; + } switch (BM->getAddressingModel()) { case AddressingModelPhysical64: M->setTargetTriple(SPIR_TARGETTRIPLE64); @@ -4013,7 +4183,10 @@ void SPIRVToLLVM::transUserSemantic(SPIRV::SPIRVFunction *Fun) { Constant *C = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TransFun, ResType); - Type *Int8PtrTyPrivate = PointerType::get(*Context, SPIRAS_Private); + Type *Int8PtrTyPrivate = PointerType::get( + *Context, + M->getTargetTriple() == "amdgcn-amd-amdhsa" ? 
+ mapSPIRVAddrSpaceToAMDGPU(StorageClassFunction) : SPIRAS_Private); IntegerType *Int32Ty = Type::getInt32Ty(*Context); llvm::Constant *Fields[5] = { @@ -4340,13 +4513,23 @@ bool SPIRVToLLVM::transMetadata() { BF->getExecutionMode(internal::ExecutionModeFastCompositeKernelINTEL)) F->addFnAttr(kVCMetadata::VCFCEntry); - if (F->getCallingConv() != CallingConv::SPIR_KERNEL) + if (F->getCallingConv() != CallingConv::SPIR_KERNEL && + F->getCallingConv() != CallingConv::AMDGPU_KERNEL) continue; + if (F->getCallingConv() == CallingConv::AMDGPU_KERNEL) { + F->addFnAttr("uniform-work-group-size", "true"); + F->addFnAttr(Attribute::Convergent); + F->addFnAttr(Attribute::MustProgress); + } // Generate metadata for reqd_work_group_size if (auto *EM = BF->getExecutionMode(ExecutionModeLocalSize)) { - F->setMetadata(kSPIR2MD::WGSize, - getMDNodeStringIntVec(Context, EM->getLiterals())); + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + F->addFnAttr("amdgpu-flat-work-group-size", + "1," + llvm::utostr(EM->getLiterals().front())); + else + F->setMetadata(kSPIR2MD::WGSize, + getMDNodeStringIntVec(Context, EM->getLiterals())); } // Generate metadata for work_group_size_hint if (auto *EM = BF->getExecutionMode(ExecutionModeLocalSizeHint)) { @@ -4506,7 +4689,8 @@ bool SPIRVToLLVM::transMetadata() { bool SPIRVToLLVM::transOCLMetadata(SPIRVFunction *BF) { Function *F = static_cast(getTranslatedValue(BF)); assert(F && "Invalid translated function"); - if (F->getCallingConv() != CallingConv::SPIR_KERNEL) + if (F->getCallingConv() != CallingConv::SPIR_KERNEL && + F->getCallingConv() != CallingConv::AMDGPU_KERNEL) return true; if (BF->hasDecorate(DecorationVectorComputeFunctionINTEL)) @@ -4517,9 +4701,12 @@ bool SPIRVToLLVM::transOCLMetadata(SPIRVFunction *BF) { Context, SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF, F, [=](SPIRVFunctionParameter *Arg) { SPIRVType *ArgTy = Arg->getType(); - SPIRAddressSpace AS = SPIRAS_Private; + SPIRAddressSpace AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(StorageClassFunction) : SPIRAS_Private; if (ArgTy->isTypePointer()) - AS = SPIRSPIRVAddrSpaceMap::rmap(ArgTy->getPointerStorageClass()); + AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? 
+ mapSPIRVAddrSpaceToAMDGPU(ArgTy->getPointerStorageClass()) : + SPIRSPIRVAddrSpaceMap::rmap(ArgTy->getPointerStorageClass()); else if (ArgTy->isTypeOCLImage() || ArgTy->isTypePipe()) AS = SPIRAS_Global; return ConstantAsMetadata::get( @@ -4835,6 +5022,254 @@ bool SPIRVToLLVM::transAlign(SPIRVValue *BV, Value *V) { return true; } +static Instruction *transLLVMFromExtInst(SPIRVToLLVM &Reader, OCLExtOpKind Op, + SPIRVExtInst *BC, Type *RetTy, + vector ArgTys, + BasicBlock *BB) { + opaquifyTypedPointers(ArgTys); + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + ArrayRef Formals(ArgTys); + switch (Op) { + // Acos = 0, + // Acosh = 1, + // Acospi = 2, + // Asin = 3, + // Asinh = 4, + // Asinpi = 5, + case OpenCLLIB::Atan: + ID = Intrinsic::atan; + break; + // Atan2 = 7, + // Atanh = 8, + // Atanpi = 9, + // Atan2pi = 10, + case OpenCLLIB::Ceil: ID = + Intrinsic::ceil; + break; + case OpenCLLIB::Copysign: + ID = Intrinsic::copysign; + Formals = ArrayRef(RetTy); + break; + // Cos = 14, + // Cosh = 15, + // Cospi = 16, + // Erfc = 17, + // Erf = 18, + case OpenCLLIB::Exp: + case OpenCLLIB::Native_exp: + ID = Intrinsic::exp; + break; + // Exp2 = 20, + // Exp10 = 21, + // Expm1 = 22, + case OpenCLLIB::Fabs: ID = + Intrinsic::fabs; + break; + // Fdim = 24, + case OpenCLLIB::Floor: + ID = Intrinsic::floor; + break; + case OpenCLLIB::Fma: + ID = Intrinsic::fma; + Formals = ArrayRef(RetTy); + break; + case OpenCLLIB::Fmax: + ID = Intrinsic::maxnum; + Formals = ArrayRef(RetTy); + break; + case OpenCLLIB::Fmin: + ID = Intrinsic::minnum; + Formals = ArrayRef(RetTy); + break; + // Fmod = 29, + // Fract = 30, + case OpenCLLIB::Frexp: + ID = Intrinsic::frexp; + break; + // Hypot = 32, + // Ilogb = 33, + // Ldexp = 34, + // Lgamma = 35, + // Lgamma_r = 36, + case OpenCLLIB::Log: + case OpenCLLIB::Native_log: + ID = Intrinsic::log; + break; + case OpenCLLIB::Log2: + ID = Intrinsic::log2; + break; + case OpenCLLIB::Log10: + ID = Intrinsic::log10; + break; + // Log1p = 40, + // Logb = 41, + // Mad = 42, + // Maxmag = 43, + // Minmag = 44, + // Modf = 45, + // Nan = 46, + // Nextafter = 47, + case OpenCLLIB::Pow: + ID = Intrinsic::pow; + Formals = ArrayRef(RetTy); + break; + case OpenCLLIB::Pown: + ID = Intrinsic::powi; + Formals = ArrayRef(RetTy); + break; + // Powr = 50, + // Remainder = 51, + // Remquo = 52, + case OpenCLLIB::Rint: + ID = Intrinsic::rint; + break; + // Rootn = 54, + case OpenCLLIB::Round: + ID = Intrinsic::round; + break; + // Rsqrt = 56, + case OpenCLLIB::Sin: + case OpenCLLIB::Half_sin: + case OpenCLLIB::Native_sin: + ID = Intrinsic::sin; + break; + // Sincos = 58, + // Sinh = 59, + // Sinpi = 60, + case OpenCLLIB::Sqrt: + case OpenCLLIB::Half_sqrt: + case OpenCLLIB::Native_sqrt: + ID = Intrinsic::sqrt; + break; + case OpenCLLIB::Tan: + case OpenCLLIB::Half_tan: + case OpenCLLIB::Native_tan: + ID = Intrinsic::tan; + break; + // Tanh = 63, + // Tanpi = 64, + // Tgamma = 65, + case OpenCLLIB::Trunc: + ID = Intrinsic::trunc; + break; + // Half_cos = 67, + // Half_divide = 68, + // Half_exp = 69, + // Half_exp2 = 70, + // Half_exp10 = 71, + // Half_log = 72, + // Half_log2 = 73, + // Half_log10 = 74, + // Half_powr = 75, + // Half_recip = 76, + // Half_rsqrt = 77, + // Half_sin = 78, + case OpenCLLIB::Cos: + case OpenCLLIB::Half_cos: + case OpenCLLIB::Native_cos: + ID = Intrinsic::cos; + break; + // Native_divide = 82, + // Native_exp2 = 84, + // Native_exp10 = 85, + // Native_log2 = 87, + // Native_log10 = 88, + // Native_powr = 89, + // Native_recip = 90, + // Native_rsqrt = 91, + // case 
OpenCLLIB::Native_sin: ID = Intrinsic::sin; break; + case OpenCLLIB::SAbs: ID = + Intrinsic::abs; + break; + case OpenCLLIB::Clz: ID = + Intrinsic::ctlz; + break; + case OpenCLLIB::Ctz: + ID = Intrinsic::cttz; + break; + case OpenCLLIB::USub_sat: + ID = Intrinsic::usub_sat; + Formals = ArrayRef(RetTy); + break; + default: + break; + } + + auto M = BB->getParent()->getParent(); + Function *F = nullptr; + if (ID == Intrinsic::not_intrinsic) { + if (Op == OpenCLLIB::Printf) { + F = M->getFunction("printf"); + } else { + errs() << "Failed to handle OpenCL Extended Op: " << Op << '\n'; + std::abort(); + } + } else if (ID == Intrinsic::frexp || ID == Intrinsic::powi) { + F = Intrinsic::getDeclaration( + M, ID, {Formals[0], IntegerType::getInt32Ty(M->getContext())}); + } else { + F = Intrinsic::getDeclaration(M, ID, Formals); + } + + auto Actuals = Reader.transValue(BC->getArgValues(), F, BB); + + if (ID == Intrinsic::frexp) { // TODO: this should've been done in the FE. + auto CI = CallInst::Create(F, {Actuals[0]}, BC->getName(), BB); + auto Exp = ExtractValueInst::Create(CI, 1, "exponent", BB); + new StoreInst(Exp, Actuals[1], BB); + return ExtractValueInst::Create(CI, 0, "fraction", BB); + } + + if (ID == Intrinsic::abs || ID == Intrinsic::ctlz || ID == Intrinsic::cttz) + Actuals.push_back(ConstantInt::getBool(M->getContext(), false)); + + + // Function *F = nullptr; + // if (ID == Intrinsic::not_intrinsic) { + // if (ExtOp == OpenCLLIB::Printf) + // MangledName = "printf"; + // else if (ExtOp == OpenCLLIB::Cbrt) // TODO: AMDSPV JANK, cbrt is not handled + // std::abort(); + // else if (ExtOp != OpenCLLIB::Step) // TODO: AMDSPV JANK, step is not handled + // std::abort(); + + // F = M->getFunction(MangledName); + // if (!F) { + // FunctionType *FT = FunctionType::get(RetTy, ArgTypes, false); + // F = Function::Create(FT, GlobalValue::ExternalLinkage, MangledName, M); + // F->setCallingConv(CallingConv::C); + // if (isFuncNoUnwind()) + // F->addFnAttr(Attribute::NoUnwind); + // if (isFuncReadNone(UnmangledName)) + // F->setDoesNotAccessMemory(); + // } + // } else if (ID == Intrinsic::frexp || ID == Intrinsic::powi) { + // F = Intrinsic::getDeclaration(M, ID, {Formals[0], + // IntegerType::getInt32Ty(M->getContext())}); + // } else { + // F = Intrinsic::getDeclaration(M, ID, Formals); + // } + // auto Args = transValue(BC->getArgValues(), F, BB); + // switch (ID) { // TODO: AMDSPV JANK, mismatched signature handling + // case (Intrinsic::abs): + // case (Intrinsic::ctlz): + // case (Intrinsic::cttz): + // Args.push_back(ConstantInt::getBool(M->getContext(), false)); + // break; + // default: break; + // } + + // if (Op) + // UnmangledName.find("native") == 0) + CallInst *CI = CallInst::Create(F, Actuals, BC->getName(), BB); + addFnAttr(CI, Attribute::NoUnwind); + applyFPFastMathModeDecorations(BC, CI); + // CI->setFast(true); + + return CI; +} + Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, BasicBlock *BB) { assert(BB && "Invalid BB"); @@ -4858,6 +5293,9 @@ Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, } Type *RetTy = transType(BC->getType()); + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + return transLLVMFromExtInst( + *this, ExtOp, BC, RetTy, std::move(ArgTypes), BB); std::string MangledName = getSPIRVFriendlyIRFunctionName(ExtOp, ArgTypes, RetTy); opaquifyTypedPointers(ArgTypes); @@ -4883,6 +5321,7 @@ Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, CallInst *CI = CallInst::Create(F, Args, BC->getName(), BB); 
setCallingConv(CI); addFnAttr(CI, Attribute::NoUnwind); + applyFPFastMathModeDecorations(BC, CI); return CI; } @@ -5027,7 +5466,12 @@ SPIRVToLLVM::transLinkageType(const SPIRVValue *V) { case LinkageTypeExport: if (V->getOpCode() == OpVariable || V->getOpCode() == OpUntypedVariableKHR) { - if (static_cast(V)->getInitializer() == 0) + if (static_cast(V)->getStorageClass() == + StorageClassWorkgroup && + (!V->getType()->isTypeArray() || + V->getType()->getArrayLength() != UINT32_MAX)) + return GlobalValue::InternalLinkage; + if (static_cast(V)->getInitializer() == 0) // Tentative definition return GlobalValue::CommonLinkage; } diff --git a/lib/SPIRV/SPIRVToLLVMDbgTran.cpp b/lib/SPIRV/SPIRVToLLVMDbgTran.cpp index 77f0cf0c3..342a2059f 100644 --- a/lib/SPIRV/SPIRVToLLVMDbgTran.cpp +++ b/lib/SPIRV/SPIRVToLLVMDbgTran.cpp @@ -1580,7 +1580,9 @@ SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugInst, // DIBuilder::insertDeclare doesn't allow to pass nullptr for the Storage // parameter. To work around this limitation we create a dummy temp // alloca, use it to create llvm.dbg.declare, and then remove the alloca. - auto *AI = new AllocaInst(Type::getInt8Ty(M->getContext()), 0, "tmp", BB); + auto *AI = new AllocaInst(Type::getInt8Ty(M->getContext()), + BB->getParent()->getParent()->getDataLayout().getAllocaAddrSpace(), + "tmp", BB); DbgInstPtr DbgDeclare = DIB.insertDeclare( AI, LocalVar.first, GetExpression(Ops[ExpressionIdx]), LocalVar.second, BB); diff --git a/lib/SPIRV/SPIRVToOCL.cpp b/lib/SPIRV/SPIRVToOCL.cpp index 09adac158..4e74874e1 100644 --- a/lib/SPIRV/SPIRVToOCL.cpp +++ b/lib/SPIRV/SPIRVToOCL.cpp @@ -231,6 +231,9 @@ void SPIRVToOCLBase::visitCastInst(CastInst &Cast) { !isa(Cast) && !isa(Cast)) return; + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + return; + Type const *SrcTy = Cast.getSrcTy(); Type *DstVecTy = Cast.getDestTy(); // Leave scalar casts as is. Skip boolean vector casts becase there diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp index 0dee165ac..056de9606 100644 --- a/lib/SPIRV/SPIRVToOCL20.cpp +++ b/lib/SPIRV/SPIRVToOCL20.cpp @@ -38,6 +38,7 @@ #include "OCLUtil.h" #include "SPIRVToOCL.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/Verifier.h" #define DEBUG_TYPE "spvtocl20" @@ -74,7 +75,118 @@ bool SPIRVToOCL20Base::runSPIRVToOCL(Module &Module) { return true; } +static SyncScope::ID mapOpenCLScopeToAMDGPU(LLVMContext &Ctx, uint64_t S) { + if (S == OCLMS_work_item) + return SyncScope::SingleThread; + if (S == OCLMS_work_group) + return Ctx.getOrInsertSyncScopeID("workgroup"); + if (S == OCLMS_device) + return Ctx.getOrInsertSyncScopeID("agent"); + if (S == OCLMS_sub_group) + return Ctx.getOrInsertSyncScopeID("wavefront"); + return SyncScope::System; +} + +static void translateSPIRVCmpXchgToLLVM(CallInst *CI, Op OC) { + auto Ptr = CI->getOperand(0); + auto Cmp = CI->getOperand(1); + auto New = CI->getOperand(4); + + assert(isa(CI->getArgOperand(CI->arg_size() - 4))); // Skip New. + assert(isa(CI->getArgOperand(CI->arg_size() - 3))); // Skip New. 
+ assert(isa(CI->getArgOperand(CI->arg_size() - 1))); + + auto SuccessOrder = // Offset NotAtomic and Unordered + static_cast(getArgAsInt(CI, CI->arg_size() - 4) + 2); + auto FailOrder = // Offset NotAtomic and Unordered + static_cast(getArgAsInt(CI, CI->arg_size() - 3) + 2); + SyncScope::ID S = mapOpenCLScopeToAMDGPU(CI->getContext(), + getArgAsInt(CI, CI->arg_size() - 1)); + IRBuilder<> Builder(CI); + auto CmpXchg = Builder.CreateAtomicCmpXchg(Ptr, Cmp, New, {}, SuccessOrder, + FailOrder, S); + // OpAtomicCompareExchangeWeak has been deprecated and subsequently removed + // from SPIR-V versions newer than 1.4, and currently there's no way to encode + // the weak bit. + //CmpXchg->setWeak(OC == OpAtomicCompareExchangeWeak); + + CI->replaceAllUsesWith( + Builder.CreateZExt(Builder.CreateExtractValue(CmpXchg, 0), CI->getType())); + CI->dropAllReferences(); + CI->eraseFromParent(); +} + +static void translateSPIRVAtomicBuiltinToLLVMAtomicOp(CallInst *CI, Op OC) { + if (OC == OpAtomicCompareExchange || OC == OpAtomicCompareExchangeWeak) + return translateSPIRVCmpXchgToLLVM(CI, OC); + + static const DenseMap SPIRVtoLLVM{ + {OpAtomicAnd, AtomicRMWInst::And}, + {OpAtomicExchange, AtomicRMWInst::Xchg}, + {OpAtomicFAddEXT, AtomicRMWInst::FAdd}, + {OpAtomicFMaxEXT, AtomicRMWInst::FMax}, + {OpAtomicFMinEXT, AtomicRMWInst::FMin}, + {OpAtomicIAdd, AtomicRMWInst::Add}, + {OpAtomicIDecrement, AtomicRMWInst::UDecWrap}, + {OpAtomicIIncrement, AtomicRMWInst::UIncWrap}, + {OpAtomicISub, AtomicRMWInst::Sub}, + {OpAtomicOr, AtomicRMWInst::Or}, + {OpAtomicSMax, AtomicRMWInst::Max}, + {OpAtomicSMin, AtomicRMWInst::Min}, + {OpAtomicUMax, AtomicRMWInst::UMax}, + {OpAtomicUMin, AtomicRMWInst::UMin}, + {OpAtomicXor, AtomicRMWInst::Xor} + }; + + assert(isa(CI->getArgOperand(CI->arg_size() - 1))); + assert(isa(CI->getArgOperand(CI->arg_size() - 2))); + + auto Order = // Offset NotAtomic and Unordered + static_cast(getArgAsInt(CI, CI->arg_size() - 2) + 2); + auto S = mapOpenCLScopeToAMDGPU(CI->getContext(), + getArgAsInt(CI, CI->arg_size() - 1)); + + IRBuilder<> Builder(CI); + if (OC == OpAtomicLoad) { + auto LD = Builder.CreateLoad(CI->getType(), CI->getOperand(0)); + LD->setAtomic(Order, S); + CI->replaceAllUsesWith(LD); + } else if (OC == OpAtomicStore) { + auto ST = Builder.CreateStore(CI->getOperand(1), CI->getOperand(0)); + ST->setAtomic(Order, S); + CI->replaceAllUsesWith(ST); + } else { + auto RMW = Builder.CreateAtomicRMW(SPIRVtoLLVM.at(OC), CI->getOperand(0), + CI->getOperand(1), {}, Order, S); + CI->replaceAllUsesWith(RMW); + } + + CI->dropAllReferences(); + CI->eraseFromParent(); +} + +static void visitCallLLVMFence(CallInst *CI) { // TODO: AMDSPV JANK, this is incorrect + auto MS = transSPIRVMemoryScopeIntoOCLMemoryScope(CI->getArgOperand(0), CI); + auto MO = transSPIRVMemorySemanticsIntoOCLMemoryOrder(CI->getArgOperand(1), + CI); + assert(isa(MS)); + assert(isa(MO)); + + auto O = static_cast(cast(MO)->getZExtValue() + 2); + auto S = mapOpenCLScopeToAMDGPU(CI->getContext(), + cast(MS)->getZExtValue()); + IRBuilder<> Builder(CI); + + CI->replaceAllUsesWith(Builder.CreateFence(O, S)); + + CI->dropAllReferences(); + CI->eraseFromParent(); +} + void SPIRVToOCL20Base::visitCallSPIRVMemoryBarrier(CallInst *CI) { + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + return visitCallLLVMFence(CI); + Value *MemScope = SPIRV::transSPIRVMemoryScopeIntoOCLMemoryScope(CI->getArgOperand(0), CI); Value *MemFenceFlags = SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags( @@ -133,6 +245,9 @@ void 
SPIRVToOCL20Base::mutateAtomicName(CallInst *CI, Op OC) { void SPIRVToOCL20Base::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) { CallInst *CIG = mutateCommonAtomicArguments(CI, OC); + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + return translateSPIRVAtomicBuiltinToLLVMAtomicOp(CIG, OC); + switch (OC) { case OpAtomicIIncrement: case OpAtomicIDecrement: @@ -174,12 +289,14 @@ CallInst *SPIRVToOCL20Base::mutateCommonAtomicArguments(CallInst *CI, Op OC) { Mutator.mapArgs([=](IRBuilder<> &Builder, Value *PtrArg, Type *PtrArgTy) { if (auto *TypedPtrTy = dyn_cast(PtrArgTy)) { - if (TypedPtrTy->getAddressSpace() != SPIRAS_Generic) { + unsigned AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic; + if (TypedPtrTy->getAddressSpace() != AS) { Type *ElementTy = TypedPtrTy->getElementType(); - Type *FixedPtr = PointerType::get(ElementTy, SPIRAS_Generic); + Type *FixedPtr = PointerType::get(ElementTy, AS); PtrArg = Builder.CreateAddrSpaceCast(PtrArg, FixedPtr, PtrArg->getName() + ".as"); - PtrArgTy = TypedPointerType::get(ElementTy, SPIRAS_Generic); + PtrArgTy = TypedPointerType::get(ElementTy, AS); } } return std::make_pair(PtrArg, PtrArgTy); @@ -207,7 +324,8 @@ void SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) { // value by pointer passed as 2nd argument (aka expected) while SPIR-V // instructions returns this new/original value as a resulting value. AllocaInst *PExpected = new AllocaInst( - MemTy, 0, "expected", + MemTy, CI->getParent()->getParent()->getDataLayout().getAllocaAddrSpace(), + "expected", CI->getParent()->getParent()->getEntryBlock().getFirstInsertionPt()); PExpected->setAlignment(Align(MemTy->getScalarSizeInBits() / 8)); @@ -221,7 +339,9 @@ void SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) { .mapArg(1, [=](IRBuilder<> &Builder, Value *Expected) { Builder.CreateStore(Expected, PExpected); - unsigned AddrSpc = SPIRAS_Generic; + unsigned AddrSpc = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? + mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : + SPIRAS_Generic; Type *PtrTyAS = PointerType::get(PExpected->getType(), AddrSpc); Value *V = Builder.CreateAddrSpaceCast( PExpected, PtrTyAS, PExpected->getName() + ".as"); @@ -263,10 +383,12 @@ void SPIRVToOCL20Base::visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) { auto Mutator = mutateCallInst(CI, FName.str()); Mutator.mapArg(6, [=](IRBuilder<> &Builder, Value *Invoke) { + unsigned AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ? 
+ mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic; Value *Replace = CastInst::CreatePointerBitCastOrAddrSpaceCast( - Invoke, Builder.getPtrTy(SPIRAS_Generic), "", CI->getIterator()); + Invoke, Builder.getPtrTy(AS), "", CI->getIterator()); return std::make_pair( - Replace, TypedPointerType::get(Builder.getInt8Ty(), SPIRAS_Generic)); + Replace, TypedPointerType::get(Builder.getInt8Ty(), AS)); }); if (!HasVaargs) { diff --git a/lib/SPIRV/SPIRVUtil.cpp b/lib/SPIRV/SPIRVUtil.cpp index 4b0f721da..110158a39 100644 --- a/lib/SPIRV/SPIRVUtil.cpp +++ b/lib/SPIRV/SPIRVUtil.cpp @@ -303,7 +303,10 @@ Function *getOrCreateFunction(Module *M, Type *RetTy, ArrayRef ArgTypes, if (F) NewF->setDSOLocal(F->isDSOLocal()); F = NewF; - F->setCallingConv(CallingConv::SPIR_FUNC); + if (M->getTargetTriple() == "amdgcn-amd-amdhsa") + F->setCallingConv(CallingConv::C); + else + F->setCallingConv(CallingConv::SPIR_FUNC); if (Attrs) F->setAttributes(*Attrs); } @@ -1509,7 +1512,9 @@ Value *getScalarOrArrayConstantInt(BasicBlock::iterator Pos, Type *T, auto *AT = ArrayType::get(ET, Len); std::vector EV(Len, ConstantInt::get(ET, V, IsSigned)); auto *CA = ConstantArray::get(AT, EV); - auto *Alloca = new AllocaInst(AT, 0, "", Pos); + auto *Alloca = new AllocaInst( + AT, Pos->getParent()->getParent()->getDataLayout().getAllocaAddrSpace(), + "", Pos); new StoreInst(CA, Alloca, Pos); auto *Zero = ConstantInt::getNullValue(Type::getInt32Ty(T->getContext())); Value *Index[] = {Zero, Zero}; @@ -2291,7 +2296,9 @@ bool postProcessBuiltinWithArrayArguments(Function *F, auto *T = I->getType(); if (!T->isArrayTy()) continue; - auto *Alloca = new AllocaInst(T, 0, "", FBegin); + auto *Alloca = new AllocaInst( + T, F->getParent()->getDataLayout().getAllocaAddrSpace(), "", + FBegin); new StoreInst(I, Alloca, false, CI->getIterator()); auto *Zero = ConstantInt::getNullValue(Type::getInt32Ty(T->getContext())); diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp index 687afffe7..6f7984171 100644 --- a/lib/SPIRV/SPIRVWriter.cpp +++ b/lib/SPIRV/SPIRVWriter.cpp @@ -450,7 +450,9 @@ SPIRVType *LLVMToSPIRVBase::transType(Type *T) { // SPIR-V 1.3 s3.32.6: Length is the number of elements in the array. // It must be at least 1. const auto ArraySize = - T->getArrayNumElements() ? T->getArrayNumElements() : 1; + T->getArrayNumElements() ? T->getArrayNumElements() : + (M->getTargetTriple() == "spirv64-amd-amdhsa" ? UINT32_MAX : 1); + Type *ElTy = T->getArrayElementType(); SPIRVType *TransType = BM->addArrayType( transType(ElTy), @@ -755,7 +757,9 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(SPIRVType *ET, unsigned AddrSpc) { return transPointerType(ET, SPIRAS_Private); if (BM->isAllowedToUseExtension(ExtensionID::SPV_KHR_untyped_pointers) && !(ET->isTypeArray() || ET->isTypeVector() || ET->isTypeStruct() || - ET->isTypeImage() || ET->isTypeSampler() || ET->isTypePipe())) { + ET->isTypeImage() || ET->isTypeSampler() || ET->isTypePipe() || + (M->getTargetTriple() == "spirv64-amd-amdhsa" && + ET->getOpCode() == OpTypeFunction))) { TranslatedTy = BM->addUntypedPointerKHRType( SPIRSPIRVAddrSpaceMap::map(static_cast(AddrSpc))); } else { @@ -843,10 +847,15 @@ SPIRVType *LLVMToSPIRVBase::transScavengedType(Value *V) { // error. To be on the safe side, an assertion is added to check printf // never reaches this point. 
assert(F->getName() != "printf"); - BM->getErrorLog().checkError(!FnTy->isVarArg(), - SPIRVEC_UnsupportedVarArgFunction); + if (M->getTargetTriple() != "spirv64-amd-amdhsa") + BM->getErrorLog().checkError(!FnTy->isVarArg(), + SPIRVEC_UnsupportedVarArgFunction); SPIRVType *RT = transType(FnTy->getReturnType()); + if (M->getTargetTriple() == "spirv64-amd-amdhsa" && + F->hasName() && F->getName().contains("dispatch.ptr")) + RT = transType(PointerType::get(F->getContext(), SPIRAS_Constant)); + std::vector PT; for (Argument &Arg : F->args()) { assert(OCLTypeToSPIRVPtr); @@ -1550,8 +1559,10 @@ SPIRVInstruction *LLVMToSPIRVBase::transBinaryInst(BinaryOperator *B, SPIRVInstruction *LLVMToSPIRVBase::transCmpInst(CmpInst *Cmp, SPIRVBasicBlock *BB) { auto *Op0 = Cmp->getOperand(0); - SPIRVValue *TOp0 = transValue(Op0, BB); - SPIRVValue *TOp1 = transValue(Cmp->getOperand(1), BB); + SPIRVValue *TOp0 = + transValue(Op0, BB, true, FuncTransMode::Pointer); + SPIRVValue *TOp1 = + transValue(Cmp->getOperand(1), BB, true, FuncTransMode::Pointer); if (Op0->getType()->isPointerTy()) { auto P = Cmp->getPredicate(); if (BM->isAllowedToUseVersion(VersionNumber::SPIRV_1_4) && @@ -1583,15 +1594,20 @@ SPIRVValue *LLVMToSPIRVBase::transUnaryInst(UnaryInstruction *U, return BM->addUndef(ExpectedTy); } } + if (isa(U) && M->getTargetTriple() == "spirv64-amd-amdhsa") { + SPIRVType *ExpectedTy = transScavengedType(U); + return BM->addUndef(ExpectedTy); + } Op BOC = OpNop; if (auto *Cast = dyn_cast(U)) { const auto SrcAddrSpace = Cast->getSrcTy()->getPointerAddressSpace(); const auto DestAddrSpace = Cast->getDestTy()->getPointerAddressSpace(); if (DestAddrSpace == SPIRAS_Generic) { - getErrorLog().checkError( - SrcAddrSpace != SPIRAS_Constant, SPIRVEC_InvalidModule, U, - "Casts from constant address space to generic are illegal\n"); + if (M->getTargetTriple() != "spirv64-amd-amdhsa") + getErrorLog().checkError( + SrcAddrSpace != SPIRAS_Constant, SPIRVEC_InvalidModule, U, + "Casts from constant address space to generic are illegal\n"); BOC = OpPtrCastToGeneric; // In SPIR-V only casts to/from generic are allowed. But with // SPV_INTEL_usm_storage_classes we can also have casts from global_device @@ -2109,6 +2125,10 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, : nullptr, GV->isConstant(), transLinkageType(GV), BVarInit, GV->getName().str(), StorageClass, nullptr)); + if (GV->isExternallyInitialized() && + M->getTargetTriple() == "spirv64-amd-amdhsa") + BVar->addDecorate(DecorationUserTypeGOOGLE, + BM->getString("externally_initialized")->getId()); if (IsVectorCompute) { BVar->addDecorate(DecorationVectorComputeVariableINTEL); @@ -2153,7 +2173,7 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, assert(BV); // Don't store pointer constants in the map -- they are opaque and thus we // might reuse the wrong type (Example: a null value) if we do so. - if (V->getType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) return BV; return mapValue(V, BV); } @@ -2243,7 +2263,11 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, auto *FrexpResult = transValue(RV, BB); SPIRVValue *IntFromFrexpResult = static_cast(FrexpResult)->getArgValues()[1]; - IntFromFrexpResult = BM->addLoadInst(IntFromFrexpResult, {}, BB); + IntFromFrexpResult = + BM->addLoadInst(IntFromFrexpResult, {}, BB, + BM->isAllowedToUseExtension(ExtensionID::SPV_KHR_untyped_pointers) + ? 
transType(cast(RV->getType())->getTypeAtIndex(1)) + : nullptr); std::vector Operands = {FrexpResult->getId(), IntFromFrexpResult->getId()}; @@ -2280,7 +2304,10 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, BB)); if (AllocaInst *Alc = dyn_cast(V)) { - SPIRVType *TranslatedTy = transScavengedType(V); + SPIRVType *TranslatedTy = M->getTargetTriple() != "spirv64-amd-amdhsa" ? + transScavengedType(V) : + BM->addPointerType(StorageClassFunction, + transType(Alc->getAllocatedType())); if (Alc->isArrayAllocation()) { SPIRVValue *Length = transValue(Alc->getArraySize(), BB); assert(Length && "Couldn't translate array size!"); @@ -2462,7 +2489,12 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, // Idx = 1 SPIRVValue *IntFromFrexpResult = static_cast(Val)->getArgValues()[1]; - IntFromFrexpResult = BM->addLoadInst(IntFromFrexpResult, {}, BB); + IntFromFrexpResult = + BM->addLoadInst( + IntFromFrexpResult, {}, BB, + BM->isAllowedToUseExtension(ExtensionID::SPV_KHR_untyped_pointers) + ? transType(Ext->getType()) + : nullptr); return mapValue(V, IntFromFrexpResult); } } @@ -2611,6 +2643,15 @@ LLVMToSPIRVBase::transValueWithoutDecoration(Value *V, SPIRVBasicBlock *BB, // Implement FSub through FNegate and AtomicFAddExt Ops[3] = BM->addUnaryInst(OpFNegate, Ty, OpVals[3], BB)->getId(); OC = OpAtomicFAddEXT; + } else if (Op == AtomicRMWInst::UIncWrap || Op == AtomicRMWInst::UDecWrap) { + OC = LLVMSPIRVAtomicRmwOpCodeMap::map(Op); + auto WrapV = Ops.back(); + Ops.pop_back(); + auto IncDec = mapValue(V, BM->addInstTemplate(OC, Ops, BB, Ty)); + IncDec->addDecorate(new SPIRVDecorate(DecorationMaxByteOffsetId, IncDec, + WrapV)); + return IncDec; + // TODO: figure out handling of saturating val. } else OC = LLVMSPIRVAtomicRmwOpCodeMap::map(Op); @@ -3061,7 +3102,8 @@ bool LLVMToSPIRVBase::transDecoration(Value *V, SPIRVValue *BV) { Opcode == Instruction::FMul || Opcode == Instruction::FDiv || Opcode == Instruction::FRem || ((Opcode == Instruction::FNeg || Opcode == Instruction::FCmp) && - BM->isAllowedToUseVersion(VersionNumber::SPIRV_1_6))) { + BM->isAllowedToUseVersion(VersionNumber::SPIRV_1_6)) || + Opcode == Instruction::Call) { FastMathFlags FMF = BVF->getFastMathFlags(); SPIRVWord M{0}; if (FMF.isFast()) @@ -3137,6 +3179,8 @@ void LLVMToSPIRVBase::transMemAliasingINTELDecorations(Instruction *Inst, if (!BM->isAllowedToUseExtension( ExtensionID::SPV_INTEL_memory_access_aliasing)) return; + if (!BV->hasId() && M->getTargetTriple() == "spirv64-amd-amdhsa") // Fences + return; if (MDNode *AliasingListMD = Inst->getMetadata(LLVMContext::MD_alias_scope)) { auto *MemAliasList = addMemAliasingINTELInstructions(BM, AliasingListMD); if (!MemAliasList) @@ -5826,7 +5870,10 @@ bool isEmptyLLVMModule(Module *M) { } bool LLVMToSPIRVBase::translate() { - BM->setGeneratorVer(KTranslatorVer); + if (M->getTargetTriple() == "spirv64-amd-amdhsa") + BM->setGeneratorVer(UINT16_MAX); + else + BM->setGeneratorVer(KTranslatorVer); if (isEmptyLLVMModule(M)) BM->addCapability(CapabilityLinkage); diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 67aaee4ba..cae638f3c 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -651,6 +651,8 @@ class SPIRVStore : public SPIRVInstruction, public SPIRVMemoryAccess { (getValueType(PtrId) ->getPointerElementType() ->isTypeUntypedPointerKHR() || + (getValueType(PtrId)->getPointerElementType()->isTypePointer() && + 
getValueType(ValId)->isTypeUntypedPointerKHR()) || getValueType(PtrId)->getPointerElementType() == getValueType(ValId)) && "Inconsistent operand types"); } @@ -762,7 +764,7 @@ class SPIRVBinary : public SPIRVInstTemplateBase { } else if (isBinaryPtrOpCode(OpCode)) { assert((Op1Ty->isTypePointer() && Op2Ty->isTypePointer()) && "Invalid types for PtrEqual, PtrNotEqual, or PtrDiff instruction"); - if (!Op1Ty->isTypeUntypedPointerKHR() || + if (!Op1Ty->isTypeUntypedPointerKHR() && !Op2Ty->isTypeUntypedPointerKHR()) assert( static_cast(Op1Ty)->getElementType() == @@ -1010,7 +1012,11 @@ class SPIRVPhi : public SPIRVInstruction { assert(OpCode == OC); assert(Pairs.size() % 2 == 0); foreachPair([=](SPIRVValue *IncomingV, SPIRVBasicBlock *IncomingBB) { - assert(IncomingV->isForward() || IncomingV->getType() == Type); + assert(IncomingV->isForward() || IncomingV->getType() == Type || + (IncomingV->getType()->isTypePointer() && + Type->isTypeUntypedPointerKHR()) || + (IncomingV->getType()->isTypeUntypedPointerKHR() && + Type->isTypePointer())); assert(IncomingBB->isBasicBlock() || IncomingBB->isForward()); }); SPIRVInstruction::validate(); @@ -1132,6 +1138,9 @@ class SPIRVSelectBase : public SPIRVInstTemplateBase { : getValueType(Condition); (void)ConTy; assert(ConTy->isTypeBool() && "Invalid type"); + if (getType()->getOpCode() != OpTypeUntypedPointerKHR && + getValueType(Op1)->getOpCode() != OpTypeUntypedPointerKHR && + getValueType(Op2)->getOpCode() != OpTypeUntypedPointerKHR) assert(getType() == getValueType(Op1) && getType() == getValueType(Op2) && "Inconsistent type"); } @@ -1250,7 +1259,8 @@ class SPIRVSwitch : public SPIRVInstruction { return static_cast(getValue(Default)); } size_t getLiteralSize() const { - unsigned ByteWidth = getSelect()->getType()->getBitWidth() / 8; + unsigned ByteWidth = + std::max(getSelect()->getType()->getBitWidth() / 8, 1u); unsigned Remainder = (ByteWidth % sizeof(SPIRVWord)) != 0; return (ByteWidth / sizeof(SPIRVWord)) + Remainder; } diff --git a/test/extensions/KHR/SPV_KHR_uniform_group_instructions/group-instructions.ll b/test/extensions/KHR/SPV_KHR_uniform_group_instructions/group-instructions.ll index 881a455ec..6a8ccf994 100644 --- a/test/extensions/KHR/SPV_KHR_uniform_group_instructions/group-instructions.ll +++ b/test/extensions/KHR/SPV_KHR_uniform_group_instructions/group-instructions.ll @@ -52,7 +52,7 @@ ; CHECK-amd-llvm-spirv: %call5 = call spir_func i1 @_Z25__spirv_GroupLogicalOrKHR{{.*}}(i32 2, i32 0, i1 false) ; CHECK-amd-llvm-spirv: %call6 = call spir_func i1 @_Z26__spirv_GroupLogicalXorKHR{{.*}}(i32 2, i32 0, i1 false) ; CHECK-amd-llvm-spirv: %call7 = call spir_func i32 @_Z20__spirv_GroupIMulKHR{{.*}}(i32 2, i32 0, i32 0) -; CHECK-amd-llvm-spirv: %call8 = call spir_func half @_Z20__spirv_GroupFMulKHR{{.*}}(i32 2, i32 0, half 0xH0000) +; CHECK-amd-llvm-spirv: %call8 = call fast spir_func half @_Z20__spirv_GroupFMulKHR{{.*}}(i32 2, i32 0, half 0xH0000) ; ModuleID = 'source.bc' source_filename = "group_operations.cpp" diff --git a/test/transcoding/OpenCL/atomic_syncscope_test.ll b/test/transcoding/OpenCL/atomic_syncscope_test.ll index 157d4b4e1..27fbaff70 100644 --- a/test/transcoding/OpenCL/atomic_syncscope_test.ll +++ b/test/transcoding/OpenCL/atomic_syncscope_test.ll @@ -26,9 +26,9 @@ target triple = "spir64" ; 4 - sub_group ; CHECK-SPIRV-DAG: Constant [[#]] [[#ConstInt0:]] 0 +; CHECK-SPIRV-DAG: Constant [[#]] [[#ConstInt2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#]] [[#SequentiallyConsistent:]] 16 ; CHECK-SPIRV-DAG: Constant [[#]] [[#ConstInt1:]] 1 -; 
CHECK-SPIRV-DAG: Constant [[#]] [[#ConstInt2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#]] [[#ConstInt3:]] 3 ; CHECK-SPIRV-DAG: Constant [[#]] [[#ConstInt4:]] 4 ; CHECK-SPIRV-DAG: Constant [[#]] [[#Const2Power30:]] 1073741824 @@ -82,8 +82,8 @@ entry: define dso_local void @fi3(ptr nocapture noundef %i, ptr nocapture noundef %ui) local_unnamed_addr #0 { entry: - %0 = atomicrmw and ptr %i, i32 1 syncscope("work_item") seq_cst, align 4 - %1 = atomicrmw min ptr %i, i32 1 syncscope("all_svm_devices") seq_cst, align 4 + %0 = atomicrmw and ptr %i, i32 1 syncscope("singlethread") seq_cst, align 4 + %1 = atomicrmw min ptr %i, i32 1 seq_cst, align 4 %2 = atomicrmw max ptr %i, i32 1 syncscope("wrong_scope") seq_cst, align 4 %3 = atomicrmw umin ptr %ui, i32 1 syncscope("workgroup") seq_cst, align 4 %4 = atomicrmw umax ptr %ui, i32 1 syncscope("workgroup") seq_cst, align 4
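
A minimal usage sketch of the address-space helpers this patch adds to OCLUtil.h (illustrative only, not part of the patch): the SPIRAS_* numbering is assumed from SPIRVInternal.h, the namespaces are assumed to be in scope, and checkAMDGCNAddrSpaceMapping is a hypothetical helper.

    #include <cassert>
    #include "OCLUtil.h"
    using namespace SPIRV;   // assumed: brings StorageClass* / SPIRAS_* into scope
    using namespace OCLUtil; // assumed: brings the two mapping helpers into scope

    // AMDGCN numbering used by the patch: 0 = flat/generic, 1 = global,
    // 3 = LDS/local, 4 = constant, 5 = private/scratch (AS 2 is unused here).
    void checkAMDGCNAddrSpaceMapping() {
      assert(mapAMDGCNAddrSpaceToSPIRV(1) == SPIRAS_Global);
      assert(mapAMDGCNAddrSpaceToSPIRV(3) == SPIRAS_Local);
      // StorageClassGeneric maps to the AMDGCN flat address space 0,
      // not to the SPIR generic address space 4.
      assert(mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) == 0);
      assert(mapSPIRVAddrSpaceToAMDGPU(StorageClassWorkgroup) == 3);
    }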
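
A sketch of the lowering performed by translateSPIRVAtomicBuiltinToLLVMAtomicOp in SPIRVToOCL20.cpp for one simple case (the builder call mirrors the patch; the wrapping function and its operands are hypothetical):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // An __spirv_AtomicIAdd at OpenCL work_group scope with relaxed order becomes:
    //   %old = atomicrmw add ptr %p, i32 %v syncscope("workgroup") monotonic
    // The "+ 2" in the patch offsets past NotAtomic/Unordered, so the OpenCL
    // relaxed order (0) maps to AtomicOrdering::Monotonic.
    AtomicRMWInst *lowerAtomicIAddWorkGroup(IRBuilder<> &B, Value *Ptr, Value *Val) {
      SyncScope::ID SSID = B.getContext().getOrInsertSyncScopeID("workgroup");
      return B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, MaybeAlign(),
                               AtomicOrdering::Monotonic, SSID);
    }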