Skip to content

Commit

Permalink
Add AMDGCNSPIRV specific (and mostly temporary) delta.
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexVlx committed Oct 11, 2024
1 parent f579047 commit c1abb9c
Show file tree
Hide file tree
Showing 12 changed files with 767 additions and 90 deletions.
3 changes: 3 additions & 0 deletions lib/SPIRV/OCLToSPIRV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,9 @@ void OCLToSPIRVBase::visitCallInst(CallInst &CI) {
if (DemangledName == kOCLBuiltinName::WorkGroupBarrier ||
DemangledName == kOCLBuiltinName::Barrier ||
DemangledName == kOCLBuiltinName::SubGroupBarrier) {
if (F->arg_size() != 1 && F->arg_size() != 2 &&
F->getParent()->getTargetTriple() == "spirv64-amd-amdhsa")
return; // Somebody used the name.
visitCallBarrier(&CI);
return;
}
Expand Down
4 changes: 3 additions & 1 deletion lib/SPIRV/OCLUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ template <> void SPIRVMap<OCLScopeKind, Scope>::init() {
}

template <> void SPIRVMap<std::string, Scope>::init() {
add("work_item", ScopeInvocation);
add("singlethread", ScopeInvocation);
add("workgroup", ScopeWorkgroup);
add("device", ScopeDevice);
add("all_svm_devices", ScopeCrossDevice);
Expand Down Expand Up @@ -658,6 +658,8 @@ template <> void LLVMSPIRVAtomicRmwOpCodeMap::init() {
add(llvm::AtomicRMWInst::FAdd, OpAtomicFAddEXT);
add(llvm::AtomicRMWInst::FMin, OpAtomicFMinEXT);
add(llvm::AtomicRMWInst::FMax, OpAtomicFMaxEXT);
add(llvm::AtomicRMWInst::UIncWrap, OpAtomicIIncrement);
add(llvm::AtomicRMWInst::UDecWrap, OpAtomicIDecrement);
}

} // namespace SPIRV
Expand Down
37 changes: 37 additions & 0 deletions lib/SPIRV/OCLUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,43 @@ inline OCLMemOrderKind mapSPIRVMemOrderToOCL(unsigned Sema) {
return OCLMemOrderMap::rmap(extractSPIRVMemOrderSemantic(Sema));
}

/// Converts a numeric AMDGCN address space into the matching SPIR address
/// space enumerator.
///
/// Handled inputs are 0 (generic), 1 (global), 3 (local), 4 (constant) and
/// 5 (private). Any other value, including 2, is treated as a programming
/// error and reaches llvm_unreachable.
///
/// \param AS numeric AMDGCN address space.
/// \returns the corresponding SPIRAS_* value.
inline unsigned int mapAMDGCNAddrSpaceToSPIRV(unsigned int AS) {
  if (AS == 0)
    return SPIRAS_Generic;
  if (AS == 1)
    return SPIRAS_Global;
  if (AS == 3)
    return SPIRAS_Local;
  if (AS == 4)
    return SPIRAS_Constant;
  if (AS == 5)
    return SPIRAS_Private;

  llvm_unreachable("Unexpected AMDGCN Address Space");
  return UINT_MAX;
}

/// Converts a SPIR-V storage class into the numeric AMDGPU address space,
/// returned wrapped in the SPIRAddressSpace type.
///
/// Mapping: Generic -> 0, CrossWorkgroup -> 1, Workgroup -> 3,
/// UniformConstant -> 4, Private and Function -> 5. Any other storage class
/// is treated as a programming error and reaches llvm_unreachable.
///
/// \param SPVAS SPIR-V storage class to translate.
/// \returns the AMDGPU address space number cast to SPIRAddressSpace; the
///          result is a raw number, not necessarily a named SPIRAS_* value.
inline SPIRAddressSpace mapSPIRVAddrSpaceToAMDGPU(SPIRVStorageClassKind SPVAS) {
  unsigned int AMDGPUAS = UINT_MAX;
  switch (SPVAS) {
  case StorageClassGeneric:
    AMDGPUAS = 0;
    break;
  case StorageClassCrossWorkgroup:
    AMDGPUAS = 1;
    break;
  case StorageClassWorkgroup:
    AMDGPUAS = 3;
    break;
  case StorageClassUniformConstant:
    AMDGPUAS = 4;
    break;
  case StorageClassPrivate:
  case StorageClassFunction:
    // Both per-invocation storage classes share one AMDGPU address space.
    AMDGPUAS = 5;
    break;
  default:
    llvm_unreachable("Unexpected StorageClass");
  }
  return static_cast<SPIRAddressSpace>(AMDGPUAS);
}

bool isPipeOrAddressSpaceCastBI(const StringRef MangledName);
bool isEnqueueKernelBI(const StringRef MangledName);
bool isKernelQueryBI(const StringRef MangledName);
Expand Down
556 changes: 500 additions & 56 deletions lib/SPIRV/SPIRVReader.cpp

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion lib/SPIRV/SPIRVToLLVMDbgTran.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1580,7 +1580,9 @@ SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugInst,
// DIBuilder::insertDeclare doesn't allow passing nullptr for the Storage
// parameter. To work around this limitation we create a dummy temp
// alloca, use it to create llvm.dbg.declare, and then remove the alloca.
auto *AI = new AllocaInst(Type::getInt8Ty(M->getContext()), 0, "tmp", BB);
auto *AI = new AllocaInst(Type::getInt8Ty(M->getContext()),
BB->getParent()->getParent()->getDataLayout().getAllocaAddrSpace(),
"tmp", BB);
DbgInstPtr DbgDeclare = DIB.insertDeclare(
AI, LocalVar.first, GetExpression(Ops[ExpressionIdx]),
LocalVar.second, BB);
Expand Down
3 changes: 3 additions & 0 deletions lib/SPIRV/SPIRVToOCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ void SPIRVToOCLBase::visitCastInst(CastInst &Cast) {
!isa<UIToFPInst>(Cast) && !isa<SIToFPInst>(Cast))
return;

if (M->getTargetTriple() == "amdgcn-amd-amdhsa")
return;

Type const *SrcTy = Cast.getSrcTy();
Type *DstVecTy = Cast.getDestTy();
// Leave scalar casts as is. Skip boolean vector casts because there
Expand Down
136 changes: 129 additions & 7 deletions lib/SPIRV/SPIRVToOCL20.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

#include "OCLUtil.h"
#include "SPIRVToOCL.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Verifier.h"

#define DEBUG_TYPE "spvtocl20"
Expand Down Expand Up @@ -74,7 +75,118 @@ bool SPIRVToOCL20Base::runSPIRVToOCL(Module &Module) {
return true;
}

/// Translates an OpenCL memory scope value into an LLVM synchronization scope
/// ID, using the named scopes "workgroup", "agent" and "wavefront".
///
/// \param Ctx context in which the named sync scopes are looked up or created.
/// \param S   scope value, compared against the OCLMS_* enumerators.
/// \returns SyncScope::SingleThread for work-item scope, the named scope for
///          work-group, device and sub-group scope, and SyncScope::System for
///          every other value.
static SyncScope::ID mapOpenCLScopeToAMDGPU(LLVMContext &Ctx, uint64_t S) {
  switch (S) {
  case OCLMS_work_item:
    return SyncScope::SingleThread;
  case OCLMS_work_group:
    return Ctx.getOrInsertSyncScopeID("workgroup");
  case OCLMS_device:
    return Ctx.getOrInsertSyncScopeID("agent");
  case OCLMS_sub_group:
    return Ctx.getOrInsertSyncScopeID("wavefront");
  default:
    // Anything not listed above falls back to the system-wide scope.
    return SyncScope::System;
  }
}

/// Replaces a compare-exchange builtin call with an LLVM `cmpxchg`
/// instruction and erases the call.
///
/// The call's users are redirected to the value loaded by the `cmpxchg`
/// (element 0 of its result), zero-extended to the call's return type.
///
/// NOTE(review): the operand positions used below (pointer at 0, comparator
/// at 1, new value at 4, orders at size-4 / size-3, scope at size-1) assume a
/// specific argument layout produced by the caller - confirm against the code
/// that builds \p CI.
///
/// \param CI call to replace; it is erased on return.
/// \param OC SPIR-V opcode of the builtin; currently unused because the weak
///           flag cannot be encoded (see below).
static void translateSPIRVCmpXchgToLLVM(CallInst *CI, Op OC) {
  const unsigned NumArgs = CI->arg_size();
  const unsigned SuccessIdx = NumArgs - 4;
  const unsigned FailureIdx = NumArgs - 3;
  const unsigned ScopeIdx = NumArgs - 1;

  Value *Pointer = CI->getOperand(0);
  Value *Expected = CI->getOperand(1);
  Value *Desired = CI->getOperand(4);

  // Both memory orders and the scope must be compile-time constants.
  assert(isa<ConstantInt>(CI->getArgOperand(SuccessIdx)));
  assert(isa<ConstantInt>(CI->getArgOperand(FailureIdx)));
  assert(isa<ConstantInt>(CI->getArgOperand(ScopeIdx)));

  // The incoming order values are shifted by two so that they step past the
  // NotAtomic and Unordered entries of AtomicOrdering.
  const auto OnSuccess =
      static_cast<AtomicOrdering>(getArgAsInt(CI, SuccessIdx) + 2);
  const auto OnFailure =
      static_cast<AtomicOrdering>(getArgAsInt(CI, FailureIdx) + 2);
  const SyncScope::ID Scope =
      mapOpenCLScopeToAMDGPU(CI->getContext(), getArgAsInt(CI, ScopeIdx));

  IRBuilder<> Builder(CI);
  auto *Exchange = Builder.CreateAtomicCmpXchg(Pointer, Expected, Desired, {},
                                               OnSuccess, OnFailure, Scope);
  // OpAtomicCompareExchangeWeak has been deprecated and subsequently removed
  // from SPIR-V versions newer than 1.4, and currently there's no way to
  // encode the weak bit, so the following stays disabled:
  // Exchange->setWeak(OC == OpAtomicCompareExchangeWeak);

  Value *Loaded = Builder.CreateExtractValue(Exchange, 0);
  CI->replaceAllUsesWith(Builder.CreateZExt(Loaded, CI->getType()));

  CI->dropAllReferences();
  CI->eraseFromParent();
}

/// Replaces an atomic builtin call with the equivalent native LLVM atomic
/// instruction and erases the call.
///
/// Compare-exchange opcodes are forwarded to translateSPIRVCmpXchgToLLVM.
/// OpAtomicLoad becomes an atomic load, OpAtomicStore an atomic store, and
/// every other opcode is looked up in a SPIR-V -> `atomicrmw` operation table.
/// The memory order is read from the second-to-last argument and the scope
/// from the last argument; both must be constant integers.
///
/// \param CI call to replace; it is erased on return.
/// \param OC SPIR-V opcode describing which atomic operation \p CI performs.
static void translateSPIRVAtomicBuiltinToLLVMAtomicOp(CallInst *CI, Op OC) {
  if (OC == OpAtomicCompareExchange || OC == OpAtomicCompareExchangeWeak)
    return translateSPIRVCmpXchgToLLVM(CI, OC);

  // Read-modify-write opcodes and the LLVM binary operation each one becomes.
  static const DenseMap<Op, AtomicRMWInst::BinOp> RMWOpTable{
      {OpAtomicAnd, AtomicRMWInst::And},
      {OpAtomicExchange, AtomicRMWInst::Xchg},
      {OpAtomicFAddEXT, AtomicRMWInst::FAdd},
      {OpAtomicFMaxEXT, AtomicRMWInst::FMax},
      {OpAtomicFMinEXT, AtomicRMWInst::FMin},
      {OpAtomicIAdd, AtomicRMWInst::Add},
      {OpAtomicIDecrement, AtomicRMWInst::UDecWrap},
      {OpAtomicIIncrement, AtomicRMWInst::UIncWrap},
      {OpAtomicISub, AtomicRMWInst::Sub},
      {OpAtomicOr, AtomicRMWInst::Or},
      {OpAtomicSMax, AtomicRMWInst::Max},
      {OpAtomicSMin, AtomicRMWInst::Min},
      {OpAtomicUMax, AtomicRMWInst::UMax},
      {OpAtomicUMin, AtomicRMWInst::UMin},
      {OpAtomicXor, AtomicRMWInst::Xor}};

  const unsigned ScopeIdx = CI->arg_size() - 1;
  const unsigned OrderIdx = CI->arg_size() - 2;
  assert(isa<ConstantInt>(CI->getArgOperand(ScopeIdx)));
  assert(isa<ConstantInt>(CI->getArgOperand(OrderIdx)));

  // The incoming order value is shifted by two so that it steps past the
  // NotAtomic and Unordered entries of AtomicOrdering.
  const auto Ordering =
      static_cast<AtomicOrdering>(getArgAsInt(CI, OrderIdx) + 2);
  const auto Scope =
      mapOpenCLScopeToAMDGPU(CI->getContext(), getArgAsInt(CI, ScopeIdx));

  IRBuilder<> Builder(CI);
  Value *Replacement = nullptr;
  switch (OC) {
  case OpAtomicLoad: {
    auto *Load = Builder.CreateLoad(CI->getType(), CI->getOperand(0));
    Load->setAtomic(Ordering, Scope);
    Replacement = Load;
    break;
  }
  case OpAtomicStore: {
    auto *Store = Builder.CreateStore(CI->getOperand(1), CI->getOperand(0));
    Store->setAtomic(Ordering, Scope);
    Replacement = Store;
    break;
  }
  default: {
    // NOTE(review): operand 1 is passed as the RMW value for every opcode,
    // including OpAtomicIIncrement / OpAtomicIDecrement - confirm the caller
    // places the intended value there for those two.
    Replacement = Builder.CreateAtomicRMW(RMWOpTable.at(OC), CI->getOperand(0),
                                          CI->getOperand(1), {}, Ordering,
                                          Scope);
    break;
  }
  }
  CI->replaceAllUsesWith(Replacement);

  CI->dropAllReferences();
  CI->eraseFromParent();
}

/// Replaces a memory-barrier builtin call with an LLVM `fence` instruction
/// and erases the call.
///
/// Argument 0 is translated to an OpenCL memory scope and argument 1 to an
/// OpenCL memory order; both translations must yield constant integers.
///
/// TODO: AMDSPV JANK, this is incorrect.
/// NOTE(review): the order is only offset by two, so a relaxed input would
/// presumably become Monotonic, which LLVM rejects on `fence` - confirm.
///
/// \param CI call to replace; it is erased on return.
static void visitCallLLVMFence(CallInst *CI) {
  auto *ScopeVal =
      transSPIRVMemoryScopeIntoOCLMemoryScope(CI->getArgOperand(0), CI);
  auto *OrderVal =
      transSPIRVMemorySemanticsIntoOCLMemoryOrder(CI->getArgOperand(1), CI);
  assert(isa<ConstantInt>(ScopeVal));
  assert(isa<ConstantInt>(OrderVal));

  // The order value is shifted by two so that it steps past the NotAtomic
  // and Unordered entries of AtomicOrdering.
  const uint64_t RawOrder = cast<ConstantInt>(OrderVal)->getZExtValue();
  const auto Ordering = static_cast<AtomicOrdering>(RawOrder + 2);

  const uint64_t RawScope = cast<ConstantInt>(ScopeVal)->getZExtValue();
  const auto Scope = mapOpenCLScopeToAMDGPU(CI->getContext(), RawScope);

  IRBuilder<> Builder(CI);
  auto *Fence = Builder.CreateFence(Ordering, Scope);
  CI->replaceAllUsesWith(Fence);

  CI->dropAllReferences();
  CI->eraseFromParent();
}

void SPIRVToOCL20Base::visitCallSPIRVMemoryBarrier(CallInst *CI) {
if (M->getTargetTriple() == "amdgcn-amd-amdhsa")
return visitCallLLVMFence(CI);

Value *MemScope =
SPIRV::transSPIRVMemoryScopeIntoOCLMemoryScope(CI->getArgOperand(0), CI);
Value *MemFenceFlags = SPIRV::transSPIRVMemorySemanticsIntoOCLMemFenceFlags(
Expand Down Expand Up @@ -133,6 +245,9 @@ void SPIRVToOCL20Base::mutateAtomicName(CallInst *CI, Op OC) {
void SPIRVToOCL20Base::visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC) {
CallInst *CIG = mutateCommonAtomicArguments(CI, OC);

if (M->getTargetTriple() == "amdgcn-amd-amdhsa")
return translateSPIRVAtomicBuiltinToLLVMAtomicOp(CIG, OC);

switch (OC) {
case OpAtomicIIncrement:
case OpAtomicIDecrement:
Expand Down Expand Up @@ -174,12 +289,14 @@ CallInst *SPIRVToOCL20Base::mutateCommonAtomicArguments(CallInst *CI, Op OC) {

Mutator.mapArgs([=](IRBuilder<> &Builder, Value *PtrArg, Type *PtrArgTy) {
if (auto *TypedPtrTy = dyn_cast<TypedPointerType>(PtrArgTy)) {
if (TypedPtrTy->getAddressSpace() != SPIRAS_Generic) {
unsigned AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ?
mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic;
if (TypedPtrTy->getAddressSpace() != AS) {
Type *ElementTy = TypedPtrTy->getElementType();
Type *FixedPtr = PointerType::get(ElementTy, SPIRAS_Generic);
Type *FixedPtr = PointerType::get(ElementTy, AS);
PtrArg = Builder.CreateAddrSpaceCast(PtrArg, FixedPtr,
PtrArg->getName() + ".as");
PtrArgTy = TypedPointerType::get(ElementTy, SPIRAS_Generic);
PtrArgTy = TypedPointerType::get(ElementTy, AS);
}
}
return std::make_pair(PtrArg, PtrArgTy);
Expand Down Expand Up @@ -207,7 +324,8 @@ void SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) {
// value by pointer passed as 2nd argument (aka expected) while SPIR-V
// instructions returns this new/original value as a resulting value.
AllocaInst *PExpected = new AllocaInst(
MemTy, 0, "expected",
MemTy, CI->getParent()->getParent()->getDataLayout().getAllocaAddrSpace(),
"expected",
CI->getParent()->getParent()->getEntryBlock().getFirstInsertionPt());
PExpected->setAlignment(Align(MemTy->getScalarSizeInBits() / 8));

Expand All @@ -221,7 +339,9 @@ void SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) {
.mapArg(1,
[=](IRBuilder<> &Builder, Value *Expected) {
Builder.CreateStore(Expected, PExpected);
unsigned AddrSpc = SPIRAS_Generic;
unsigned AddrSpc = M->getTargetTriple() == "amdgcn-amd-amdhsa" ?
mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) :
SPIRAS_Generic;
Type *PtrTyAS = PointerType::get(PExpected->getType(), AddrSpc);
Value *V = Builder.CreateAddrSpaceCast(
PExpected, PtrTyAS, PExpected->getName() + ".as");
Expand Down Expand Up @@ -263,10 +383,12 @@ void SPIRVToOCL20Base::visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) {

auto Mutator = mutateCallInst(CI, FName.str());
Mutator.mapArg(6, [=](IRBuilder<> &Builder, Value *Invoke) {
unsigned AS = M->getTargetTriple() == "amdgcn-amd-amdhsa" ?
mapSPIRVAddrSpaceToAMDGPU(StorageClassGeneric) : SPIRAS_Generic;
Value *Replace = CastInst::CreatePointerBitCastOrAddrSpaceCast(
Invoke, Builder.getPtrTy(SPIRAS_Generic), "", CI->getIterator());
Invoke, Builder.getPtrTy(AS), "", CI->getIterator());
return std::make_pair(
Replace, TypedPointerType::get(Builder.getInt8Ty(), SPIRAS_Generic));
Replace, TypedPointerType::get(Builder.getInt8Ty(), AS));
});

if (!HasVaargs) {
Expand Down
13 changes: 10 additions & 3 deletions lib/SPIRV/SPIRVUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,10 @@ Function *getOrCreateFunction(Module *M, Type *RetTy, ArrayRef<Type *> ArgTypes,
if (F)
NewF->setDSOLocal(F->isDSOLocal());
F = NewF;
F->setCallingConv(CallingConv::SPIR_FUNC);
if (M->getTargetTriple() == "amdgcn-amd-amdhsa")
F->setCallingConv(CallingConv::C);
else
F->setCallingConv(CallingConv::SPIR_FUNC);
if (Attrs)
F->setAttributes(*Attrs);
}
Expand Down Expand Up @@ -1509,7 +1512,9 @@ Value *getScalarOrArrayConstantInt(BasicBlock::iterator Pos, Type *T,
auto *AT = ArrayType::get(ET, Len);
std::vector<Constant *> EV(Len, ConstantInt::get(ET, V, IsSigned));
auto *CA = ConstantArray::get(AT, EV);
auto *Alloca = new AllocaInst(AT, 0, "", Pos);
auto *Alloca = new AllocaInst(
AT, Pos->getParent()->getParent()->getDataLayout().getAllocaAddrSpace(),
"", Pos);
new StoreInst(CA, Alloca, Pos);
auto *Zero = ConstantInt::getNullValue(Type::getInt32Ty(T->getContext()));
Value *Index[] = {Zero, Zero};
Expand Down Expand Up @@ -2291,7 +2296,9 @@ bool postProcessBuiltinWithArrayArguments(Function *F,
auto *T = I->getType();
if (!T->isArrayTy())
continue;
auto *Alloca = new AllocaInst(T, 0, "", FBegin);
auto *Alloca = new AllocaInst(
T, F->getParent()->getDataLayout().getAllocaAddrSpace(), "",
FBegin);
new StoreInst(I, Alloca, false, CI->getIterator());
auto *Zero =
ConstantInt::getNullValue(Type::getInt32Ty(T->getContext()));
Expand Down
Loading

0 comments on commit c1abb9c

Please sign in to comment.