Skip to content

Commit

Permalink
[CIR][Dialect] Support OpenCL work group uniformity attribute (llvm#896)
Browse files Browse the repository at this point in the history
> To keep information about whether an OpenCL kernel has uniform work
> group size or not, clang generates 'uniform-work-group-size' function
> attribute for every kernel:
> 
> "uniform-work-group-size"="true" for OpenCL 1.2 and lower,
> "uniform-work-group-size"="true" for OpenCL 2.0 and higher if
'-cl-uniform-work-group-size' option was specified,
> "uniform-work-group-size"="false" for OpenCL 2.0 and higher if no
'-cl-uniform-work-group-size' options was specified.
> If the function is not an OpenCL kernel, 'uniform-work-group-size'
> attribute isn't generated.
> 
> *From [Differential 43570](https://reviews.llvm.org/D43570)*

This PR introduces the `OpenCLKernelUniformWorkGroupSizeAttr` attribute
to the ClangIR pipeline, towards the completeness in attributes for
OpenCL. While this attribute is represented as a unit attribute in MLIR,
its absence signifies either non-kernel functions or a `false` value for
kernel functions. To match the original LLVM IR behavior, we also
consider whether a function is an OpenCL kernel during lowering:

* If the function is not a kernel, the attribute is ignored. No LLVM
function attribute is set.
* If the function is a kernel:
* and the `OpenCLKernelUniformWorkGroupSizeAttr` is present, we generate
the LLVM function attribute `"uniform-work-group-size"="true"`.
    * If absent, we generate `"uniform-work-group-size"="false"`.
  • Loading branch information
seven-mile authored and smeenai committed Oct 9, 2024
1 parent 6cf73da commit 4dc4b4c
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 1 deletion.
21 changes: 21 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROpenCLAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -185,4 +185,25 @@ def OpenCLKernelAttr : CIRUnitAttr<
let storageType = [{ OpenCLKernelAttr }];
}

//===----------------------------------------------------------------------===//
// OpenCLKernelUniformWorkGroupSizeAttr
//===----------------------------------------------------------------------===//

def OpenCLKernelUniformWorkGroupSizeAttr : CIRUnitAttr<
"OpenCLKernelUniformWorkGroupSize", "cl.uniform_work_group_size"> {
let summary = "OpenCL kernel work-group uniformity";
let description = [{
In OpenCL v2.0, work groups can either be uniform or non-uniform.
This attribute is associated with kernels to represent the work group type.
Non-kernel entities should not interact with this attribute.

Clang's `-cl-uniform-work-group-size` compilation option provides a hint to
the compiler, indicating that the global work size should be a multiple of
the work-group size specified in the `clEnqueueNDRangeKernel` function,
thereby ensuring that the work groups are uniform.
}];

let storageType = [{ OpenCLKernelUniformWorkGroupSizeAttr }];
}

#endif // MLIR_CIR_DIALECT_CIR_OPENCL_ATTRS
17 changes: 16 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,22 @@ void CIRGenModule::constructAttributeList(StringRef Name,
auto cirKernelAttr =
mlir::cir::OpenCLKernelAttr::get(builder.getContext());
funcAttrs.set(cirKernelAttr.getMnemonic(), cirKernelAttr);
assert(!MissingFeatures::openCL());

auto uniformAttr = mlir::cir::OpenCLKernelUniformWorkGroupSizeAttr::get(
builder.getContext());
if (getLangOpts().OpenCLVersion <= 120) {
// OpenCL v1.2 Work groups are always uniform
funcAttrs.set(uniformAttr.getMnemonic(), uniformAttr);
} else {
// OpenCL v2.0 Work groups may be whether uniform or not.
// '-cl-uniform-work-group-size' compile option gets a hint
// to the compiler that the global work-size be a multiple of
// the work-group size specified to clEnqueueNDRangeKernel
// (i.e. work groups are uniform).
if (getLangOpts().OffloadUniformBlock) {
funcAttrs.set(uniformAttr.getMnemonic(), uniformAttr);
}
}
}

if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class CIRDialectLLVMIRTranslationInterface
mlir::NamedAttribute attribute,
mlir::LLVM::ModuleTranslation &moduleTranslation) const {
llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName());
llvm::LLVMContext &llvmCtx = moduleTranslation.getLLVMContext();
if (auto extraAttr = mlir::dyn_cast<mlir::cir::ExtraFuncAttributesAttr>(
attribute.getValue())) {
for (auto attr : extraAttr.getElements()) {
Expand All @@ -110,6 +111,15 @@ class CIRDialectLLVMIRTranslationInterface
llvmFunc->addFnAttr(llvm::Attribute::NoUnwind);
} else if (mlir::dyn_cast<mlir::cir::ConvergentAttr>(attr.getValue())) {
llvmFunc->addFnAttr(llvm::Attribute::Convergent);
} else if (mlir::dyn_cast<mlir::cir::OpenCLKernelAttr>(
attr.getValue())) {
const auto uniformAttrName =
mlir::cir::OpenCLKernelUniformWorkGroupSizeAttr::getMnemonic();
const bool isUniform =
extraAttr.getElements().getNamed(uniformAttrName).has_value();
auto attrs = llvmFunc->getAttributes().addFnAttribute(
llvmCtx, "uniform-work-group-size", isUniform ? "true" : "false");
llvmFunc->setAttributes(attrs);
} else if (auto clKernelMetadata =
mlir::dyn_cast<mlir::cir::OpenCLKernelMetadataAttr>(
attr.getValue())) {
Expand Down
41 changes: 41 additions & 0 deletions clang/test/CIR/CodeGen/OpenCL/cl-uniform-wg-size.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL1.2 -o %t.cl12.cir %s
// RUN: FileCheck %s -input-file=%t.cl12.cir -check-prefixes CIR,CIR-UNIFORM
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -o %t.cl20.cir %s
// RUN: FileCheck %s -input-file=%t.cl20.cir -check-prefixes CIR,CIR-NONUNIFORM
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o %t.cl20.uniform1.cir %s
// RUN: FileCheck %s -input-file=%t.cl20.uniform1.cir -check-prefixes CIR,CIR-UNIFORM
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -foffload-uniform-block -o %t.cl20.uniform2.cir %s
// RUN: FileCheck %s -input-file=%t.cl20.uniform2.cir -check-prefixes CIR,CIR-UNIFORM

// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL1.2 -o %t.cl12.ll %s
// RUN: FileCheck %s -input-file=%t.cl12.ll -check-prefixes LLVM,LLVM-UNIFORM
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -o %t.cl20.ll %s
// RUN: FileCheck %s -input-file=%t.cl20.ll -check-prefixes LLVM,LLVM-NONUNIFORM
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o %t.cl20.uniform1.ll %s
// RUN: FileCheck %s -input-file=%t.cl20.uniform1.ll -check-prefixes LLVM,LLVM-UNIFORM
// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o %t.cl20.uniform2.ll %s
// RUN: FileCheck %s -input-file=%t.cl20.uniform2.ll -check-prefixes LLVM,LLVM-UNIFORM

// CIR-LABEL: #fn_attr =
// CIR: cl.kernel = #cir.cl.kernel
// CIR-UNIFORM: cl.uniform_work_group_size = #cir.cl.uniform_work_group_size
// CIR-NONUNIFORM-NOT: cl.uniform_work_group_size = #cir.cl.uniform_work_group_size

// CIR-LABEL: #fn_attr1 =
// CIR-NOT: cl.kernel = #cir.cl.kernel
// CIR-NOT: cl.uniform_work_group_size

kernel void ker() {};
// CIR: cir.func @ker{{.*}} extra(#fn_attr) {
// LLVM: define{{.*}}@ker() #0

void foo() {};
// CIR: cir.func @foo{{.*}} extra(#fn_attr1) {
// LLVM: define{{.*}}@foo() #1

// LLVM-LABEL: attributes #0
// LLVM-UNIFORM: "uniform-work-group-size"="true"
// LLVM-NONUNIFORM: "uniform-work-group-size"="false"

// LLVM-LABEL: attributes #1
// LLVM-NOT: uniform-work-group-size

0 comments on commit 4dc4b4c

Please sign in to comment.