[CIR] Lower nested local constant alloca #1261

Open · wants to merge 1 commit into main
22 changes: 22 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -3562,6 +3562,28 @@ def LLVMIntrinsicCallOp : CIR_Op<"llvm.intrinsic"> {

}

//===----------------------------------------------------------------------===//
// InvariantGroupOp
//===----------------------------------------------------------------------===//

def InvariantGroupOp
: CIR_Op<"invariant_group", [Pure, SameOperandsAndResultType]> {
let summary = "Start an invariant group";
let description = [{
The `cir.invariant_group` operation takes a single pointer value as its argument
and returns the same pointer value with a fresh invariant group. All loads
and stores that access the returned pointer value are presumed by the
optimizer to load or store the same value.
}];

let arguments = (ins CIR_PointerType:$ptr);
let results = (outs CIR_PointerType:$result);

let assemblyFormat = [{
$ptr `:` type($result) attr-dict
}];
}
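
For reference, given the assembly format above, a use of the op in textual CIR would look roughly like the sketch below (the `!s32i` pointee type and the value names are illustrative only); the lowering added later in this patch maps the op onto LLVM's invariant-group machinery.

```mlir
// %0 is an existing !cir.ptr<!s32i> value (e.g. produced by cir.alloca);
// %1 is the same address, tagged with a fresh invariant group.
%1 = cir.invariant_group %0 : !cir.ptr<!s32i>
```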

//===----------------------------------------------------------------------===//
// DeleteArrayOp
//===----------------------------------------------------------------------===//
48 changes: 40 additions & 8 deletions clang/lib/CIR/Dialect/Transforms/HoistAllocas.cpp
@@ -28,6 +28,27 @@ struct HoistAllocasPass : public HoistAllocasBase<HoistAllocasPass> {
void runOnOperation() override;
};

// Returns true if the operation is nested inside a CIR loop operation
// (any operation implementing LoopOpInterface).
static bool isOpInLoop(mlir::Operation *op) {
return op->getParentOfType<cir::LoopOpInterface>();
}

// Returns true if the alloca is the slot for a variable declared in the
// condition of a while loop, detected by finding a store to it directly
// inside the cond region of a cir.while operation.
static bool isWhileCondition(cir::AllocaOp alloca) {
for (mlir::Operation *user : alloca->getUsers()) {
if (!mlir::isa<cir::StoreOp>(user))
continue;

auto store = mlir::cast<cir::StoreOp>(user);
mlir::Operation *storeParentOp = store->getParentOp();
if (!mlir::isa<cir::WhileOp>(storeParentOp))
continue;

auto whileOp = mlir::cast<cir::WhileOp>(storeParentOp);
return &whileOp.getCond() == store->getParentRegion();
}

return false;
}

static void process(cir::FuncOp func) {
if (func.getRegion().empty())
return;
@@ -49,16 +70,27 @@ static void process(cir::FuncOp func) {
mlir::Operation *insertPoint = &*entryBlock.begin();

for (auto alloca : allocas) {
- alloca->moveBefore(insertPoint);
if (alloca.getConstant()) {
- // Hoisted alloca may come from the body of a loop, in which case the
- // stack slot is re-used by multiple objects alive in different iterations
- // of the loop. In theory, each of these objects are still constant within
- // their lifetimes, but currently we're not emitting metadata to further
- // describe this. So for now let's behave conservatively and remove the
- // const flag on nested allocas when hoisting them.
- alloca.setConstant(false);
+ if (isOpInLoop(alloca)) {
+ mlir::OpBuilder builder(alloca);
+ auto invariantGroupOp =
+ builder.create<cir::InvariantGroupOp>(alloca.getLoc(), alloca);
+ alloca->replaceUsesWithIf(
+ invariantGroupOp,
+ [op = invariantGroupOp.getOperation()](mlir::OpOperand &use) {
+ return use.getOwner() != op;
+ });
+ } else if (isWhileCondition(alloca)) {
+ // The alloca represents a variable declared as the condition of a while
+ // loop. In CIR, the alloca would be emitted at a scope outside of the
+ // while loop. We have to remove the constant flag during hoisting,
+ // otherwise we would be telling the optimizer that the alloca-ed value
+ // is constant across all iterations of the while loop.
+ alloca.setConstant(false);
+ }
}
+
+ alloca->moveBefore(insertPoint);
}
}
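
As a sketch of what the updated pass produces for a constant alloca declared inside a loop body (structure and names are illustrative, loosely following the `local_const_in_loop` test added below): the alloca is still hoisted to the entry block with its `const` flag preserved, and a `cir.invariant_group` op is left at the alloca's original position so that each iteration's loads and stores form their own group.

```mlir
cir.func @sketch() {
  %x = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]  // hoisted to entry
  cir.for : cond {
    // ...
  } body {
    // Fresh invariant group per iteration, at the alloca's original spot;
    // all former uses of %x inside the body now go through %xg.
    %xg = cir.invariant_group %x : !cir.ptr<!s32i>
    // ...
    cir.yield
  } step {
    // ...
  }
  cir.return
}
```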

42 changes: 29 additions & 13 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1649,6 +1649,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> &memorder) {
llvm_unreachable("unknown memory order");
}

// Returns true if a load from or store to the given address may be tagged
// with invariant metadata: the address is either a const-marked alloca or
// the result of cir.invariant_group.
static bool isLoadOrStoreInvariant(mlir::Value addr) {
if (auto addrAllocaOp =
mlir::dyn_cast_if_present<cir::AllocaOp>(addr.getDefiningOp()))
return addrAllocaOp.getConstant();
if (mlir::isa_and_present<cir::InvariantGroupOp>(addr.getDefiningOp()))
return true;
return false;
}

mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
cir::LoadOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -1668,12 +1677,8 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
auto invariant = false;
// Under -O1 or higher optimization levels, add the invariant metadata if the
// load operation loads from a constant object.
- if (lowerMod &&
- lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
- auto addrAllocaOp =
- mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
- invariant = addrAllocaOp && addrAllocaOp.getConstant();
- }
+ if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
+ invariant = isLoadOrStoreInvariant(op.getAddr());

// TODO: nontemporal, syncscope.
auto newLoad = rewriter.create<mlir::LLVM::LoadOp>(
@@ -1708,12 +1713,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
auto invariant = false;
// Under -O1 or higher optimization levels, add the invariant metadata if the
// store operation stores to a constant object.
- if (lowerMod &&
- lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0) {
- auto addrAllocaOp =
- mlir::dyn_cast_if_present<cir::AllocaOp>(op.getAddr().getDefiningOp());
- invariant = addrAllocaOp && addrAllocaOp.getConstant();
- }
+ if (lowerMod && lowerMod->getContext().getCodeGenOpts().OptimizationLevel > 0)
+ invariant = isLoadOrStoreInvariant(op.getAddr());

// Convert adapted value to its memory type if needed.
mlir::Value value = emitToMemory(rewriter, dataLayout,
@@ -3700,6 +3701,20 @@ mlir::LogicalResult CIRToLLVMInlineAsmOpLowering::matchAndRewrite(
return mlir::success();
}

mlir::LogicalResult CIRToLLVMInvariantGroupOpLowering::matchAndRewrite(
cir::InvariantGroupOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
// At -O0 no invariant metadata is attached to loads and stores, so
// laundering the pointer serves no purpose; simply forward the operand.
if (!lowerMod ||
lowerMod->getContext().getCodeGenOpts().OptimizationLevel == 0) {
rewriter.replaceOp(op, adaptor.getPtr());
return mlir::success();
}

rewriter.replaceOpWithNewOp<mlir::LLVM::LaunderInvariantGroupOp>(
op, adaptor.getPtr());
return mlir::success();
}
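
Under -O1 and above this should surface in LLVM IR as a call to `llvm.launder.invariant.group`, which is what the new `local_const_in_loop` test below checks for.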

mlir::LogicalResult CIRToLLVMPrefetchOpLowering::matchAndRewrite(
cir::PrefetchOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -4143,7 +4158,8 @@ void populateCIRToLLVMConversionPatterns(
CIRToLLVMBaseDataMemberOpLowering,
CIRToLLVMCmpOpLowering,
CIRToLLVMDerivedDataMemberOpLowering,
- CIRToLLVMGetRuntimeMemberOpLowering
+ CIRToLLVMGetRuntimeMemberOpLowering,
+ CIRToLLVMInvariantGroupOpLowering
// clang-format on
>(converter, patterns.getContext(), lowerModule);
patterns.add<
15 changes: 15 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -962,6 +962,21 @@ class CIRToLLVMInlineAsmOpLowering
mlir::ConversionPatternRewriter &) const override;
};

class CIRToLLVMInvariantGroupOpLowering
: public mlir::OpConversionPattern<cir::InvariantGroupOp> {
cir::LowerModule *lowerMod;

public:
CIRToLLVMInvariantGroupOpLowering(const mlir::TypeConverter &typeConverter,
mlir::MLIRContext *context,
cir::LowerModule *lowerModule)
: OpConversionPattern(typeConverter, context), lowerMod(lowerModule) {}

mlir::LogicalResult
matchAndRewrite(cir::InvariantGroupOp op, OpAdaptor,
mlir::ConversionPatternRewriter &) const override;
};

class CIRToLLVMPrefetchOpLowering
: public mlir::OpConversionPattern<cir::PrefetchOp> {
public:
85 changes: 85 additions & 0 deletions clang/test/CIR/CodeGen/const-alloca.cpp
@@ -5,6 +5,7 @@

int produce_int();
void blackbox(const int &);
void consume(int);

void local_const_int() {
const int x = produce_int();
@@ -85,3 +86,87 @@ int local_const_optimize() {
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#slot]])
// LLVM-NEXT: ret i32 %[[#init]]
// LLVM-NEXT: }

int local_scoped_const() {
{
const int x = produce_int();
blackbox(x);
return x;
}
}

// CIR-LABEL: @_Z18local_scoped_constv()
// CIR: cir.scope {
// CIR-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
// CIR-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
// CIR-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> tbaa([#tbaa])
// CIR-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
// CIR-NEXT: %[[#x_reload:]] = cir.load %[[#x_slot]] : !cir.ptr<!s32i>, !s32i tbaa([#tbaa])
// CIR-NEXT: cir.store %[[#x_reload]], %[[#ret_slot:]] : !s32i, !cir.ptr<!s32i>
// CIR-NEXT: %[[#ret:]] = cir.load %[[#ret_slot]] : !cir.ptr<!s32i>, !s32i
// CIR-NEXT: cir.return %[[#ret]] : !s32i
// CIR-NEXT: }
// CIR: }

// LLVM-LABEL: @_Z18local_scoped_constv()
// LLVM-NEXT: %[[#x_slot:]] = alloca i32, align 4
// LLVM-NEXT: %[[#init:]] = tail call i32 @_Z11produce_intv()
// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_slot]], align 4, !invariant.group !{{.+}}
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
// LLVM-NEXT: ret i32 %[[#init]]
// LLVM-NEXT: }

void local_const_in_loop() {
for (int i = 0; i < 10; ++i) {
const int x = produce_int();
blackbox(x);
consume(x);
}
}

// CIR-LABEL: @_Z19local_const_in_loopv
// CIR: cir.scope {
// CIR: cir.for : cond {
// CIR: } body {
// CIR-NEXT: cir.scope {
// CIR-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
// CIR-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
// CIR-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> tbaa([#tbaa])
// CIR-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
// CIR-NEXT: %[[#x_reload:]] = cir.load %[[#x_slot]] : !cir.ptr<!s32i>, !s32i tbaa([#tbaa])
// CIR-NEXT: cir.call @_Z7consumei(%[[#x_reload]]) : (!s32i) -> ()
// CIR-NEXT: }
// CIR-NEXT: cir.yield
// CIR-NEXT: } step {
// CIR: }
// CIR-NEXT: }
// CIR-NEXT: cir.return
// CIR-NEXT: }

// LLVM-LABEL: @_Z19local_const_in_loopv()
// LLVM: %[[#x_ptr:]] = call ptr @llvm.launder.invariant.group.p0(ptr nonnull %1)
// LLVM-NEXT: %[[#init:]] = call i32 @_Z11produce_intv()
// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_ptr]], align 4, !invariant.group !{{.+}}
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_ptr]])
// LLVM-NEXT: call void @_Z7consumei(i32 %[[#init]])
// LLVM: }

void local_const_in_while_condition() {
while (const int x = produce_int()) {
blackbox(x);
}
}

// LLVM-LABEL: @_Z30local_const_in_while_conditionv()
// LLVM: %[[#x_slot:]] = alloca i32, align 4
// LLVM-NEXT: %[[#init:]] = tail call i32 @_Z11produce_intv()
// LLVM-NEXT: store i32 %[[#init]], ptr %[[#x_slot]], align 4
// LLVM-NEXT: %[[loop_cond:.+]] = icmp eq i32 %[[#init]], 0
// LLVM-NEXT: br i1 %[[loop_cond]], label %{{.+}}, label %[[loop_body:.+]]
// LLVM: [[loop_body]]:
// LLVM-NEXT: call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
// LLVM-NEXT: %[[#next:]] = call i32 @_Z11produce_intv()
// LLVM-NEXT: store i32 %[[#next]], ptr %[[#x_slot]], align 4
// LLVM-NEXT: %[[cond:.+]] = icmp eq i32 %[[#next]], 0
// LLVM-NEXT: br i1 %[[cond]], label %{{.+}}, label %[[loop_body]]
// LLVM: }