-
Notifications
You must be signed in to change notification settings - Fork 110
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CIR][Transform] Add constant load elimination pass
This patch tries to give a simple initial implementation for eliminating redundant loads of constant objects, an idea originally posted by OfekShilon. Specifically, this patch consists of two parts: * It adds a new unit attribute `const` to the `cir.alloca` operation. Presence of this attribute indicates that the alloca-ed object is declared `const` in the input source program. CIRGen is updated accordingly to start emitting this new attribute. * It adds a new pass to the CIR optimization pipeline. This new pass runs on function level, and identifies and eliminates all redundant loads of a constant alloca-ed object.
- Loading branch information
Showing
9 changed files
with
250 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
116 changes: 116 additions & 0 deletions
116
clang/lib/CIR/Dialect/Transforms/ConstLoadElimination.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
//===- ConstLoadElimination.cpp - performs redundant load elimination -----===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "PassDetail.h" | ||
#include "mlir/IR/Dominance.h" | ||
#include "mlir/Pass/Pass.h" | ||
#include "clang/CIR/Dialect/IR/CIRDialect.h" | ||
#include "clang/CIR/Dialect/Passes.h" | ||
|
||
using namespace mlir; | ||
using namespace cir; | ||
|
||
namespace { | ||
|
||
/// Removes redundant loads from the slot created by a `const` alloca.
///
/// Only operations that use the alloca result directly are considered. A
/// non-volatile load is rewritten in one of two ways:
///   - if a store to the slot dominates it, its uses take the stored value;
///   - otherwise, if other loads of the slot properly dominate it, its uses
///     take the result of the load at the head of that dominance chain.
/// Volatile loads are never erased.
///
/// \param dom    dominance information used to order the loads and stores.
/// \param alloca the slot to process; must carry the `const` attribute.
void processConstAlloca(DominanceInfo &dom, AllocaOp alloca) {
  assert(alloca.getConstant() && "must be a constant alloca");

  // Collect the loads from, and the stores to, the alloca-ed object.
  SmallVector<LoadOp> allLoads;
  SmallVector<StoreOp> allStores;
  for (Operation *user : alloca->getUsers()) {
    if (auto load = dyn_cast<LoadOp>(user)) {
      allLoads.push_back(load);
    } else if (auto store = dyn_cast<StoreOp>(user)) {
      // A store also shows up as a user when the slot pointer is the value
      // being stored somewhere else. Such a store does not initialize the
      // object and must not be forwarded to loads.
      if (store.getAddr() == alloca.getResult())
        allStores.push_back(store);
    }
  }

  // Step 1: forward stored values. Loads that are erased here are dropped
  // from further consideration instead of being kept around as null handles.
  SmallVector<LoadOp> survivingLoads;
  for (LoadOp load : allLoads) {
    bool forwarded = false;
    // Volatile loads are not candidates for elimination.
    if (!load.getIsVolatile()) {
      for (StoreOp store : allStores) {
        if (!dom.dominates(store, load))
          continue;
        load.replaceAllUsesWith(store.getValue());
        load.erase();
        forwarded = true;
        break;
      }
    }
    if (!forwarded)
      survivingLoads.push_back(load);
  }

  // Step 2: for every surviving load record one load that properly dominates
  // it (the first one found). Proper dominance is strict, so following these
  // links always terminates at a load that no other load dominates.
  DenseMap<LoadOp, LoadOp> dominatingLoad;
  for (LoadOp load : survivingLoads) {
    for (LoadOp candidate : survivingLoads) {
      if (dom.properlyDominates(candidate.getOperation(), load)) {
        dominatingLoad[load] = candidate;
        break;
      }
    }
  }

  // Step 3: resolve every non-volatile load to the head of its chain. All
  // replacements are computed before anything is erased so that no erased
  // operation is ever inspected.
  DenseMap<LoadOp, LoadOp> replacement;
  for (LoadOp load : survivingLoads) {
    // Volatile loads are not candidates for elimination.
    if (load.getIsVolatile())
      continue;

    LoadOp root = load;
    while (LoadOp next = dominatingLoad.lookup(root))
      root = next;

    if (root != load)
      replacement[load] = root;
  }

  // Step 4: apply the replacements. A chain head is never a key of
  // `replacement`, so every replacement value is still alive here.
  for (LoadOp load : survivingLoads) {
    auto it = replacement.find(load);
    if (it == replacement.end())
      continue;
    load.replaceAllUsesWith(it->second.getResult());
    load.erase();
  }
}
|
||
/// Runs constant-load elimination on every `const` alloca found in \p func.
///
/// The allocas are gathered up front and processed afterwards, so the IR is
/// not rewritten while the walk is still in progress.
void processFunc(mlir::cir::FuncOp func) {
  DominanceInfo dominance;

  SmallVector<AllocaOp> worklist;
  func->walk([&worklist](AllocaOp op) {
    if (!op.getConstant())
      return;
    worklist.push_back(op);
  });

  for (AllocaOp slot : worklist)
    processConstAlloca(dominance, slot);
}
|
||
struct ConstLoadEliminationPass | ||
: public ConstLoadEliminationBase<ConstLoadEliminationPass> { | ||
using ConstLoadEliminationBase::ConstLoadEliminationBase; | ||
|
||
void runOnOperation() override { getOperation()->walk(processFunc); } | ||
}; | ||
|
||
} // namespace | ||
|
||
std::unique_ptr<Pass> mlir::createConstLoadEliminationPass() { | ||
return std::make_unique<ConstLoadEliminationPass>(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --input-file=%t.cir %s | ||
|
||
int produce_int(); | ||
|
||
// Const-qualified local scalar initialized from a call; the CHECK lines expect its alloca to be tagged "const". | ||
void local_const_int() { | ||
const int x = produce_int(); | ||
} | ||
|
||
// CHECK-LABEL: @_Z15local_const_intv | ||
// CHECK: %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] | ||
// CHECK: } | ||
|
||
// Const-qualified by-value parameter; the CHECK lines expect its alloca to be tagged "const". | ||
void param_const_int(const int x) {} | ||
|
||
// CHECK-LABEL: @_Z15param_const_inti | ||
// CHECK: %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] | ||
// CHECK: } | ||
|
||
// Two-field aggregate used as the type of the const local in local_const_struct(). | ||
struct Foo { | ||
int a; | ||
int b; | ||
}; | ||
|
||
Foo produce_foo(); | ||
|
||
// Const-qualified local of struct type; the CHECK lines expect its alloca to be tagged "const". | ||
void local_const_struct() { | ||
const Foo x = produce_foo(); | ||
} | ||
|
||
// CHECK-LABEL: @_Z18local_const_structv | ||
// CHECK: %{{.+}} = cir.alloca !ty_Foo, !cir.ptr<!ty_Foo>, ["x", init, const] | ||
// CHECK: } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --input-file=%t.cir %s | ||
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -fclangir-mem2reg -emit-cir %s -o %t.cir | ||
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=MEM2REG | ||
|
||
int produce_int(); | ||
void blackbox(const int &); | ||
void blackbox(const volatile int &); | ||
|
||
// Reads a const local both before and after its address is passed to blackbox(). | ||
// The CHECK lines expect both reads to use the initializer value, with no load from x. | ||
int load_local_const_int() { | ||
const int x = produce_int(); | ||
int a = x; | ||
blackbox(x); | ||
int b = x; | ||
return a + b; | ||
} | ||
|
||
// CHECK-LABEL: @_Z20load_local_const_intv | ||
// CHECK: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64} | ||
// CHECK-NEXT: %[[#a_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64} | ||
// CHECK-NEXT: %[[#b_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64} | ||
// CHECK-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i | ||
// CHECK-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> | ||
// CHECK-NEXT: cir.store %[[#init]], %[[#a_slot]] : !s32i, !cir.ptr<!s32i> | ||
// CHECK-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> () | ||
// CHECK-NEXT: cir.store %[[#init]], %[[#b_slot]] : !s32i, !cir.ptr<!s32i> | ||
// CHECK: } | ||
|
||
// MEM2REG-LABEL: @_Z20load_local_const_intv | ||
// MEM2REG-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64} | ||
// MEM2REG-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i | ||
// MEM2REG-NEXT: cir.store %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> | ||
// MEM2REG-NEXT: cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> () | ||
// MEM2REG-NEXT: %{{.+}} = cir.binop(add, %[[#init]], %[[#init]]) nsw : !s32i | ||
// MEM2REG: } | ||
|
||
// Reads a const volatile local before and after the blackbox() call. | ||
// The CHECK lines expect both volatile loads of x to remain in the output. | ||
int load_volatile_local_const_int() { | ||
const volatile int x = produce_int(); | ||
int a = x; | ||
blackbox(x); | ||
int b = x; | ||
return a + b; | ||
} | ||
|
||
// CHECK-LABEL: @_Z29load_volatile_local_const_intv | ||
// CHECK: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64} | ||
// CHECK-NEXT: %[[#a_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64} | ||
// CHECK-NEXT: %[[#b_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64} | ||
// CHECK-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i | ||
// CHECK-NEXT: cir.store volatile %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> | ||
// CHECK-NEXT: %[[#reload_1:]] = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i | ||
// CHECK-NEXT: cir.store %[[#reload_1]], %[[#a_slot]] : !s32i, !cir.ptr<!s32i> | ||
// CHECK-NEXT: cir.call @_Z8blackboxRVKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> () | ||
// CHECK-NEXT: %[[#reload_2:]] = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i | ||
// CHECK-NEXT: cir.store %[[#reload_2]], %[[#b_slot]] : !s32i, !cir.ptr<!s32i> | ||
// CHECK: } | ||
|
||
// MEM2REG-LABEL: @_Z29load_volatile_local_const_intv | ||
// MEM2REG-NEXT: %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64} | ||
// MEM2REG-NEXT: %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i | ||
// MEM2REG-NEXT: cir.store volatile %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i> | ||
// MEM2REG-NEXT: %{{.+}} = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i | ||
// MEM2REG-NEXT: cir.call @_Z8blackboxRVKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> () | ||
// MEM2REG-NEXT: %{{.+}} = cir.load volatile %[[#x_slot]] : !cir.ptr<!s32i>, !s32i | ||
// MEM2REG: } |