From 20bcbb943ce6e86e4f4b17a545507eff482184d8 Mon Sep 17 00:00:00 2001 From: "Chibuoyim (Wilson) Ogbonna" Date: Tue, 17 Sep 2024 21:00:02 +0300 Subject: [PATCH] [CIR][CodeGen] Fix packed structures (#839) Consider the following code snippet `test.c`: ``` typedef struct { char b; int c; } D; typedef struct { D e; int f; } E; void f1() { E a = {}; } ``` When emitting the CIR using `bin/clang test.c -Xclang -fclangir -Xclang -emit-cir -S -o -` the current implementation gets: ``` NYI UNREACHABLE executed at ~/clangir/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp:338! ``` This is only one of the many tests where this happens. Comparing the implementations of `CIR/CodeGen/CIRRecordLayoutBuilder.cpp` and clang's codegen `lib/CodeGen/CGRecordLayoutBuilder.cpp`, there is some padding missing for packed structures, and some alignments that need to be corrected. This PR also updates 2 existing tests. In the first test, `structural-binding.cpp`, I updated some `cir.get_member` indexes. In the second test, `packed-structs.c`, I updated the `cir` layout for the structure, and added more tests. I have compared the changes I made in the tests to the original clang codegen and everything seems fine. --- .../CIR/CodeGen/CIRRecordLayoutBuilder.cpp | 35 +++++- clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp | 7 +- clang/test/CIR/CodeGen/packed-structs.c | 110 ++++++++++++++++-- clang/test/CIR/CodeGen/structural-binding.cpp | 10 +- 4 files changed, 145 insertions(+), 17 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp index 7932bb8891db..301b1efc6ab5 100644 --- a/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp @@ -69,6 +69,8 @@ struct CIRRecordLowering final { /// Determines if we need a packed llvm struct. void determinePacked(bool NVBaseType); + /// Inserts padding everywhere it's needed. + void insertPadding(); void computeVolatileBitfields(); void accumulateBases(); @@ -294,6 +296,7 @@ void CIRRecordLowering::lower(bool nonVirtualBaseType) { members.push_back(StorageInfo(Size, getUIntNType(8))); determinePacked(nonVirtualBaseType); + insertPadding(); members.pop_back(); fillOutputFields(); @@ -657,6 +660,33 @@ void CIRRecordLowering::determinePacked(bool NVBaseType) { members.back().data = getUIntNType(astContext.toBits(Alignment)); } +void CIRRecordLowering::insertPadding() { + std::vector> Padding; + CharUnits Size = CharUnits::Zero(); + for (std::vector::const_iterator Member = members.begin(), + MemberEnd = members.end(); + Member != MemberEnd; ++Member) { + if (!Member->data) + continue; + CharUnits Offset = Member->offset; + assert(Offset >= Size); + // Insert padding if we need to. + if (Offset != + Size.alignTo(isPacked ? CharUnits::One() : getAlignment(Member->data))) + Padding.push_back(std::make_pair(Size, Offset - Size)); + Size = Offset + getSize(Member->data); + } + if (Padding.empty()) + return; + // Add the padding to the Members list and sort it. + for (std::vector>::const_iterator + Pad = Padding.begin(), + PadEnd = Padding.end(); + Pad != PadEnd; ++Pad) + members.push_back(StorageInfo(Pad->first, getByteArrayType(Pad->second))); + llvm::stable_sort(members); +} + std::unique_ptr CIRGenTypes::computeRecordLayout(const RecordDecl *D, mlir::cir::StructType *Ty) { @@ -674,9 +704,8 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *D, CIRRecordLowering baseBuilder(*this, D, /*Packed=*/builder.isPacked); baseBuilder.lower(/*NonVirtualBaseType=*/true); auto baseIdentifier = getRecordTypeName(D, ".base"); - BaseTy = - Builder.getCompleteStructTy(baseBuilder.fieldTypes, baseIdentifier, - /*packed=*/false, D); + BaseTy = Builder.getCompleteStructTy( + baseBuilder.fieldTypes, baseIdentifier, baseBuilder.isPacked, D); // TODO(cir): add something like addRecordTypeName // BaseTy and Ty must agree on their packedness for getCIRFieldNo to work diff --git a/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp b/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp index c030cd2f352f..2dadc469f789 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDataLayout.cpp @@ -23,7 +23,8 @@ StructLayout::StructLayout(mlir::cir::StructType ST, const CIRDataLayout &DL) assert(!::cir::MissingFeatures::recordDeclIsPacked() && "Cannot identify packed structs"); - const llvm::Align TyAlign = DL.getABITypeAlign(Ty); + const llvm::Align TyAlign = + ST.getPacked() ? llvm::Align(1) : DL.getABITypeAlign(Ty); // Add padding if necessary to align the data element properly. // Currently the only structure with scalable size will be the homogeneous @@ -171,6 +172,10 @@ llvm::Align CIRDataLayout::getAlignment(mlir::Type Ty, bool abiOrPref) const { if (::cir::MissingFeatures::recordDeclIsPacked() && abiOrPref) llvm_unreachable("NYI"); + auto stTy = llvm::dyn_cast(Ty); + if (stTy && stTy.getPacked() && abiOrPref) + return llvm::Align(1); + // Get the layout annotation... which is lazily created on demand. const StructLayout *Layout = getStructLayout(llvm::cast(Ty)); diff --git a/clang/test/CIR/CodeGen/packed-structs.c b/clang/test/CIR/CodeGen/packed-structs.c index 3488418f13b0..2379c8d06896 100644 --- a/clang/test/CIR/CodeGen/packed-structs.c +++ b/clang/test/CIR/CodeGen/packed-structs.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -// RUN: FileCheck --input-file=%t.cir %s +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s #pragma pack(1) @@ -20,18 +22,110 @@ typedef struct { } __attribute__((aligned(2))) C; -// CHECK: !ty_A = !cir.struct -// CHECK: !ty_C = !cir.struct -// CHECK: !ty_B = !cir.struct}> +// CIR: !ty_A = !cir.struct +// CIR: !ty_C = !cir.struct +// CIR: !ty_D = !cir.struct}> +// CIR: !ty_I = !cir.struct, ["a"] {alignment = 1 : i64} -// CHECK: %1 = cir.alloca !ty_B, !cir.ptr, ["b"] {alignment = 1 : i64} -// CHECK: %2 = cir.alloca !ty_C, !cir.ptr, ["c"] {alignment = 2 : i64} +// LLVM: %struct.A = type <{ i32, i8 }> +// LLVM: %struct.B = type <{ i32, i8, [6 x %struct.A] }> +// LLVM: %struct.C = type <{ i32, i8, i8 }> +// LLVM: %struct.E = type <{ %struct.D, i32 }> +// LLVM: %struct.D = type <{ i8, i8, i32 }> +// LLVM: %struct.G = type { %struct.F, i8 } +// LLVM: %struct.F = type <{ i64, i8 }> +// LLVM: %struct.J = type <{ i8, i8, i8, i8, %struct.I, i32 }> +// LLVM: %struct.I = type <{ i8, %struct.H }> +// LLVM: %struct.H = type { i32, %union.anon.{{.*}} } + +// CIR: cir.func {{.*@foo()}} +// CIR: {{.*}} = cir.alloca !ty_A, !cir.ptr, ["a"] {alignment = 1 : i64} +// CIR: {{.*}} = cir.alloca !ty_B, !cir.ptr, ["b"] {alignment = 1 : i64} +// CIR: {{.*}} = cir.alloca !ty_C, !cir.ptr, ["c"] {alignment = 2 : i64} + +// LLVM: {{.*}} = alloca %struct.A, i64 1, align 1 +// LLVM: {{.*}} = alloca %struct.B, i64 1, align 1 +// LLVM: {{.*}} = alloca %struct.C, i64 1, align 2 void foo() { A a; B b; C c; } +#pragma pack(2) + +typedef struct { + char b; + int c; +} D; + +typedef struct { + D e; + int f; +} E; + +// CIR: cir.func {{.*@f1()}} +// CIR: {{.*}} = cir.alloca !ty_E, !cir.ptr, ["a"] {alignment = 2 : i64} + +// LLVM: {{.*}} = alloca %struct.E, i64 1, align 2 +void f1() { + E a = {}; +} + +#pragma pack(1) + +typedef struct { + long b; + char c; +} F; + +typedef struct { + F e; + char f; +} G; + +// CIR: cir.func {{.*@f2()}} +// CIR: {{.*}} = cir.alloca !ty_G, !cir.ptr, ["a"] {alignment = 1 : i64} + +// LLVM: {{.*}} = alloca %struct.G, i64 1, align 1 +void f2() { + G a = {}; +} + +#pragma pack(1) + +typedef struct { + int d0; + union { + char null; + int val; + } value; +} H; + +typedef struct { + char t; + H d; +} I; +typedef struct { + char a0; + char a1; + char a2; + char a3; + I c; + int a; +} J; + +// CIR: cir.func {{.*@f3()}} +// CIR: {{.*}} = cir.alloca !ty_J, !cir.ptr, ["a"] {alignment = 1 : i64} + +// LLVM: {{.*}} = alloca %struct.J, i64 1, align 1 +void f3() { + J a = {0}; +} diff --git a/clang/test/CIR/CodeGen/structural-binding.cpp b/clang/test/CIR/CodeGen/structural-binding.cpp index c37788b9d678..d70a9509c6f9 100644 --- a/clang/test/CIR/CodeGen/structural-binding.cpp +++ b/clang/test/CIR/CodeGen/structural-binding.cpp @@ -52,19 +52,19 @@ void f(A &a) { // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr>, !cir.ptr // CIR: {{.*}} = cir.get_member %[[a]][0] {name = "a"} : !cir.ptr -> !cir.ptr // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr>, !cir.ptr - // CIR: {{.*}} = cir.get_member %[[a]][1] {name = "b"} : !cir.ptr -> !cir.ptr + // CIR: {{.*}} = cir.get_member %[[a]][2] {name = "b"} : !cir.ptr -> !cir.ptr // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr>, !cir.ptr - // CIR: {{.*}} = cir.get_member %[[a]][2] {name = "c"} : !cir.ptr -> !cir.ptr + // CIR: {{.*}} = cir.get_member %[[a]][3] {name = "c"} : !cir.ptr -> !cir.ptr // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 0 - // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 1 // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 2 + // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 3 auto [x2, y2, z2] = a; (x2, y2, z2); // CIR: cir.call @_ZN1AC1ERKS_(%2, {{.*}}) : (!cir.ptr, !cir.ptr) -> () // CIR: {{.*}} = cir.get_member %2[0] {name = "a"} : !cir.ptr -> !cir.ptr - // CIR: {{.*}} = cir.get_member %2[1] {name = "b"} : !cir.ptr -> !cir.ptr - // CIR: {{.*}} = cir.get_member %2[2] {name = "c"} : !cir.ptr -> !cir.ptr + // CIR: {{.*}} = cir.get_member %2[2] {name = "b"} : !cir.ptr -> !cir.ptr + // CIR: {{.*}} = cir.get_member %2[3] {name = "c"} : !cir.ptr -> !cir.ptr // for the rest, just expect the codegen does't crash auto &&[x3, y3, z3] = a;