Previously, I posted a topic ("Inconsistency between GCC and LLVM in -mcmodel=large") to discuss the different relocation types that LLVM and GCC emit under the large code model. Since then, I have added a new option that prevents LLVM from generating group relocations (the MOVW_UABS_G* family) for AArch64, as shown below:
clang -mcmodel=large -mno-large-group-reloc -c -o test.large.nogroup.o test.c
clang -mcmodel=large -c -o test.large.o test.c
readelf -r test.large.o
Relocation section '.rela.text' at offset 0x240 contains 5 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000000 000400000108 R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
000000000004 00040000010a R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
000000000008 00040000010c R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
00000000000c 00040000010d R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
00000000002c 00080000011b R_AARCH64_CALL26 0000000000000000 test + 0
Relocation section '.rela.eh_frame' at offset 0x2b8 contains 2 entries:
Offset Info Type Sym. Value Sym. Name + Addend
00000000001c 000200000104 R_AARCH64_PREL64 0000000000000000 .text + 0
000000000038 000200000104 R_AARCH64_PREL64 0000000000000000 .text + 14
readelf -r test.large.nogroup.o
Relocation section '.rela.text' at offset 0x238 contains 3 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000000000 000400000113 R_AARCH64_ADR_PRE 0000000000000000 .rodata.str1.1 + 0
000000000004 000400000115 R_AARCH64_ADD_ABS 0000000000000000 .rodata.str1.1 + 0
000000000024 00080000011b R_AARCH64_CALL26 0000000000000000 test + 0
Relocation section '.rela.eh_frame' at offset 0x280 contains 2 entries:
Offset Info Type Sym. Value Sym. Name + Addend
00000000001c 000200000104 R_AARCH64_PREL64 0000000000000000 .text + 0
000000000038 000200000104 R_AARCH64_PREL64 0000000000000000 .text + c
And here’s my patch based on clang 15.0.7:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3cab37b21aaf..3cff00167510 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3587,6 +3587,8 @@ def mvscale_max_EQ : Joined<["-"], "mvscale-max=">,
HelpText<"Specify the vscale maximum. Defaults to the"
" vector length agnostic value of \"0\". (AArch64 only)">,
MarshallingInfoInt<LangOpts<"VScaleMax">>;
+def mno_large_group_reloc: Flag<["-"], "mno-large-group-reloc">, Group<m_aarch64_Features_Group>,
+ HelpText<"Disable group relocation type when code model is large">;
def msign_return_address_EQ : Joined<["-"], "msign-return-address=">,
Flags<[CC1Option]>, Group<m_Group>, Values<"none,all,non-leaf">,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3704ed858668..217600ccbee7 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4789,6 +4789,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (Args.getLastArg(options::OPT_save_temps_EQ))
Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
+ if (Args.getLastArg(options::OPT_mno_large_group_reloc)){
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("-mno-large-group-reloc");
+ }
+
auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
options::OPT_fmemory_profile_EQ,
options::OPT_fno_memory_profile);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index eb8d0552173d..b0379c77ebab 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -65,6 +65,10 @@ namespace {
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
+static cl::opt<bool> DisableLargeGroupReloc(
+ "mno-large-group-reloc",
+ cl::desc("Disable group relocation type when code model is large"),
+ cl::init(false));
class AArch64InstructionSelector : public InstructionSelector {
public:
@@ -2741,7 +2745,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (OpFlags & AArch64II::MO_GOT) {
I.setDesc(TII.get(AArch64::LOADgot));
I.getOperand(1).setTargetFlags(OpFlags);
- } else if (TM.getCodeModel() == CodeModel::Large) {
+ } else if (TM.getCodeModel() == CodeModel::Large && !DisableLargeGroupReloc) {
// Materialize the global using movz/movk instructions.
materializeLargeCMVal(I, GV, OpFlags);
I.eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
index 91f0724a329b..21eef5828972 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
@@ -1,6 +1,7 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large %s | FileCheck %s --check-prefix=LARGE
+# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large -mno-large-group-reloc %s | FileCheck %s --check-prefix=NO-LARGE-GROUP-RELOC
--- |
; ModuleID = 'blockaddress.ll'
source_filename = "blockaddress.ll"
@@ -49,6 +50,16 @@ body: |
; LARGE: STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store (p0) into @addr)
; LARGE: BR [[MOVKXi2]]
; LARGE: bb.1.block (address-taken):
+ ; NO-LARGE-GROUP-RELOC-LABEL: name: test_blockaddress
+ ; NO-LARGE-GROUP-RELOC: bb.0 (%ir-block.0):
+ ; NO-LARGE-GROUP-RELOC: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 0
+ ; NO-LARGE-GROUP-RELOC: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 16
+ ; NO-LARGE-GROUP-RELOC: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 32
+ ; NO-LARGE-GROUP-RELOC: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) blockaddress(@test_blockaddress, %ir-block.block), 48
+ ; NO-LARGE-GROUP-RELOC: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr
+ ; NO-LARGE-GROUP-RELOC: STRXui [[MOVKXi2]], [[MOVaddr]], 0 :: (store (p0) into @addr)
+ ; NO-LARGE-GROUP-RELOC: BR [[MOVKXi2]]
+ ; NO-LARGE-GROUP-RELOC: bb.1.block (address-taken):
bb.1 (%ir-block.0):
%0:gpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
%1:gpr(p0) = G_GLOBAL_VALUE @addr
What are your thoughts on this approach? I would appreciate hearing your opinion on the matter.