Patch to Disable Group Relocation in AArch64 Large Code Model

Previously, I posted a topic (Inconsistency between GCC and LLVM in mcmodel=large) to discuss the different relocation types that LLVM and GCC use. Afterwards, I created a new option that prevents LLVM from generating group relocations for AArch64, as shown below:

clang -mcmodel=large -mno-large-group-reloc -c -o test.large.nogroup.o test.c

clang -mcmodel=large -c -o test.large.o test.c

readelf -r test.large.o

Relocation section '.rela.text' at offset 0x240 contains 5 entries:
  Offset          Info           Type           Sym. Value    Sym. Name + Addend
000000000000  000400000108 R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
000000000004  00040000010a R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
000000000008  00040000010c R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
00000000000c  00040000010d R_AARCH64_MOVW_UA 0000000000000000 .rodata.str1.1 + 0
00000000002c  00080000011b R_AARCH64_CALL26  0000000000000000 test + 0

Relocation section '.rela.eh_frame' at offset 0x2b8 contains 2 entries:
  Offset          Info           Type           Sym. Value    Sym. Name + Addend
00000000001c  000200000104 R_AARCH64_PREL64  0000000000000000 .text + 0
000000000038  000200000104 R_AARCH64_PREL64  0000000000000000 .text + 14

readelf -r test.large.nogroup.o

Relocation section '.rela.text' at offset 0x238 contains 3 entries:
  Offset          Info           Type           Sym. Value    Sym. Name + Addend
000000000000  000400000113 R_AARCH64_ADR_PRE 0000000000000000 .rodata.str1.1 + 0
000000000004  000400000115 R_AARCH64_ADD_ABS 0000000000000000 .rodata.str1.1 + 0
000000000024  00080000011b R_AARCH64_CALL26  0000000000000000 test + 0

Relocation section '.rela.eh_frame' at offset 0x280 contains 2 entries:
  Offset          Info           Type           Sym. Value    Sym. Name + Addend
00000000001c  000200000104 R_AARCH64_PREL64  0000000000000000 .text + 0
000000000038  000200000104 R_AARCH64_PREL64  0000000000000000 .text + c

And here’s my patch based on clang 15.0.7:

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3cab37b21aaf..3cff00167510 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3587,6 +3587,8 @@ def mvscale_max_EQ : Joined<["-"], "mvscale-max=">,
   HelpText<"Specify the vscale maximum. Defaults to the"
            " vector length agnostic value of \"0\". (AArch64 only)">,
   MarshallingInfoInt<LangOpts<"VScaleMax">>;
+def mno_large_group_reloc: Flag<["-"], "mno-large-group-reloc">, Group<m_aarch64_Features_Group>,
+  HelpText<"Disable group relocation type when code model is large">;
 
 def msign_return_address_EQ : Joined<["-"], "msign-return-address=">,
   Flags<[CC1Option]>, Group<m_Group>, Values<"none,all,non-leaf">,
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 3704ed858668..217600ccbee7 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4789,6 +4789,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.getLastArg(options::OPT_save_temps_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
 
+  if (Args.getLastArg(options::OPT_mno_large_group_reloc)){
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back("-mno-large-group-reloc");
+  }
+
   auto *MemProfArg = Args.getLastArg(options::OPT_fmemory_profile,
                                      options::OPT_fmemory_profile_EQ,
                                      options::OPT_fno_memory_profile);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index eb8d0552173d..b0379c77ebab 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -65,6 +65,10 @@ namespace {
 #include "AArch64GenGlobalISel.inc"
 #undef GET_GLOBALISEL_PREDICATE_BITSET
 
+static cl::opt<bool> DisableLargeGroupReloc(
+  "mno-large-group-reloc",
+  cl::desc("Disable group relocation type when code model is large"),
+  cl::init(false));
 
 class AArch64InstructionSelector : public InstructionSelector {
 public:
@@ -2741,7 +2745,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     if (OpFlags & AArch64II::MO_GOT) {
       I.setDesc(TII.get(AArch64::LOADgot));
       I.getOperand(1).setTargetFlags(OpFlags);
-    } else if (TM.getCodeModel() == CodeModel::Large) {
+    } else if (TM.getCodeModel() == CodeModel::Large && !DisableLargeGroupReloc) {
       // Materialize the global using movz/movk instructions.
       materializeLargeCMVal(I, GV, OpFlags);
       I.eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
index 91f0724a329b..21eef5828972 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir
@@ -1,6 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s
 # RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large %s | FileCheck %s --check-prefix=LARGE
+# RUN: llc -mtriple=aarch64-unknown-unknown -o - -verify-machineinstrs -run-pass=instruction-select -code-model=large -mno-large-group-reloc %s | FileCheck %s --check-prefix=NO-LARGE-GROUP-RELOC
 --- |
   ; ModuleID = 'blockaddress.ll'
   source_filename = "blockaddress.ll"
@@ -49,6 +50,16 @@ body:             |
   ; LARGE:   STRXui [[MOVKXi2]], [[MOVKXi5]], 0 :: (store (p0) into @addr)
   ; LARGE:   BR [[MOVKXi2]]
   ; LARGE: bb.1.block (address-taken):
+  ; NO-LARGE-GROUP-RELOC-LABEL: name: test_blockaddress
+  ; NO-LARGE-GROUP-RELOC: bb.0 (%ir-block.0):
+  ; NO-LARGE-GROUP-RELOC: [[MOVZXi:%[0-9]+]]:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 0
+  ; NO-LARGE-GROUP-RELOC: [[MOVKXi:%[0-9]+]]:gpr64 = MOVKXi [[MOVZXi]], target-flags(aarch64-g1, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 16
+  ; NO-LARGE-GROUP-RELOC: [[MOVKXi1:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi]], target-flags(aarch64-g2, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block), 32
+  ; NO-LARGE-GROUP-RELOC: [[MOVKXi2:%[0-9]+]]:gpr64 = MOVKXi [[MOVKXi1]], target-flags(aarch64-g3) blockaddress(@test_blockaddress, %ir-block.block), 48
+  ; NO-LARGE-GROUP-RELOC: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr
+  ; NO-LARGE-GROUP-RELOC: STRXui [[MOVKXi2]], [[MOVaddr]], 0 :: (store (p0) into @addr)
+  ; NO-LARGE-GROUP-RELOC: BR [[MOVKXi2]]
+  ; NO-LARGE-GROUP-RELOC: bb.1.block (address-taken):
   bb.1 (%ir-block.0):
     %0:gpr(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
     %1:gpr(p0) = G_GLOBAL_VALUE @addr

What are your thoughts on this approach? I would appreciate hearing your opinion on the matter.

I think Arm engineers have their own plan for implementing code models. Such an option essentially creates a new code model variant and requires more thought. Compiler driver option parity is great, and such extensions should be discussed with GCC as well.

On the patch side, translating a driver option to a cl::opt is a hack. Some existing features use this because it is easy, but for new stuff we should implement them properly. Certain features are implemented as TargetOptions (e.g. -ffunction-sections) and some are implemented as function attributes, depending on whether fine-grained control under LTO is useful.

Ok. Thank you so much for your insightful comment. I have learned a great deal from the information you provided.