[RFC PATCH 1/2] x86: Fix ModR/M byte output in 16-bit addressing mode

This attempts to address http://llvm.org/bugs/show_bug.cgi?id=18220
It also fixes a test which was requiring the *wrong* output.

I'm relatively happy with this part, and it even solves most of the hard
part of feature request for .code16 in bug 8684 — which was actually why
I started prodding at this. But I could do with some help with the
16-bit signed relocation handling, which I've split into a subsequent
patch.

diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 7952607..12a30cf 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -402,6 +402,56 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,

   unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;

+ // 16-bit addressing forms of the ModR/M byte have a different encoding for
+ // the R/M field and are far more limited in which registers can be used.
+ if (Is16BitMemOperand(MI, Op)) {
+ if (BaseReg) {
+ // See Table 2-1 "16-Bit Addressing Forms with the ModR/M byte"
+ static const int R16Table = { 0, 0, 0, 7, 0, 6, 4, 5 };
+ unsigned RMfield = R16Table[BaseRegNo];

Although this fixes all the test cases I've tested with so far, this is
almost certainly broken in a number of cases where the *signedness*
matters. Could really do with some help from someone with a bit more
clue about the details...

diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index ab95eb6..24c2bb1 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -42,6 +42,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
   case FK_SecRel_1:
   case FK_Data_1: return 0;
   case FK_PCRel_2:
+ case X86::reloc_signed_2byte:
   case FK_SecRel_2:
   case FK_Data_2: return 1;
   case FK_PCRel_4:
@@ -88,6 +89,7 @@ public:
       { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
       { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel},
       { "reloc_signed_4byte", 0, 4 * 8, 0},
+ { "reloc_signed_2byte", 0, 2 * 8, 0},
       { "reloc_global_offset_table", 0, 4 * 8, 0}
     };

diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 3ddd865..c1be5ba 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -89,6 +89,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
           break;
         }
         break;
+ case X86::reloc_signed_2byte:
       case FK_PCRel_2:
         assert(Modifier == MCSymbolRefExpr::VK_None);
         Type = ELF::R_X86_64_PC16;
@@ -146,6 +147,15 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
       case FK_Data_4:
         Type = ELF::R_X86_64_32;
         break;
+ case X86::reloc_signed_2byte:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_16;
+ break;
+ }
+ break;
       case FK_Data_2: Type = ELF::R_X86_64_16; break;
       case FK_PCRel_1:
       case FK_Data_1: Type = ELF::R_X86_64_8; break;
@@ -226,6 +236,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
           break;
         }
         break;
+ case X86::reloc_signed_2byte:
       case FK_Data_2: Type = ELF::R_386_16; break;
       case FK_PCRel_1:
       case FK_Data_1: Type = ELF::R_386_8; break;
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index f2e34cb..6ea3d70 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -20,6 +20,9 @@ enum Fixups {
   reloc_signed_4byte, // 32-bit signed. Unlike FK_Data_4
                                              // this will be sign extended at
                                              // runtime.
+ reloc_signed_2byte, // 16-bit signed. Unlike FK_Data_2
+ // this will be sign extended at
+ // runtime.
   reloc_global_offset_table, // 32-bit, relative to the start
                                              // of the instruction. Used only
                                              // for _GLOBAL_OFFSET_TABLE_.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 12a30cf..3f7ed26 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -321,7 +321,8 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
   // If we have an immoffset, add it to the expression.
   if ((FixupKind == FK_Data_4 ||
        FixupKind == FK_Data_8 ||
- FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte) ||
+ FixupKind == MCFixupKind(X86::reloc_signed_2byte))) {
     GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
     if (Kind != GOT_None) {
       assert(ImmOffset == 0);
@@ -331,13 +332,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
         ImmOffset = CurByte;
     } else if (Expr->getKind() == MCExpr::SymbolRef) {
       if (HasSecRelSymbolRef(Expr)) {
- FixupKind = MCFixupKind(FK_SecRel_4);
+ FixupKind = MCFixupKind(Size == 2 ? FK_SecRel_2 : FK_SecRel_4);
       }
     } else if (Expr->getKind() == MCExpr::Binary) {
       const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr);
       if (HasSecRelSymbolRef(Bin->getLHS())
           >> HasSecRelSymbolRef(Bin->getRHS())) {
- FixupKind = MCFixupKind(FK_SecRel_4);
+ FixupKind = MCFixupKind(Size == 2 ? FK_SecRel_2 : FK_SecRel_4);
       }
     }
   }
@@ -445,10 +446,9 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
       EmitByte(ModRMByte(0, RegOpcodeField, 6), CurByte, OS);
     }

- // FIXME: Yes we can!
- assert(Disp.isImm() && "cannot emit 16-bit relocation");
     // Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases.
- EmitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 2, MCFixupKind(X86::reloc_signed_2byte), CurByte, OS,
+ Fixups);
     return;
   }

diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 0f16621..8a9ac87 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -86,6 +86,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
   case FK_PCRel_1:
   case FK_Data_1: return 0;
   case FK_PCRel_2:
+ case X86::reloc_signed_2byte:
   case FK_Data_2: return 1;
   case FK_PCRel_4:
     // FIXME: Remove these!!!
@@ -336,6 +337,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
         if (Kind == X86::reloc_signed_4byte)
           report_fatal_error("32-bit absolute addressing is not supported in "
                              "64-bit mode", false);
+ else if (Kind == X86::reloc_signed_2byte)
+ report_fatal_error("16-bit absolute addressing is not supported in "
+ "64-bit mode", false);
       }
     }
   }

Hi David,

I’m catching up on email at the moment so I don’t know if you’ve done this, but patches should go to llvm-commits for review if you wouldn’t mind.

Thanks!

-eric

I have done so. I've since worked out that the signed 16-bit relocation
is probably entirely pointless and I should just drop the assert() from
the first patch in the series, and drop the second patch entirely. And
I've gone a little further and have .code16 actually working for the
"test" case which provoked me into looking at it in the first place. But
since nobody's responded, it didn't seem worth spamming the list with
another series just yet. I'll update my git tree at
http://git.infradead.org/users/dwmw2/llvm.git

Sounds good. I’m pretty backlogged but I’ll try to get to it as soon as I can. I’ve poked Jim as well on the email to have multiple people on the hook. :slight_smile:

-eric