llc generated machine assembly code for NASM

Hello,

I am new here. This is my first post.
I spent about three days trying out LLVM by developing a front-end that can generate LLVM IR for a subset of the C language. I have not applied any optimization passes yet. On Linux, it can go all the way through the system's native assembler and linker to generate executables, and they run. I ran into problems on Windows, since the generated assembly code cannot be assembled by NASM.

Here is an example C code:

int gv;
int foo(int p)
{
int lv;
gv = p;
if (p > 5) {
lv = 0;
while (gv>0) {
lv = lv + gv;
gv = gv - 1;
}
}
else {
lv = 1;
while (gv>0) {
lv = lv * gv;
gv = gv - 1;
}
}
return lv;
}

Here is the LLVM IR I generated:

; ModuleID = ‘my cool jit’
@gv = weak global i32 0 ; <i32*> [#uses=9]
define i32 @foo(i32 %p) {
entry:
%lv = alloca i32 ; <i32*> [#uses=8]
%p1 = alloca i32 ; <i32*> [#uses=3]
store i32 %p, i32* %p1
store i32 0, i32* %lv
%p2 = load i32* %p1 ; [#uses=1]
store i32 %p2, i32* @gv
%p3 = load i32* %p1 ; [#uses=1]
%0 = icmp ugt i32 %p3, 5 ; [#uses=1]
%1 = zext i1 %0 to i32 ; [#uses=1]
%2 = icmp ne i32 %1, 0 ; [#uses=1]
br i1 %2, label %then, label %else
then: ; preds = %entry
store i32 0, i32* %lv
br label %while
while: ; preds = %whilebody, %then
%gv = load i32* @gv ; [#uses=1]
%3 = icmp ugt i32 %gv, 0 ; [#uses=1]
%4 = zext i1 %3 to i32 ; [#uses=1]
%5 = icmp ne i32 %4, 0 ; [#uses=1]
br i1 %5, label %whilebody, label %whilecont
whilebody: ; preds = %while
%lv4 = load i32* %lv ; [#uses=1]
%gv5 = load i32* @gv ; [#uses=1]
%6 = add i32 %lv4, %gv5 ; [#uses=1]
store i32 %6, i32* %lv
%gv6 = load i32* @gv ; [#uses=1]
%7 = sub i32 %gv6, 1 ; [#uses=1]
store i32 %7, i32* @gv
br label %while
whilecont: ; preds = %while
br label %ifcont
else: ; preds = %entry
store i32 1, i32* %lv
br label %while7
while7: ; preds = %whilebody9, %else
%gv8 = load i32* @gv ; [#uses=1]
%8 = icmp ugt i32 %gv8, 0 ; [#uses=1]
%9 = zext i1 %8 to i32 ; [#uses=1]
%10 = icmp ne i32 %9, 0 ; [#uses=1]
br i1 %10, label %whilebody9, label %whilecont13
whilebody9: ; preds = %while7
%lv10 = load i32* %lv ; [#uses=1]
%gv11 = load i32* @gv ; [#uses=1]
%11 = mul i32 %lv10, %gv11 ; [#uses=1]
store i32 %11, i32* %lv
%gv12 = load i32* @gv ; [#uses=1]
%12 = sub i32 %gv12, 1 ; [#uses=1]
store i32 %12, i32* @gv
br label %while7
whilecont13: ; preds = %while7
br label %ifcont
ifcont: ; preds = %whilecont13, %whilecont
%lv14 = load i32* %lv ; [#uses=1]
ret i32 %lv14
}

And here is the assembly code I generated with “llc -march=x86 -x86-asm-syntax=intel filename”:
.686
.MMX
.XMM
.model flat
EXTERN _abort:near

.text
public _foo
ALIGN 16
_foo proc near
sub ESP, 8
$label1:
mov EAX, DWORD PTR [ESP + 12]
mov DWORD PTR [ESP], EAX
mov DWORD PTR [ESP + 4], 0
mov EAX, DWORD PTR [ESP]
mov DWORD PTR [_gv], EAX
cmp DWORD PTR [ESP], 6
jb $BB1_4
$BB1_1: ; %then
mov DWORD PTR [ESP + 4], 0
ALIGN 16
$BB1_2: ; %while
; Loop Depth 1
; Loop Header
; Inner Loop
cmp DWORD PTR [_gv], 0
je $BB1_7
$BB1_3: ; %whilebody
; Loop Depth 1
; Loop Header is BB1_2
; Inner Loop
mov EAX, DWORD PTR [_gv]
add DWORD PTR [ESP + 4], EAX
dec DWORD PTR [_gv]
jmp $BB1_2
$BB1_4: ; %else
mov DWORD PTR [ESP + 4], 1
jmp $BB1_6
ALIGN 16
$BB1_5: ; %whilebody9
; Loop Depth 1
; Loop Header is BB1_6
; Inner Loop
mov EAX, DWORD PTR [_gv]
imul EAX, DWORD PTR [ESP + 4]
mov DWORD PTR [ESP + 4], EAX
dec DWORD PTR [_gv]
$BB1_6: ; %while7
; Loop Depth 1
; Loop Header
; Inner Loop
cmp DWORD PTR [_gv], 0
jne $BB1_5
$BB1_7: ; %ifcont
mov EAX, DWORD PTR [ESP + 4]
add ESP, 8
ret
_foo endp
_gv? SEGEMNT PARA common ‘COMMON’
_gv: ; gv
db 4 dup(0)
_gv? ends
end

The above assembly code cannot be assembled by NASM. I got the following error message:

cg1.s:1: error: attempt to define a local label before any non-local labels
cg1.s:2: error: attempt to define a local label before any non-local labels
cg1.s:3: error: attempt to define a local label before any non-local labels
cg1.s:4: error: attempt to define a local label before any non-local labels
cg1.s:4: error: parser: instruction expected
cg1.s:6: error: binary format does not support any special symbol types
cg1.s:9: error: attempt to define a local label before any non-local labels
cg1.s:10: error: parser: instruction expected
cg1.s:12: error: parser: instruction expected
cg1.s:15: error: comma, colon or end of line expected
cg1.s:16: error: comma, colon or end of line expected
cg1.s:17: error: comma, colon or end of line expected
cg1.s:18: error: comma, colon or end of line expected
cg1.s:19: error: comma, colon or end of line expected
cg1.s:20: error: comma, colon or end of line expected
cg1.s:23: error: comma, colon or end of line expected
cg1.s:29: error: comma, colon or end of line expected
cg1.s:35: error: comma, colon or end of line expected
cg1.s:36: error: comma, colon or end of line expected
cg1.s:37: error: comma, colon or end of line expected
cg1.s:40: error: comma, colon or end of line expected
cg1.s:47: error: comma, colon or end of line expected
cg1.s:48: error: comma, colon or end of line expected
cg1.s:49: error: comma, colon or end of line expected
cg1.s:50: error: comma, colon or end of line expected
cg1.s:55: error: comma, colon or end of line expected
cg1.s:58: error: comma, colon or end of line expected
cg1.s:61: error: symbol `_foo' redefined
cg1.s:61: error: parser: instruction expected
cg1.s:62: error: parser: instruction expected
cg1.s:64: error: comma expected after operand 1
cg1.s:65: error: symbol `_gv?' redefined
cg1.s:65: error: parser: instruction expected

I am not sure whether there is a command-line option that is required for generating NASM assembly code. I looked through the documentation and the command-line help, but I could not find anything that helps. I would appreciate it if anyone could provide some hints.

BTW: The getting started document for Visual Studio looks very out of date.

Thanks,

Bengu

Hello

system native assembler and linker to generate executables. And it runs. I
ran into problems on Windows since the generated assembly code is not able
to be assembled by NASM.

That's correct. Use the AT&T asm printer and GNU as everywhere. Everything
else is too weak to be usable, and thus one should consider the Intel asm
printer to be of "listing quality".

Thanks for Anton’s reply.
Does anybody have an idea of roughly how much effort would be required to make the Intel asm printer usable? If it is a matter of weeks, I could probably give it a try.
Thanks,

Bengu

Hello

Does anybody have an idea of roughly how much effort is required to make
intel asm printer to be usable? If it is within weeks, probably I can give
it a try.

The effort required is equal to that required to write a new assembler.
"Too weak to be usable" means "it's not possible to represent many
important constructs with masm/nasm/fasm".

Wow. It's perhaps too much of a distraction, but I'm curious about the details of this. It's probably because its first mission is to be a compiler back-end, but the common wisdom is that gas is bare-bones and masm is featureful. Clearly that doesn't hold for what LLVM needs, and you imply it's true for the whole family of assemblers that use Intel syntax. Very surprising to those of us (well, me anyway) who have spent minimal time writing actual assembly.

Is any of this documented somewhere I can just go off and read, or is it just lore in the LLVM community?

Dustin

Could you give me some examples of such constructs that are not possible to represent with MASM/NASM/FASM but are able to represent with gas?

Thanks,

Bengu

Could you give me some examples of such constructs that are not possible to
represent with MASM/NASM/FASM but are able to represent with gas?

Weak / linkonce stuff is an easy example. There are a lot of others,
which I don't recall.

Wow. It's perhaps too much of a distraction, but I'm curious about the
details of this. It's probably because its first mission is to be a
compiler back-end, but the common wisdom is that gas is bare-bones and
masm is featureful.

Think about one thing: vcpp emits object file directly, not via masm.
The generated listings (via /Fa option) cannot be assembled, in
general.

Is any of this documented somewhere I can just go off and read, or is it
just lore in the LLVM community?

At one time (say, around LLVM 1.7-1.8) there was hope that the Intel
asmprinter would be as usable as GNU as. Unfortunately, it turned out
that this is not possible.
Pick any non-trivial C++ app and try to convert the output to masm /
nasm and you'll get an idea (linkonce / weak stuff is an example, but
not the last problem).

The long term plans are to finish the integrated assembler. We currently have active work to finish the assembler for Macho X86 32/64 targets. I suspect that ELF will follow shortly after that, though I don't know who exactly will do the work. Adding PECOFF support should not be that hard, and would obviate the dependence on a third-party assembler.

MASM is not a production quality assembler IMO, which is why Microsoft doesn't use it with VC++. I don't know whether nasm or fasm are, but I think it's more productive to work on the integrated assembler (again, once the macho support is complete and reliable) than to add nasm/fasm support.

-Chris

One thing is not clear to me. If LLVM generates object code directly,
how will it handle LLVM code that contains inline assembly? It seems
that LLVM currently uses gas syntax directly in its assembly output,
and because it generates .S files, they can be processed by binutils.
But if one day binutils is no longer involved, how will this inline
assembly be dealt with? Generally, there are two major inline assembly
styles: the VC++ style and the GCC style. Will LLVM MC support one of
them, both, or its own inline assembly syntax?

It will include a full assembly parser. Initially it will support GAS style assembly, but should be extensible to support intel style if anyone is interested. llvm-gcc already supports microsoft/codewarrior style inline asm by translating it (in the frontend) to gcc-style inline asm.

-Chris

Hi, Anton,

For a simple example, I took the machine assembly code generated by llc and, with a small manual change to the assembly code (the way llc generates the .text segment is not accepted by MASM), it can be assembled and linked by MASM. And it runs. I haven’t tried it on NASM yet.

I am sure that you ran into many problems when efforts were made to implement the Intel ASM printer between versions 1.6 and 1.7. I’d appreciate it if you could point me to any document regarding the relevant problems at that time. I probably don’t need a fully functional, production-quality assembler for my purpose. If I find that the gap between the current llc and what I need is not huge, I would like to enhance the llc Intel ASM printer myself.

Thanks,
Bengu

Hello

I am sure that you've run into many problems when efforts were carried out
to implement the Intel ASM printer between version 1.6 and 1.7. I'd
appreciate it if you could point me to any document with regard to relevant
problems at that time.

I don't have such a document.
Just grab some non-trivial c++ app (with several source files) and try
to generate an assembler.

I am sure that you've run into many problems when efforts were carried out
to implement the Intel ASM printer between version 1.6 and 1.7. I'd
appreciate it if you could point me to any document with regard to relevant
problems at that time.

I don't have such a document.
Just grab some non-trivial c++ app (with several source files) and try
to generate an assembler.

And I should say that trivial examples indeed worked in the past.
Something larger than "hello world" - not.

Yes, my example is very simple but definitely more complex than “hello, world”. I will try more complex examples to find out what is missing.

FYI: With some slight manual modification, it also works for NASM.

Bengu