Unwanted push/pop on Cortex-M.

Hi,

I have this code:

void platform_putchar(int, char);
void out_char( char ch );

void out_char( char ch )
{
platform_putchar (0, ch);
}

I’m compiling with the following clang invocation:

$ /usr/local/vendor/toolchains/llvm/3.3/armv7m/bin/armv7m-none-eabi-clang -mcpu=cortex-m4 -mfloat-abi=soft -mthumb -nostdinc -ffreestanding -ffunction-sections -fdata-sections -fno-exceptions -fno-rtti -fomit-frame-pointer -momit-leaf-frame-pointer -nostdinc -v -Os -S -o a.s a.c
vendor-clang version 3.3 (tags/RELEASE_33/final) (based on LLVM 3.3)
Target: armv7m-none--eabi
Thread model: posix
"/usr/local/vendor-20130805-b8d59d2/toolchains/llvm/3.3/armv7m/bin/armv7m-none-eabi-clang" -cc1 -triple thumbv7em-none--eabi -S -disable-free -main-file-name a.c -mrelocation-model static -fmath-errno -mconstructor-aliases -target-abi aapcs -target-cpu cortex-m4 -msoft-float -mfloat-abi soft -target-feature +soft-float -target-feature +soft-float-abi -target-feature -neon -target-linker-version 2.22 -momit-leaf-frame-pointer -v -ffunction-sections -fdata-sections -coverage-file /tmp/a.s -nostdsysteminc -nobuiltininc -resource-dir /usr/local/vendor-20130805-b8d59d2/toolchains/llvm/3.3/armv7m/bin/../lib/clang/3.3 -Os -fno-dwarf-directory-asm -fdebug-compilation-dir /tmp -ferror-limit 19 -fmessage-length 207 -ffreestanding -mstackrealign -fno-rtti -fno-signed-char -fobjc-runtime=gcc -fobjc-default-synthesize-properties -fdiagnostics-show-option -fcolor-diagnostics -backend-option -vectorize-loops -o a.s -x c a.c
clang -cc1 version 3.3 based upon LLVM 3.3 default target armv7m-none-eabi
#include "..." search starts here:
End of search list.

$ cat a.s
.syntax unified
.eabi_attribute 6, 10
.eabi_attribute 9, 2
.eabi_attribute 10, 5
.fpu vfpv4
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 44, 1
.file "a.c"
.section .text.out_char,"ax",%progbits
.globl out_char
.align 2
.type out_char,%function
.code 16
.thumb_func
out_char:
push.w {r11, lr}
mov r1, r0
movs r0, #0
bl platform_putchar
pop.w {r11, pc}
.Ltmp0:
.size out_char, .Ltmp0-out_char

The one question I have is:
why can’t this out_char function be compiled into:

out_char:
mov r1, r0
mov r0, #0
bl platform_putchar
bx lr

What’s the clang/llvm module responsible for generating the prologue and epilogue for this function?

I looked into CodeGen/PrologEpilogInserter, ARMFrameLowering.cpp and Thumb1FrameLowering.cpp, but it’s not 100% clear how the code generator decides what the stack frame looks like.

Here’s the -emit-llvm output

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:32-n32-S64"
target triple = "thumbv7em-none--eabi"

; Function Attrs: nounwind optsize
define void @out_char(char)(i8 zeroext %ch) #0 {
entry:
tail call void @llvm.dbg.value(metadata !{i8 %ch}, i64 0, metadata !10), !dbg !11
tail call void @platform_putchar(int, char)(i32 0, i8 zeroext %ch) #3, !dbg !12
ret void, !dbg !13
}

; Function Attrs: optsize
declare void @platform_putchar(int, char)(i32, i8 zeroext) #1

; Function Attrs: nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata) #2

attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" }
attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" }
attributes #2 = { nounwind readnone }
attributes #3 = { nobuiltin nounwind optsize }

!llvm.dbg.cu = !{!0}

!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !“nestlabs-clang version 3.3 (tags/RELEASE_33/final) (based on LLVM 3.3)”, i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/a.c] [DW_LANG_C_plus_plus]
!1 = metadata !{metadata !"a.c", metadata !"/tmp"}
!2 = metadata !{i32 0}
!3 = metadata !{metadata !4}
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !“out_char”, metadata !“out_char”, metadata !“out_char(char)”, i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8)* @out_char(char), null, null, metadata !9, i32 7} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 7] [out_char]
!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/tmp/a.c]
!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
!7 = metadata !{null, metadata !8}
!8 = metadata !{i32 786468, null, null, metadata !“char”, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_unsigned_char]
!9 = metadata !{metadata !10}
!10 = metadata !{i32 786689, metadata !4, metadata !“ch”, metadata !5, i32 16777222, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ch] [line 6]
!11 = metadata !{i32 6, i32 0, metadata !4, null}
!12 = metadata !{i32 8, i32 0, metadata !4, null}
!13 = metadata !{i32 9, i32 0, metadata !4, null}

Thanks

Andrea.

Hi Andrea,

That is because LR is a fixed-purpose register under the AAPCS
(http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042e/IHI0042E_aapcs.pdf)
and out_char() is not a leaf function: the bl to platform_putchar overwrites LR,
so the compiler has to save and restore it. The additional save and restore of
register r11 is there to keep the stack 8-byte aligned, as the ARM EABI requires.

Thanks

~Umesh

Umesh,

Makes some sort of sense to me. OTOH: if, instead of choosing r11 as a “dummy” to align the stack, we had chosen some other register in the range r0-r7, then we could have emitted the PUSH encoding T1 (a 2-byte opcode) as opposed to the encoding T2 (a 4-byte opcode).

A

Hi Andrea,
R11 is treated as the frame pointer by the ARM backend, so it is again a fixed register.

Thanks
Umesh

Umesh,

From Target/ARM/ARMRegisterInfo.td, it looks like FP for Thumb should be r7 instead of r11.

// Register classes.
//
// pc == Program Counter
// lr == Link Register
// sp == Stack Pointer
// r12 == ip (scratch)
// r7 == Frame Pointer (thumb-style backtraces)
// r9 == May be reserved as Thread Register
// r11 == Frame Pointer (arm-style backtraces)
// r10 == Stack Limit

Regardless, I’m compiling with -fomit-frame-pointer, so it should not really matter, right?

Andrea