I am beginner in LLVM Backend. To learn how to write LLVM Backend, I am writing simple instruction selection patterns. I found some instructions from here.
def rvrs : Instruction {//REV8_RV64
bits<32> Inst;
bits<32> SoftFail = 0;
bits<5> rs1;
bits<5> rd;
let Namespace = "RISCV";
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
let Size = 4;
let Inst{31-25} = 0b0110101; /*funct7*/ //I will change this and funct3
let Inst{24-20} = 0b11000; /**/
let Inst{19-15} = rs1; /*rs1*/
let Inst{14-12} = 0b101; /*funct3*/
let Inst{11-7} = rd;
let Inst{6-0} = 0b0000000; /*opcode*/
dag OutOperandList = (outs GPR:$rd);
dag InOperandList = (ins GPR:$rs1);
let AsmString = "rvrs\t$rd, $rs1";
}
def : Pat< //pattern for rvrs
(or
( or
(srl GPR:$src1, (i32 24)),
(srl
(and GPR:$src1, (i32 0xff0000)), (i32 8)
)
),
( or
(and
(shl GPR:$src1, (i32 8)), (i32 0xff0000)
),
(shl GPR:$src1, (i32 24))
)
),
(rvrs GPR:$src1)
>;
These are the instruction definition and selectionDAG code that I wrote in RISCVInstrInfo.td
. My aim is to support REV8_RV64
instruction in the website linked above.
unsigned int rvrs_fun(unsigned int rs1){
return (rs1 >> 24) |
(( 0x00ff0000 & rs1) >> 8) |
( 0x00ff0000 & (rs1 << 8)) |
(rs1 << 24);
}
This is my C code. I compiled this code by following two instructions sequantially.
/home/llvm/Downloads/clang+llvm-14.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang-14 -target riscv32-linux-gnu -S -emit-llvm -g rvrs_fun.c
/home/llvm/llvm-project/RISCV_Backend/bin/llc -mtriple=riscv32 rvrs_fun.ll
However, I couldn’t see the result as I expected. The new instruction is not added instead of the others.
The output assembly is below.
.attribute 4, 16
.attribute 5, "rv32i2p0"
.file "rvrs_fun.c"
.globl rvrs_fun # -- Begin function rvrs_fun
.p2align 1
.type rvrs_fun,@function
rvrs_fun: # @rvrs_fun
.Lfunc_begin0:
.file 0 "/home/llvm/LLVM-CustomInstr" "rvrs_fun.c" md5 0xf2eeb2a31d03a1070d759bc607d31ea9
.loc 0 17 0 # rvrs_fun.c:17:0
.cfi_sections .debug_frame
.cfi_startproc
# %bb.0:
addi sp, sp, -16
.cfi_def_cfa_offset 16
sw ra, 12(sp) # 4-byte Folded Spill
sw s0, 8(sp) # 4-byte Folded Spill
.cfi_offset ra, -4
.cfi_offset s0, -8
addi s0, sp, 16
.cfi_def_cfa s0, 0
sw a0, -12(s0)
.Ltmp0:
.loc 0 18 14 prologue_end # rvrs_fun.c:18:14
lw a0, -12(s0)
.loc 0 18 18 is_stmt 0 # rvrs_fun.c:18:18
srli a1, a0, 24
lui a2, 4080
.loc 0 19 27 is_stmt 1 # rvrs_fun.c:19:27
and a3, a0, a2
.loc 0 19 34 is_stmt 0 # rvrs_fun.c:19:34
srli a3, a3, 8
.loc 0 18 25 is_stmt 1 # rvrs_fun.c:18:25
or a1, a1, a3
.loc 0 20 33 # rvrs_fun.c:20:33
slli a3, a0, 8
.loc 0 20 26 is_stmt 0 # rvrs_fun.c:20:26
and a2, a2, a3
.loc 0 19 40 is_stmt 1 # rvrs_fun.c:19:40
or a1, a1, a2
.loc 0 21 18 # rvrs_fun.c:21:18
slli a0, a0, 24
.loc 0 20 40 # rvrs_fun.c:20:40
or a0, a0, a1
.loc 0 18 5 # rvrs_fun.c:18:5
lw ra, 12(sp) # 4-byte Folded Reload
lw s0, 8(sp) # 4-byte Folded Reload
addi sp, sp, 16
ret
.Ltmp1:
.Lfunc_end0:
.size rvrs_fun, .Lfunc_end0-rvrs_fun
.cfi_endproc
# -- End function
.section .debug_abbrev,"",@progbits
.byte 1 # Abbreviation Code
.byte 17 # DW_TAG_compile_unit
.byte 1 # DW_CHILDREN_yes
.byte 37 # DW_AT_producer
.byte 37 # DW_FORM_strx1
.byte 19 # DW_AT_language
.byte 5 # DW_FORM_data2
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 114 # DW_AT_str_offsets_base
.byte 23 # DW_FORM_sec_offset
.byte 16 # DW_AT_stmt_list
.byte 23 # DW_FORM_sec_offset
.byte 27 # DW_AT_comp_dir
.byte 37 # DW_FORM_strx1
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 115 # DW_AT_addr_base
.byte 23 # DW_FORM_sec_offset
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 2 # Abbreviation Code
.byte 46 # DW_TAG_subprogram
.byte 1 # DW_CHILDREN_yes
.byte 17 # DW_AT_low_pc
.byte 27 # DW_FORM_addrx
.byte 18 # DW_AT_high_pc
.byte 6 # DW_FORM_data4
.byte 64 # DW_AT_frame_base
.byte 24 # DW_FORM_exprloc
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 39 # DW_AT_prototyped
.byte 25 # DW_FORM_flag_present
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 63 # DW_AT_external
.byte 25 # DW_FORM_flag_present
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 3 # Abbreviation Code
.byte 5 # DW_TAG_formal_parameter
.byte 0 # DW_CHILDREN_no
.byte 2 # DW_AT_location
.byte 24 # DW_FORM_exprloc
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 58 # DW_AT_decl_file
.byte 11 # DW_FORM_data1
.byte 59 # DW_AT_decl_line
.byte 11 # DW_FORM_data1
.byte 73 # DW_AT_type
.byte 19 # DW_FORM_ref4
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 4 # Abbreviation Code
.byte 36 # DW_TAG_base_type
.byte 0 # DW_CHILDREN_no
.byte 3 # DW_AT_name
.byte 37 # DW_FORM_strx1
.byte 62 # DW_AT_encoding
.byte 11 # DW_FORM_data1
.byte 11 # DW_AT_byte_size
.byte 11 # DW_FORM_data1
.byte 0 # EOM(1)
.byte 0 # EOM(2)
.byte 0 # EOM(3)
.section .debug_info,"",@progbits
.Lcu_begin0:
.word .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
.half 5 # DWARF version number
.byte 1 # DWARF Unit Type
.byte 4 # Address Size (in bytes)
.word .debug_abbrev # Offset Into Abbrev. Section
.byte 1 # Abbrev [1] 0xc:0x37 DW_TAG_compile_unit
.byte 0 # DW_AT_producer
.half 12 # DW_AT_language
.byte 1 # DW_AT_name
.word .Lstr_offsets_base0 # DW_AT_str_offsets_base
.word .Lline_table_start0 # DW_AT_stmt_list
.byte 2 # DW_AT_comp_dir
.byte 0 # DW_AT_low_pc
.word .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.word .Laddr_table_base0 # DW_AT_addr_base
.byte 2 # Abbrev [2] 0x23:0x1b DW_TAG_subprogram
.byte 0 # DW_AT_low_pc
.word .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
.byte 1 # DW_AT_frame_base
.byte 88
.byte 3 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 17 # DW_AT_decl_line
# DW_AT_prototyped
.word 62 # DW_AT_type
# DW_AT_external
.byte 3 # Abbrev [3] 0x32:0xb DW_TAG_formal_parameter
.byte 2 # DW_AT_location
.byte 145
.byte 116
.byte 5 # DW_AT_name
.byte 0 # DW_AT_decl_file
.byte 17 # DW_AT_decl_line
.word 62 # DW_AT_type
.byte 0 # End Of Children Mark
.byte 4 # Abbrev [4] 0x3e:0x4 DW_TAG_base_type
.byte 4 # DW_AT_name
.byte 7 # DW_AT_encoding
.byte 4 # DW_AT_byte_size
.byte 0 # End Of Children Mark
.Ldebug_info_end0:
.section .debug_str_offsets,"",@progbits
.word 28 # Length of String Offsets Set
.half 5
.half 0
.Lstr_offsets_base0:
.section .debug_str,"MS",@progbits,1
.Linfo_string0:
.asciz "clang version 14.0.0" # string offset=0
.Linfo_string1:
.asciz "rvrs_fun.c" # string offset=21
.Linfo_string2:
.asciz "/home/llvm/LLVM-CustomInstr" # string offset=32
.Linfo_string3:
.asciz "rvrs_fun" # string offset=60
.Linfo_string4:
.asciz "unsigned int" # string offset=69
.Linfo_string5:
.asciz "rs1" # string offset=82
.section .debug_str_offsets,"",@progbits
.word .Linfo_string0
.word .Linfo_string1
.word .Linfo_string2
.word .Linfo_string3
.word .Linfo_string4
.word .Linfo_string5
.section .debug_addr,"",@progbits
.word .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
.Ldebug_addr_start0:
.half 5 # DWARF version number
.byte 4 # Address size
.byte 0 # Segment selector size
.Laddr_table_base0:
.word .Lfunc_begin0
.Ldebug_addr_end0:
.ident "clang version 14.0.0"
.section ".note.GNU-stack","",@progbits
.section .debug_line,"",@progbits
.Lline_table_start0:
I also tried changing the value in the selection dag from 0x00ff0000
to 0xff0
because I don’t know whether the value is taken after converted as in assembly or not.
I searched the REV8_RV64
keyword in files to see the original selectionDAG but it calls another function called bswap
. I couldn’t follow functions in different files to see how it is implemented.
Could you help me to learn the problem and solution?
If the selectionDAG and instruction definitions are somewhere, could you help me to find them?