bpf compilation using clang

Hi all,

I am trying to insert instructions into the bpf using the bpf syscall,
the instructions were generated using the following command line:

clang -I ~/Builds/bpf_rss/iproute2/include -Wall -target bpf -O2
-emit-llvm -c upstream/qemu/hw/net/rss_tap_bpf_program.c -o - | llc
-march=bpf -filetype=obj -o tap_bpf_program.o

and then were translated to bpf instructions using the BPFCparser tool

Every time I try to insert the array of instructions the verfier fails
with the following error:

back-edge from insn 363 to 364

or something similar even though the compilation succeeds.

I have multiple maps in my code and I translate only the code section
to bpf insns structure and insert it using the bpf syscall as follows:

    memset(&attr, 0, sizeof(attr));
    attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
    attr.insn_cnt = ARRAY_SIZE(l3_l4_hash_insns);
    attr.insns = (__u64) (unsigned long) (l3_l4_hash_insns);
    attr.license = (__u64) (unsigned long) ("Dual BSD/GPL");
    attr.log_buf = (__u64) (unsigned long) (buffer);
    attr.log_level = 7;
    attr.log_size = BUFF_LEN;
    attr.kern_version = 0;

    ret = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));

I am not sure how to debug this error since the instructions are in
binary and the precompiled source code doesn't seem to contain any
weird loops or goto instructions...

Is there a way to identify which line of source code is causing these errors?

Thanks!

ping?
Who is maintaining bpf compilation

Ping.

I am not sure how to debug this error since the instructions are in
binary and the precompiled source code doesn't seem to contain any
weird loops or goto instructions...

Is there a way to identify which line of source code is causing these errors?

First step would be to look at the assembly and try to work out what
instructions 363 and 364 are. If you add "-S" to the clang
command-line it should get you some assembly. With luck the problem
will be around line "363 + header rubbish" and there will be an
obvious pair of instructions where the earlier one refers to the
result of a later one; with even more luck it'll be easy to map those
instructions back to your source (though it's probably an LLVM bug to
produce them in the first place).

If you send files reproducing the issue here people will have more
information to give you advice.

and then were translated to bpf instructions using the BPFCparser tool

You seem to be the only person on the internet to have mentioned this
tool. If it's mangling the instruction stream it's also a candidate
for introducing bugs.

Cheers.

Tim.

> I am not sure how to debug this error since the instructions are in
> binary and the precompiled source code doesn't seem to contain any
> weird loops or goto instructions...
>
> Is there a way to identify which line of source code is causing these errors?

First step would be to look at the assembly and try to work out what
instructions 363 and 364 are. If you add "-S" to the clang
command-line it should get you some assembly. With luck the problem
will be around line "363 + header rubbish" and there will be an
obvious pair of instructions where the earlier one refers to the
result of a later one; with even more luck it'll be easy to map those
instructions back to your source (though it's probably an LLVM bug to
produce them in the first place).

If you send files reproducing the issue here people will have more
information to give you advice.

> and then were translated to bpf instructions using the BPFCparser tool

You seem to be the only person on the internet to have mentioned this
tool. If it's mangling the instruction stream it's also a candidate
for introducing bugs.

It's a tool that I created for translating object files to
instructions. It already uses the -S flag
I couldn't find any other tool which does this!

* I have reworked he source code a bit since my last email and now the error is:
back-edge from insn 35 to 18

* The compilation command line:
clang -I ~/Builds/bpf_rss/iproute2/include -Wall -Wno-unused-value
-Wno-pointer-sign -Wno-compare-distinct-pointer-types
-Wno-gnu-variable-sized-type-not-at-end -Wno-tautological-compare
-Wno-unknown-warning-option -Wno-address-of-packed-member -target bpf
-O2 -emit-llvm -c upstream/qemu/hw/net/rss_tap_bpf_program.c -o - |
llc -march=bpf -filetype=obj -o tap_bpf_program.o

* Here you can find the source code:

/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
* Copyright 2017 Mellanox Technologies, Ltd
*/

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <asm/types.h>
#include <linux/in.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_tunnel.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include "tap_rss.h"
#include "bpf_api.h"
#include "rss_bpf_api.h"

/** Create IPv4 address */
#define IPv4(a, b, c, d) ((__u32)(((a) & 0xff) << 24) | \
                (((b) & 0xff) << 16) | \
                (((c) & 0xff) << 8) | \
                ((d) & 0xff))

#define PORT(a, b) ((__u16)(((a) & 0xff) << 8) | \
                ((b) & 0xff))

/*
* The queue number is offset by a unique QUEUE_OFFSET, to distinguish
* packets that have gone through this rule (skb->cb[1] != 0) from others.
*/
#define PIN_GLOBAL_NS 2

#define KEY_IDX 0
#define BPF_MAP_ID_KEY 1

struct vlan_hdr {
        __be16 h_vlan_TCI;
        __be16 h_vlan_encapsulated_proto;
};

struct virtio_net_hdr_rss {
    __u32 rss_hash_function;
    __u32 hash_function_flags;
    uint8_t rss_hash_key[40];
    __u32 rss_indirection_table_length;
    uint8_t rss_indirection_table[128];
};

struct bpf_elf_map __attribute__((section("maps"), used))
map_rss = {
        .type = BPF_MAP_TYPE_ARRAY,
        .id = BPF_MAP_ID_KEY,
        .size_key = sizeof(__u32),
        .size_value = sizeof(struct virtio_net_hdr_rss),
        .max_elem = 1,
        .pinning = PIN_GLOBAL_NS,

};

struct ipv4_l3_l4_tuple {
        __u32 src_addr;
        __u32 dst_addr;
        __u16 dport;
        __u16 sport;
} __attribute__((packed));

struct ipv6_l3_l4_tuple {
        __u8 src_addr[16];
        __u8 dst_addr[16];
        __u16 dport;
        __u16 sport;
} __attribute__((packed));

static const __u8 def_rss_key[] = {
        0xd1, 0x81, 0xc6, 0x2c,
        0xf7, 0xf4, 0xdb, 0x5b,
        0x19, 0x83, 0xa2, 0xfc,
        0x94, 0x3e, 0x1a, 0xdb,
        0xd9, 0x38, 0x9e, 0x6b,
        0xd1, 0x03, 0x9c, 0x2c,
        0xa7, 0x44, 0x99, 0xad,
        0x59, 0x3d, 0x56, 0xd9,
        0xf3, 0x25, 0x3c, 0x06,
        0x2a, 0xdc, 0x1f, 0xfc,
};

static __u32 __attribute__((always_inline))
rte_softrss_be(const __u32 *input_tuple, const uint8_t *rss_key,
                __u8 input_len)
{
        __u32 i, j, hash = 0;
#pragma unroll
        for (j = 0; j < input_len; j++) {
#pragma unroll
                for (i = 0; i < 32; i++) {
                        if (input_tuple[j] & (1 << (31 - i))) {
                                hash ^= ((const __u32 *)rss_key)[j] << i |
                                (__u32)((uint64_t)
                                (((const __u32 *)rss_key)[j + 1])
                                        >> (32 - i));
                        }
                }
        }
        return hash;
}

static int __attribute__((always_inline))
rss_l3_l4(struct __sk_buff *skb)
{
        __u64 proto = load_half(skb, 12);
        __u64 nhoff = ETH_HLEN;
        __u32 key_idx = 0xdeadbeef;
        __u32 hash = 0;
        struct virtio_net_hdr_rss * rss_conf;
        struct rss_key *rsskey;
        int j = 0;
        __u8 *key = 0;
        __u32 len = 0;
        __u32 queue = 0;
        __u32 q = 0;

        rss_conf = (struct virtio_net_hdr_rss *)
map_lookup_elem(&map_rss, &key_idx);
        if (!rss_conf) {
                printt("hash(): rss key is not configured\n");
                return -2;
        }
        key = (__u8 *)rss_conf->rss_hash_key;

        if (proto == ETH_P_8021AD) {
                proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,

h_vlan_encapsulated_proto));
                nhoff += sizeof(struct vlan_hdr);
        }

        if (proto == ETH_P_8021Q) {
                proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,

h_vlan_encapsulated_proto));
                nhoff += sizeof(struct vlan_hdr);
        }

        if (likely(proto == ETH_P_IP)) {
                struct ipv4_l3_l4_tuple v4_tuple = {
                        .src_addr = IPv4(load_byte(skb, nhoff +
offsetof(struct iphdr, saddr)),
                                         load_byte(skb, nhoff +
offsetof(struct iphdr, saddr) + 1),
                                         load_byte(skb, nhoff +
offsetof(struct iphdr, saddr) + 2),
                                         load_byte(skb, nhoff +
offsetof(struct iphdr, saddr) + 3)),
                        .dst_addr = IPv4(load_byte(skb, nhoff +
offsetof(struct iphdr, daddr)),
                                         load_byte(skb, nhoff +
offsetof(struct iphdr, daddr) + 1),
                                         load_byte(skb, nhoff +
offsetof(struct iphdr, daddr) + 2),
                                         load_byte(skb, nhoff +
offsetof(struct iphdr, daddr) + 3)),
                        .sport = PORT(load_byte(skb, nhoff +
sizeof(struct iphdr)),
                                      load_byte(skb, nhoff +
sizeof(struct iphdr) + 1)),
                        .dport = PORT(load_byte(skb, nhoff +
sizeof(struct iphdr) + 2),
                                      load_byte(skb, nhoff +
sizeof(struct iphdr) + 3))
                };
                __u8 input_len = sizeof(v4_tuple) / sizeof(__u32);
                if (rss_conf->hash_function_flags & (1 << HASH_FIELD_IPV4_L3))
                        input_len--;
                hash = rte_softrss_be((__u32 *)&v4_tuple, key, 3);
        } else if (proto == htons(ETH_P_IPV6)) {
                struct ipv6_l3_l4_tuple v6_tuple;
                for (j = 0; j < 4; j++)
                        *((uint32_t *)&v6_tuple.src_addr + j) =
                                load_word(skb, nhoff + offsetof(struct
ipv6hdr, saddr) + j);
                for (j = 0; j < 4; j++)
                        *((uint32_t *)&v6_tuple.dst_addr + j) =
                                load_word(skb, nhoff + offsetof(struct
ipv6hdr, daddr) + j);
                v6_tuple.sport = PORT(load_byte(skb, nhoff +
sizeof(struct ipv6hdr)),
                                      load_byte(skb, nhoff +
sizeof(struct ipv6hdr) + 1));
                v6_tuple.dport = PORT(load_byte(skb, nhoff +
sizeof(struct ipv6hdr) + 2),
                                      load_byte(skb, nhoff +
sizeof(struct ipv6hdr) + 3));

                __u8 input_len = sizeof(v6_tuple) / sizeof(__u32);
                if (rss_conf->hash_function_flags & (1 << HASH_FIELD_IPV6_L3))
                        input_len--;
                hash = rte_softrss_be((__u32 *)&v6_tuple, key, 9);
        } else {
                return -1;
        }

        queue = rsskey->queues[(hash % rsskey->nb_queues) &
                                       (TAP_MAX_QUEUES - 1)];
        printt("queue: 0x%x hash: 0x%x\n" ,queue, hash);
        return queue;
}

#define RSS(L) \
        __section(#L) int \
                L ## _hash(struct __sk_buff *skb) \
        { \
                return rss_ ## L (skb); \
        }

RSS(l3_l4)

BPF_LICENSE("Dual BSD/GPL");

I'm afraid I don't have the headers to compile it myself, and couldn't
track them down easily. The preprocessed output (with -E as well)
would be more useful (as an attachment).

At the moment I can't really link that loop back to anything in the
source. It seems to have an iteration count of 40 which doesn't match
with anything I can see.

Cheers.

Tim.

The headers can be found here:
iproute2$ git remote -v
origin https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/ (fetch)
origin https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/ (push)

I have more headers which I have created. I can push this to github if
needed and you'll have all of the dependencies.

iteration count of 40 is the max key size.

I have mistakenly sent the wrong source code. This is the up-to-date one:

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <asm/types.h>
#include <linux/in.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_tunnel.h>
#include <linux/filter.h>
#include <linux/bpf.h>

#include "bpf_api.h"
#include "rss_tap_bpf.h"

/** Create IPv4 address */
#define IPv4(a, b, c, d) ((__u32)(((a) & 0xff) << 24) | \
    (((b) & 0xff) << 16) | \
    (((c) & 0xff) << 8) | \
    ((d) & 0xff))

#define PORT(a, b) ((__u16)(((a) & 0xff) << 8) | \
    ((b) & 0xff))

#define KEY_IDX 0

struct vlan_hdr {
  __be16 h_vlan_TCI;
  __be16 h_vlan_encapsulated_proto;
};

//struct bpf_elf_map __attribute__((section("maps"), used))
struct bpf_elf_map __attribute__((section("maps"), used)) map_rss = {
  .type = BPF_MAP_TYPE_ARRAY,
  .size_key = sizeof(__u32),
  .size_value = sizeof(struct rss_key),
  .max_elem = 1,
};

//struct bpf_elf_map __attribute__((section("maps"), used))
struct bpf_elf_map __attribute__((section("maps"), used)) map_rss_key = {
  .type = BPF_MAP_TYPE_ARRAY,
  .size_key = sizeof(__u32),
  .size_value = sizeof(__u8),
  .max_elem = RSS_MAX_KEY_SIZE,
};

//struct bpf_elf_map __attribute__((section("maps"), used))
struct bpf_elf_map __attribute__((section("maps"), used))
map_rss_indirection = {
  .type = BPF_MAP_TYPE_ARRAY,
  .size_key = sizeof(__u32),
  .size_value = sizeof(__u32),
  .max_elem = RSS_MAX_INDIRECTION_SIZE,
};

struct ipv4_l3_l4_tuple {
  __u32 src_addr;
  __u32 dst_addr;
  __u16 dport;
  __u16 sport;
} __attribute__((packed));

struct ipv6_l3_l4_tuple {
  __u8 src_addr[16];
  __u8 dst_addr[16];
  __u16 dport;
  __u16 sport;
} __attribute__((packed));

static __u32 __attribute__((always_inline))
rte_softrss_be(const __u32 *input_tuple, const __u8 *key,
    __u8 input_len)
{
    __u32 i, j, hash = 0;
#pragma unroll
    for (j = 0; j < input_len; j++) {
#pragma unroll
        for (i = 0; i < 32; i++) {
            if (input_tuple[j] & (1 << (31 - i))) {
                hash ^= ((const __u32 *) key)[j] << i |
                (__u32)((uint64_t)
                (((const __u32 *) key)[j + 1])
                >> (32 - i));
            }
        }
    }
    return hash;
}

static void __attribute__((always_inline))
fill_key_to_array(__u8 *array)
{
    __u32 i = 0;
    __u8 *elem;
    for(i = 0; i < RSS_MAX_KEY_SIZE; i++)
    {
        elem = map_lookup_elem(&map_rss_key, &i);
        array[i] = elem == NULL ? '0' : *elem;
    }
}

static int __attribute__((always_inline))
rss_l3_l4(struct __sk_buff *skb)
{
    __u64 proto = load_half(skb, 12);
    __u64 nhoff = ETH_HLEN;
    __u32 key_idx = 0xdeadbeef;
    __u32 hash = 0;
    int j = 0;
    __u32 * queue;
    __u8 key[RSS_MAX_KEY_SIZE];
    struct rss_key *rss_key;

    rss_key = (struct rss_key *) map_lookup_elem(&map_rss, &key_idx);
    if (!rss_key) {
        return -1;
    }

    fill_key_to_array(key);

if (proto == ETH_P_8021AD) {
proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
h_vlan_encapsulated_proto));
nhoff += sizeof(struct vlan_hdr);
}

if (proto == ETH_P_8021Q) {
proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
h_vlan_encapsulated_proto));
nhoff += sizeof(struct vlan_hdr);
}

if (likely(proto == ETH_P_IP)) {
      struct ipv4_l3_l4_tuple v4_tuple = {
.src_addr = IPv4(load_byte(skb, nhoff + offsetof(struct iphdr, saddr)),
load_byte(skb, nhoff + offsetof(struct iphdr, saddr) + 1),
load_byte(skb, nhoff + offsetof(struct iphdr, saddr) + 2),
load_byte(skb, nhoff + offsetof(struct iphdr, saddr) + 3)),
.dst_addr = IPv4(load_byte(skb, nhoff + offsetof(struct iphdr, daddr)),
load_byte(skb, nhoff + offsetof(struct iphdr, daddr) + 1),
load_byte(skb, nhoff + offsetof(struct iphdr, daddr) + 2),
load_byte(skb, nhoff + offsetof(struct iphdr, daddr) + 3)),
.sport = PORT(load_byte(skb, nhoff + sizeof(struct iphdr)),
      load_byte(skb, nhoff + sizeof(struct iphdr) + 1)),
.dport = PORT(load_byte(skb, nhoff + sizeof(struct iphdr) + 2),
      load_byte(skb, nhoff + sizeof(struct iphdr) + 3))
};
__u8 input_len = sizeof(v4_tuple) / sizeof(__u32);
if (rss_key->hash_fields & (1 << HASH_FIELD_IPV4_L3))
input_len--;
hash = rte_softrss_be((__u32 *)&v4_tuple, key, 3);
} else if (proto == htons(ETH_P_IPV6)) {
struct ipv6_l3_l4_tuple v6_tuple;
for (j = 0; j < 4; j++)
*((uint32_t *)&v6_tuple.src_addr + j) =
load_word(skb, nhoff + offsetof(struct ipv6hdr, saddr) + j);
for (j = 0; j < 4; j++)
*((uint32_t *)&v6_tuple.dst_addr + j) =
load_word(skb, nhoff + offsetof(struct ipv6hdr, daddr) + j);
v6_tuple.sport = PORT(load_byte(skb, nhoff + sizeof(struct ipv6hdr)),
      load_byte(skb, nhoff + sizeof(struct ipv6hdr) + 1));
v6_tuple.dport = PORT(load_byte(skb, nhoff + sizeof(struct ipv6hdr) + 2),
      load_byte(skb, nhoff + sizeof(struct ipv6hdr) + 3));

__u8 input_len = sizeof(v6_tuple) / sizeof(__u32);
if (rss_key->hash_fields & (1 << HASH_FIELD_IPV6_L3))
input_len--;
hash = rte_softrss_be((__u32 *)&v6_tuple, key, 9);
} else {
return -1;
}
    __u32 indirection_index = hash % rss_key->nb_queues;
    queue = (__u32 *) map_lookup_elem(&map_rss_indirection, &indirection_index);

    return queue == NULL ? -1 : (int) *queue;
}

#define RSS(L) \
        __section(#L) int \
                L ## _hash(struct __sk_buff *skb) \
        { \
            return rss_ ## L(skb); \
        }

RSS(l3_l4)

BPF_LICENSE("Dual BSD/GPL");On Tue, Sep 25, 2018 at 1:21 PM Tim

iteration count of 40 is the max key size.

OK, then it's almost certainly this loop:

    for(i = 0; i < RSS_MAX_KEY_SIZE; i++)
    {
        elem = map_lookup_elem(&map_rss_key, &i);
        array[i] = elem == NULL ? '0' : *elem;
    }

Loops need to be marked "#pragma unroll" and completely linearized
because (apparently) they're not allowed in BPF programs.

It actually seems like a pretty fragile situation because that pragma
is only an instruction to unroll if possible. A higher quality
implementation would find some way to make a loop in the final code an
error before you tried to load it into the kernel. But it is what it
is.

Cheers.

Tim.