461 lines
15 KiB
Diff
461 lines
15 KiB
Diff
From 80dcb40f8442f79a043c520ae9eef067519ee7ca Mon Sep 17 00:00:00 2001
|
|
From: Andrea Claudi <aclaudi@redhat.com>
|
|
Date: Thu, 13 Jun 2019 14:37:56 +0200
|
|
Subject: [PATCH] bpf: implement bpf to bpf calls support
|
|
|
|
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1716361
|
|
Upstream Status: iproute2.git commit b5cb33aec65cb
|
|
|
|
commit b5cb33aec65cb77183abbdfa5b61ecc9877ec776
|
|
Author: Daniel Borkmann <daniel@iogearbox.net>
|
|
Date: Wed Jul 18 01:31:21 2018 +0200
|
|
|
|
bpf: implement bpf to bpf calls support
|
|
|
|
Implement the missing support for bpf to bpf calls. The loader will
|
|
recognize the .text section and handle the relocation entries that
|
|
are emitted for it by LLVM.
|
|
|
|
In a first step, the loader processes the map related relocation
|
|
entries of the .text section; in a second step, it copies the .text
|
|
section into the program section and adjusts the call instruction
|
|
offsets accordingly.
|
|
|
|
Example with test_xdp_noinline.o from kernel selftests:
|
|
|
|
1) Every function marked as __attribute__ ((always_inline)), rest
|
|
left unchanged:
|
|
|
|
# ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
|
|
# ip a
|
|
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:233 qdisc noqueue state UNKNOWN group default qlen 1000
|
|
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
|
|
inet 127.0.0.1/8 scope host lo
|
|
valid_lft forever preferred_lft forever
|
|
inet6 ::1/128 scope host
|
|
valid_lft forever preferred_lft forever
|
|
[...]
|
|
# bpftool prog dump xlated id 233
|
|
[...]
|
|
1669: (2d) if r3 > r2 goto pc+4
|
|
1670: (79) r2 = *(u64 *)(r10 -136)
|
|
1671: (61) r2 = *(u32 *)(r2 +0)
|
|
1672: (63) *(u32 *)(r1 +0) = r2
|
|
1673: (b7) r0 = 1
|
|
1674: (95) exit <-- 1674 insns total
|
|
|
|
2) Every function marked as __attribute__ ((noinline)), rest
|
|
left unchanged:
|
|
|
|
# ip -force link set dev lo xdp obj test_xdp_noinline.o sec xdp-test
|
|
# ip a
|
|
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 xdpgeneric/id:236 qdisc noqueue state UNKNOWN group default qlen 1000
|
|
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
|
|
inet 127.0.0.1/8 scope host lo
|
|
valid_lft forever preferred_lft forever
|
|
inet6 ::1/128 scope host
|
|
valid_lft forever preferred_lft forever
|
|
[...]
|
|
# bpftool prog dump xlated id 236
|
|
[...]
|
|
1000: (bf) r1 = r6
|
|
1001: (b7) r2 = 24
|
|
1002: (85) call pc+3 <-- pc-relative call insns
|
|
1003: (1f) r7 -= r0
|
|
1004: (bf) r0 = r7
|
|
1005: (95) exit
|
|
1006: (bf) r0 = r1
|
|
1007: (bf) r1 = r2
|
|
1008: (67) r1 <<= 32
|
|
1009: (77) r1 >>= 32
|
|
1010: (bf) r3 = r0
|
|
1011: (6f) r3 <<= r1
|
|
1012: (87) r2 = -r2
|
|
1013: (57) r2 &= 31
|
|
1014: (67) r0 <<= 32
|
|
1015: (77) r0 >>= 32
|
|
1016: (7f) r0 >>= r2
|
|
1017: (4f) r0 |= r3
|
|
1018: (95) exit <-- 1018 insns total
|
|
|
|
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
|
|
Signed-off-by: David Ahern <dsahern@gmail.com>
|
|
---
|
|
lib/bpf.c | 233 ++++++++++++++++++++++++++++++++++++------------------
|
|
1 file changed, 157 insertions(+), 76 deletions(-)
|
|
|
|
diff --git a/lib/bpf.c b/lib/bpf.c
|
|
index ead8b5a7219f0..1b87490555050 100644
|
|
--- a/lib/bpf.c
|
|
+++ b/lib/bpf.c
|
|
@@ -1109,7 +1109,8 @@ int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
|
|
#ifdef HAVE_ELF
|
|
struct bpf_elf_prog {
|
|
enum bpf_prog_type type;
|
|
- const struct bpf_insn *insns;
|
|
+ struct bpf_insn *insns;
|
|
+ unsigned int insns_num;
|
|
size_t size;
|
|
const char *license;
|
|
};
|
|
@@ -1135,11 +1136,13 @@ struct bpf_elf_ctx {
|
|
int map_fds[ELF_MAX_MAPS];
|
|
struct bpf_elf_map maps[ELF_MAX_MAPS];
|
|
struct bpf_map_ext maps_ext[ELF_MAX_MAPS];
|
|
+ struct bpf_elf_prog prog_text;
|
|
int sym_num;
|
|
int map_num;
|
|
int map_len;
|
|
bool *sec_done;
|
|
int sec_maps;
|
|
+ int sec_text;
|
|
char license[ELF_MAX_LICENSE_LEN];
|
|
enum bpf_prog_type type;
|
|
__u32 ifindex;
|
|
@@ -1904,12 +1907,25 @@ static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
|
|
return 0;
|
|
}
|
|
|
|
+static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section,
|
|
+ struct bpf_elf_sec_data *data)
|
|
+{
|
|
+ ctx->sec_text = section;
|
|
+ ctx->sec_done[section] = true;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
|
|
{
|
|
return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
|
|
}
|
|
|
|
-static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
|
|
+static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx)
|
|
+{
|
|
+ return ctx->sec_text;
|
|
+}
|
|
+
|
|
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec)
|
|
{
|
|
struct bpf_elf_sec_data data;
|
|
int i, ret = -1;
|
|
@@ -1925,6 +1941,11 @@ static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
|
|
else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
|
|
!strcmp(data.sec_name, ELF_SECTION_LICENSE))
|
|
ret = bpf_fetch_license(ctx, i, &data);
|
|
+ else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
|
|
+ (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
|
|
+ !strcmp(data.sec_name, ".text") &&
|
|
+ check_text_sec)
|
|
+ ret = bpf_fetch_text(ctx, i, &data);
|
|
else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
|
|
!strcmp(data.sec_name, ".symtab"))
|
|
ret = bpf_fetch_symtab(ctx, i, &data);
|
|
@@ -1969,17 +1990,18 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
|
|
ret = bpf_fill_section_data(ctx, i, &data);
|
|
if (ret < 0 ||
|
|
!(data.sec_hdr.sh_type == SHT_PROGBITS &&
|
|
- data.sec_hdr.sh_flags & SHF_EXECINSTR &&
|
|
+ (data.sec_hdr.sh_flags & SHF_EXECINSTR) &&
|
|
!strcmp(data.sec_name, section)))
|
|
continue;
|
|
|
|
*sseen = true;
|
|
|
|
memset(&prog, 0, sizeof(prog));
|
|
- prog.type = ctx->type;
|
|
- prog.insns = data.sec_data->d_buf;
|
|
- prog.size = data.sec_data->d_size;
|
|
- prog.license = ctx->license;
|
|
+ prog.type = ctx->type;
|
|
+ prog.license = ctx->license;
|
|
+ prog.size = data.sec_data->d_size;
|
|
+ prog.insns_num = prog.size / sizeof(struct bpf_insn);
|
|
+ prog.insns = data.sec_data->d_buf;
|
|
|
|
fd = bpf_prog_attach(section, &prog, ctx);
|
|
if (fd < 0)
|
|
@@ -1992,84 +2014,120 @@ static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
|
|
return fd;
|
|
}
|
|
|
|
-struct bpf_tail_call_props {
|
|
- unsigned int total;
|
|
- unsigned int jited;
|
|
+struct bpf_relo_props {
|
|
+ struct bpf_tail_call {
|
|
+ unsigned int total;
|
|
+ unsigned int jited;
|
|
+ } tc;
|
|
+ int main_num;
|
|
};
|
|
|
|
+static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
|
|
+ GElf_Rel *relo, GElf_Sym *sym,
|
|
+ struct bpf_relo_props *props)
|
|
+{
|
|
+ unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
|
|
+ unsigned int map_idx = sym->st_value / ctx->map_len;
|
|
+
|
|
+ if (insn_off >= prog->insns_num)
|
|
+ return -EINVAL;
|
|
+ if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) {
|
|
+ fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
|
|
+ insn_off);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (map_idx >= ARRAY_SIZE(ctx->map_fds))
|
|
+ return -EINVAL;
|
|
+ if (!ctx->map_fds[map_idx])
|
|
+ return -EINVAL;
|
|
+ if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) {
|
|
+ props->tc.total++;
|
|
+ if (ctx->maps_ext[map_idx].owner.jited ||
|
|
+ (ctx->maps_ext[map_idx].owner.type == 0 &&
|
|
+ ctx->cfg.jit_enabled))
|
|
+ props->tc.jited++;
|
|
+ }
|
|
+
|
|
+ prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD;
|
|
+ prog->insns[insn_off].imm = ctx->map_fds[map_idx];
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog,
|
|
+ GElf_Rel *relo, GElf_Sym *sym,
|
|
+ struct bpf_relo_props *props)
|
|
+{
|
|
+ unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn);
|
|
+ struct bpf_elf_prog *prog_text = &ctx->prog_text;
|
|
+
|
|
+ if (insn_off >= prog->insns_num)
|
|
+ return -EINVAL;
|
|
+ if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) &&
|
|
+ prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) {
|
|
+ fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! Compiler bug?!\n",
|
|
+ insn_off);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (!props->main_num) {
|
|
+ struct bpf_insn *insns = realloc(prog->insns,
|
|
+ prog->size + prog_text->size);
|
|
+ if (!insns)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ memcpy(insns + prog->insns_num, prog_text->insns,
|
|
+ prog_text->size);
|
|
+ props->main_num = prog->insns_num;
|
|
+ prog->insns = insns;
|
|
+ prog->insns_num += prog_text->insns_num;
|
|
+ prog->size += prog_text->size;
|
|
+ }
|
|
+
|
|
+ prog->insns[insn_off].imm += props->main_num - insn_off;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
|
|
struct bpf_elf_sec_data *data_relo,
|
|
- struct bpf_elf_sec_data *data_insn,
|
|
- struct bpf_tail_call_props *props)
|
|
+ struct bpf_elf_prog *prog,
|
|
+ struct bpf_relo_props *props)
|
|
{
|
|
- Elf_Data *idata = data_insn->sec_data;
|
|
GElf_Shdr *rhdr = &data_relo->sec_hdr;
|
|
int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
|
|
- struct bpf_insn *insns = idata->d_buf;
|
|
- unsigned int num_insns = idata->d_size / sizeof(*insns);
|
|
|
|
for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
|
|
- unsigned int ioff, rmap;
|
|
GElf_Rel relo;
|
|
GElf_Sym sym;
|
|
+ int ret = -EIO;
|
|
|
|
if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
|
|
return -EIO;
|
|
-
|
|
- ioff = relo.r_offset / sizeof(struct bpf_insn);
|
|
- if (ioff >= num_insns ||
|
|
- insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
|
|
- fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
|
|
- ioff);
|
|
- fprintf(stderr, " - Current section: %s\n", data_relo->sec_name);
|
|
- if (ioff < num_insns &&
|
|
- insns[ioff].code == (BPF_JMP | BPF_CALL))
|
|
- fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
|
|
- return -EINVAL;
|
|
- }
|
|
-
|
|
if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
|
|
return -EIO;
|
|
- if (sym.st_shndx != ctx->sec_maps) {
|
|
- fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
|
|
- relo_ent, sym.st_shndx);
|
|
- return -EIO;
|
|
- }
|
|
|
|
- rmap = sym.st_value / ctx->map_len;
|
|
- if (rmap >= ARRAY_SIZE(ctx->map_fds))
|
|
- return -EINVAL;
|
|
- if (!ctx->map_fds[rmap])
|
|
- return -EINVAL;
|
|
- if (ctx->maps[rmap].type == BPF_MAP_TYPE_PROG_ARRAY) {
|
|
- props->total++;
|
|
- if (ctx->maps_ext[rmap].owner.jited ||
|
|
- (ctx->maps_ext[rmap].owner.type == 0 &&
|
|
- ctx->cfg.jit_enabled))
|
|
- props->jited++;
|
|
- }
|
|
-
|
|
- if (ctx->verbose)
|
|
- fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
|
|
- bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
|
|
- data_insn->sec_name, ioff);
|
|
-
|
|
- insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
|
|
- insns[ioff].imm = ctx->map_fds[rmap];
|
|
+ if (sym.st_shndx == ctx->sec_maps)
|
|
+ ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props);
|
|
+ else if (sym.st_shndx == ctx->sec_text)
|
|
+ ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props);
|
|
+ else
|
|
+ fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! Compiler bug?!\n",
|
|
+ relo_ent, sym.st_shndx);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
|
|
- bool *lderr, bool *sseen)
|
|
+ bool *lderr, bool *sseen, struct bpf_elf_prog *prog)
|
|
{
|
|
struct bpf_elf_sec_data data_relo, data_insn;
|
|
- struct bpf_elf_prog prog;
|
|
int ret, idx, i, fd = -1;
|
|
|
|
for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
|
|
- struct bpf_tail_call_props props = {};
|
|
+ struct bpf_relo_props props = {};
|
|
|
|
ret = bpf_fill_section_data(ctx, i, &data_relo);
|
|
if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
|
|
@@ -2080,40 +2138,54 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
|
|
ret = bpf_fill_section_data(ctx, idx, &data_insn);
|
|
if (ret < 0 ||
|
|
!(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
|
|
- data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
|
|
+ (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) &&
|
|
!strcmp(data_insn.sec_name, section)))
|
|
continue;
|
|
+ if (sseen)
|
|
+ *sseen = true;
|
|
+
|
|
+ memset(prog, 0, sizeof(*prog));
|
|
+ prog->type = ctx->type;
|
|
+ prog->license = ctx->license;
|
|
+ prog->size = data_insn.sec_data->d_size;
|
|
+ prog->insns_num = prog->size / sizeof(struct bpf_insn);
|
|
+ prog->insns = malloc(prog->size);
|
|
+ if (!prog->insns) {
|
|
+ *lderr = true;
|
|
+ return -ENOMEM;
|
|
+ }
|
|
|
|
- *sseen = true;
|
|
+ memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size);
|
|
|
|
- ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn, &props);
|
|
+ ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props);
|
|
if (ret < 0) {
|
|
*lderr = true;
|
|
+ if (ctx->sec_text != idx)
|
|
+ free(prog->insns);
|
|
return ret;
|
|
}
|
|
+ if (ctx->sec_text == idx) {
|
|
+ fd = 0;
|
|
+ goto out;
|
|
+ }
|
|
|
|
- memset(&prog, 0, sizeof(prog));
|
|
- prog.type = ctx->type;
|
|
- prog.insns = data_insn.sec_data->d_buf;
|
|
- prog.size = data_insn.sec_data->d_size;
|
|
- prog.license = ctx->license;
|
|
-
|
|
- fd = bpf_prog_attach(section, &prog, ctx);
|
|
+ fd = bpf_prog_attach(section, prog, ctx);
|
|
+ free(prog->insns);
|
|
if (fd < 0) {
|
|
*lderr = true;
|
|
- if (props.total) {
|
|
+ if (props.tc.total) {
|
|
if (ctx->cfg.jit_enabled &&
|
|
- props.total != props.jited)
|
|
+ props.tc.total != props.tc.jited)
|
|
fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n",
|
|
- props.jited, props.total);
|
|
+ props.tc.jited, props.tc.total);
|
|
if (!ctx->cfg.jit_enabled &&
|
|
- props.jited)
|
|
+ props.tc.jited)
|
|
fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n",
|
|
- props.jited, props.total);
|
|
+ props.tc.jited, props.tc.total);
|
|
}
|
|
return fd;
|
|
}
|
|
-
|
|
+out:
|
|
ctx->sec_done[i] = true;
|
|
ctx->sec_done[idx] = true;
|
|
break;
|
|
@@ -2125,10 +2197,18 @@ static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
|
|
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
|
|
{
|
|
bool lderr = false, sseen = false;
|
|
+ struct bpf_elf_prog prog;
|
|
int ret = -1;
|
|
|
|
- if (bpf_has_map_data(ctx))
|
|
- ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen);
|
|
+ if (bpf_has_call_data(ctx)) {
|
|
+ ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL,
|
|
+ &ctx->prog_text);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx))
|
|
+ ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog);
|
|
if (ret < 0 && !lderr)
|
|
ret = bpf_fetch_prog(ctx, section, &sseen);
|
|
if (ret < 0 && !sseen)
|
|
@@ -2525,6 +2605,7 @@ static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
|
|
|
|
bpf_hash_destroy(ctx);
|
|
|
|
+ free(ctx->prog_text.insns);
|
|
free(ctx->sec_done);
|
|
free(ctx->log);
|
|
|
|
@@ -2546,7 +2627,7 @@ static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
|
|
return ret;
|
|
}
|
|
|
|
- ret = bpf_fetch_ancillary(ctx);
|
|
+ ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text"));
|
|
if (ret < 0) {
|
|
fprintf(stderr, "Error fetching ELF ancillary data!\n");
|
|
goto out;
|
|
--
|
|
2.20.1
|
|
|