Linux v4.20-rc7-6-gddfbab46539f
This commit is contained in:
parent
f94c9af093
commit
abc158d467
173
bpf-fix-bpf_jit_limit-knob.patch
Normal file
173
bpf-fix-bpf_jit_limit-knob.patch
Normal file
@ -0,0 +1,173 @@
|
||||
From fdadd04931c2d7cd294dc5b2b342863f94be53a3 Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Borkmann <daniel@iogearbox.net>
|
||||
Date: Tue, 11 Dec 2018 12:14:12 +0100
|
||||
Subject: bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K
|
||||
|
||||
Michael and Sandipan report:
|
||||
|
||||
Commit ede95a63b5 introduced a bpf_jit_limit tuneable to limit BPF
|
||||
JIT allocations. At compile time it defaults to PAGE_SIZE * 40000,
|
||||
and is adjusted again at init time if MODULES_VADDR is defined.
|
||||
|
||||
For ppc64 kernels, MODULES_VADDR isn't defined, so we're stuck with
|
||||
the compile-time default at boot-time, which is 0x9c400000 when
|
||||
using 64K page size. This overflows the signed 32-bit bpf_jit_limit
|
||||
value:
|
||||
|
||||
root@ubuntu:/tmp# cat /proc/sys/net/core/bpf_jit_limit
|
||||
-1673527296
|
||||
|
||||
and can cause various unexpected failures throughout the network
|
||||
stack. In one case `strace dhclient eth0` reported:
|
||||
|
||||
setsockopt(5, SOL_SOCKET, SO_ATTACH_FILTER, {len=11, filter=0x105dd27f8},
|
||||
16) = -1 ENOTSUPP (Unknown error 524)
|
||||
|
||||
and similar failures can be seen with tools like tcpdump. This doesn't
|
||||
always reproduce however, and I'm not sure why. The more consistent
|
||||
failure I've seen is an Ubuntu 18.04 KVM guest booted on a POWER9
|
||||
host would time out on systemd/netplan configuring a virtio-net NIC
|
||||
with no noticeable errors in the logs.
|
||||
|
||||
Given this and also given that in the near future some architectures like
|
||||
arm64 will have a custom area for BPF JIT image allocations we should
|
||||
get rid of the BPF_JIT_LIMIT_DEFAULT fallback / default entirely. For
|
||||
4.21, we have an overridable bpf_jit_alloc_exec(), bpf_jit_free_exec()
|
||||
so add another overridable bpf_jit_alloc_exec_limit() helper
|
||||
function which returns the possible size of the memory area for deriving
|
||||
the default heuristic in bpf_jit_charge_init().
|
||||
|
||||
Like bpf_jit_alloc_exec() and bpf_jit_free_exec(), the new
|
||||
bpf_jit_alloc_exec_limit() assumes that module_alloc() is the default
|
||||
JIT memory provider, and therefore in case archs implement their custom
|
||||
module_alloc() we use MODULES_{END,_VADDR} for limits and otherwise for
|
||||
vmalloc_exec() cases like on ppc64 we use VMALLOC_{END,_START}.
|
||||
|
||||
Additionally, for archs supporting large page sizes, we should change
|
||||
the sysctl to be handled as long to not run into sysctl restrictions
|
||||
in the future.
|
||||
|
||||
Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations")
|
||||
Reported-by: Sandipan Das <sandipan@linux.ibm.com>
|
||||
Reported-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
|
||||
Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
|
||||
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
|
||||
---
|
||||
include/linux/filter.h | 2 +-
|
||||
kernel/bpf/core.c | 21 +++++++++++++++------
|
||||
net/core/sysctl_net_core.c | 20 +++++++++++++++++---
|
||||
3 files changed, 33 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/include/linux/filter.h b/include/linux/filter.h
|
||||
index 795ff0b869bb..a8b9d90a8042 100644
|
||||
--- a/include/linux/filter.h
|
||||
+++ b/include/linux/filter.h
|
||||
@@ -861,7 +861,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
|
||||
extern int bpf_jit_enable;
|
||||
extern int bpf_jit_harden;
|
||||
extern int bpf_jit_kallsyms;
|
||||
-extern int bpf_jit_limit;
|
||||
+extern long bpf_jit_limit;
|
||||
|
||||
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
|
||||
|
||||
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
|
||||
index b1a3545d0ec8..b2890c268cb3 100644
|
||||
--- a/kernel/bpf/core.c
|
||||
+++ b/kernel/bpf/core.c
|
||||
@@ -365,13 +365,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_JIT
|
||||
-# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
|
||||
-
|
||||
/* All BPF JIT sysctl knobs here. */
|
||||
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
|
||||
int bpf_jit_harden __read_mostly;
|
||||
int bpf_jit_kallsyms __read_mostly;
|
||||
-int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT;
|
||||
+long bpf_jit_limit __read_mostly;
|
||||
|
||||
static __always_inline void
|
||||
bpf_get_prog_addr_region(const struct bpf_prog *prog,
|
||||
@@ -580,16 +578,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
|
||||
|
||||
static atomic_long_t bpf_jit_current;
|
||||
|
||||
+/* Can be overridden by an arch's JIT compiler if it has a custom,
|
||||
+ * dedicated BPF backend memory area, or if neither of the two
|
||||
+ * below apply.
|
||||
+ */
|
||||
+u64 __weak bpf_jit_alloc_exec_limit(void)
|
||||
+{
|
||||
#if defined(MODULES_VADDR)
|
||||
+ return MODULES_END - MODULES_VADDR;
|
||||
+#else
|
||||
+ return VMALLOC_END - VMALLOC_START;
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
static int __init bpf_jit_charge_init(void)
|
||||
{
|
||||
/* Only used as heuristic here to derive limit. */
|
||||
- bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
|
||||
- PAGE_SIZE), INT_MAX);
|
||||
+ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
|
||||
+ PAGE_SIZE), LONG_MAX);
|
||||
return 0;
|
||||
}
|
||||
pure_initcall(bpf_jit_charge_init);
|
||||
-#endif
|
||||
|
||||
static int bpf_jit_charge_modmem(u32 pages)
|
||||
{
|
||||
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
|
||||
index 37b4667128a3..d67ec17f2cc8 100644
|
||||
--- a/net/core/sysctl_net_core.c
|
||||
+++ b/net/core/sysctl_net_core.c
|
||||
@@ -28,6 +28,8 @@ static int two __maybe_unused = 2;
|
||||
static int min_sndbuf = SOCK_MIN_SNDBUF;
|
||||
static int min_rcvbuf = SOCK_MIN_RCVBUF;
|
||||
static int max_skb_frags = MAX_SKB_FRAGS;
|
||||
+static long long_one __maybe_unused = 1;
|
||||
+static long long_max __maybe_unused = LONG_MAX;
|
||||
|
||||
static int net_msg_warn; /* Unused, but still a sysctl */
|
||||
|
||||
@@ -289,6 +291,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
|
||||
|
||||
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
}
|
||||
+
|
||||
+static int
|
||||
+proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
|
||||
+ void __user *buffer, size_t *lenp,
|
||||
+ loff_t *ppos)
|
||||
+{
|
||||
+ if (!capable(CAP_SYS_ADMIN))
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
|
||||
+}
|
||||
#endif
|
||||
|
||||
static struct ctl_table net_core_table[] = {
|
||||
@@ -398,10 +411,11 @@ static struct ctl_table net_core_table[] = {
|
||||
{
|
||||
.procname = "bpf_jit_limit",
|
||||
.data = &bpf_jit_limit,
|
||||
- .maxlen = sizeof(int),
|
||||
+ .maxlen = sizeof(long),
|
||||
.mode = 0600,
|
||||
- .proc_handler = proc_dointvec_minmax_bpf_restricted,
|
||||
- .extra1 = &one,
|
||||
+ .proc_handler = proc_dolongvec_minmax_bpf_restricted,
|
||||
+ .extra1 = &long_one,
|
||||
+ .extra2 = &long_max,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
--
|
||||
cgit 1.2-0.3.lf.el7
|
||||
|
2
gitrev
2
gitrev
@ -1 +1 @@
|
||||
7566ec393f4161572ba6f11ad5171fd5d59b0fbd
|
||||
ddfbab46539f2d37a9e9d357b054486b51f7dc27
|
||||
|
11
kernel.spec
11
kernel.spec
@ -69,7 +69,7 @@ Summary: The Linux kernel
|
||||
# The rc snapshot level
|
||||
%global rcrev 7
|
||||
# The git snapshot level
|
||||
%define gitrev 0
|
||||
%define gitrev 1
|
||||
# Set rpm version accordingly
|
||||
%define rpmversion 4.%{upstream_sublevel}.0
|
||||
%endif
|
||||
@ -122,7 +122,7 @@ Summary: The Linux kernel
|
||||
# Set debugbuildsenabled to 1 for production (build separate debug kernels)
|
||||
# and 0 for rawhide (all kernels are debug kernels).
|
||||
# See also 'make debug' and 'make release'.
|
||||
%define debugbuildsenabled 1
|
||||
%define debugbuildsenabled 0
|
||||
|
||||
# Kernel headers are being split out into a separate package
|
||||
%if 0%{?fedora}
|
||||
@ -612,6 +612,9 @@ Patch504: iio-accel-kxcjk1013-Add-more-hardware-ids.patch
|
||||
# rhbz 1645070 patch queued upstream for merging into 4.21
|
||||
Patch505: asus-fx503-keyb.patch
|
||||
|
||||
# rhbz 1647947
|
||||
Patch506: bpf-fix-bpf_jit_limit-knob.patch
|
||||
|
||||
# END OF PATCH DEFINITIONS
|
||||
|
||||
%endif
|
||||
@ -1884,6 +1887,10 @@ fi
|
||||
#
|
||||
#
|
||||
%changelog
|
||||
* Tue Dec 18 2018 Justin M. Forbes <jforbes@fedoraproject.org> - 4.20.0-0.rc7.git1.1
|
||||
- Linux v4.20-rc7-6-gddfbab46539f
|
||||
- Reenable debugging options.
|
||||
|
||||
* Mon Dec 17 2018 Justin M. Forbes <jforbes@fedoraproject.org> - 4.20.0-0.rc7.git0.1
|
||||
- Linux v4.20-rc7
|
||||
|
||||
|
1
sources
1
sources
@ -1,2 +1,3 @@
|
||||
SHA512 (linux-4.19.tar.xz) = ab67cc746b375a8b135e8b23e35e1d6787930d19b3c26b2679787d62951cbdbc3bb66f8ededeb9b890e5008b2459397f9018f1a6772fdef67780b06a4cb9f6f4
|
||||
SHA512 (patch-4.20-rc7.xz) = 84c35b95f08454f3920b1400e6fee8c6f30ebfdcc9a32f447d2124867b22a17da87c0d1496dd22512ddb4d6c0ce9457acddb6d6167e8c673d44b3f2a585486bd
|
||||
SHA512 (patch-4.20-rc7-git1.xz) = 1be1f4d521267a23b3682926dd7b6cf638d8bd1073dd14575007b7736714668229fd2e0b6532e50d9ff07a3079210741e3bd37c52ecab9706435db546e495f51
|
||||
|
Loading…
Reference in New Issue
Block a user