106 lines
3.9 KiB
Diff
106 lines
3.9 KiB
Diff
From ffc6a52bd285a476b547312012078af69220574b Mon Sep 17 00:00:00 2001
|
|
From: Lubomir Rintel <lkundrak@v3.sk>
|
|
Date: Mon, 10 Oct 2022 18:37:31 +0200
|
|
Subject: [PATCH] teamd: stop iterating callbacks when a loop restart is
|
|
requested
|
|
|
|
A crash was observed:
|
|
|
|
Added loop callback: dbus_watch, 0x560d279e4530
|
|
Added loop callback: dbus_watch, 0x560d279e4580
|
|
...
|
|
Removed loop callback: dbus_watch, 0x560d279e4580
|
|
Removed loop callback: dbus_watch, 0x560d279e4530
|
|
Aug 31 11:54:11 holaprdel kernel: traps: teamd[557] general protection fault ip:560d26469a55 sp:7ffd43ca9650 error:0 in teamd[560d26463000+16000]
|
|
|
|
Traceback (from a different run than above):
|
|
|
|
Core was generated by `/usr/bin/teamd -o -n -U -D -N -t team0 -gg'.
|
|
Program terminated with signal SIGSEGV, Segmentation fault.
|
|
#0 0x00005600ac090a55 in teamd_run_loop_do_callbacks (ctx=0x5600ad9bac70, fds=0x7fff40861250, lcb_list=0x5600ad9bad58) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:321
|
|
321 list_for_each_node_entry_safe(lcb, tmp, lcb_list, list) {
|
|
(gdb) bt
|
|
#0 0x00005600ac090a55 in teamd_run_loop_do_callbacks (ctx=0x5600ad9bac70, fds=0x7fff40861250, lcb_list=0x5600ad9bad58) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:321
|
|
#1 teamd_run_loop_run (ctx=0x5600ad9bac70) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:415
|
|
#2 0x00005600ac08d8cb in teamd_start (p_ret=<synthetic pointer>, ctx=<optimized out>) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:1557
|
|
#3 main (argc=<optimized out>, argv=<optimized out>) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:1876
|
|
(gdb) print *lcb
|
|
Cannot access memory at address 0x9dd29944fb7e97fc
|
|
(gdb) print *tmp
|
|
Cannot access memory at address 0x9dd29944fb7e9804
|
|
(gdb)
|
|
|
|
What has happened is that libdbus called the remove_watch callback twice
|
|
in a single go, causing two callbacks being destroyed in one iteration
|
|
of teamd_run_loop_do_callbacks()'s list_for_each_node_entry_safe().
|
|
|
|
This basically turns the _safe() variant of the macro unhelpful, as tmp
|
|
points to stale data anyway.
|
|
|
|
Let's use the unsafe variant then, and terminate the loop once
|
|
teamd_loop_callback_del() asks for main loop's attention via
|
|
teamd_run_loop_restart(). If there are other callbacks pending an action,
|
|
they will get their turn in the next main loop iteration.
|
|
|
|
Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
|
|
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
|
|
---
|
|
teamd/teamd.c | 26 ++++++++++++++++++++++++--
|
|
1 file changed, 24 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/teamd/teamd.c b/teamd/teamd.c
|
|
index b310140..a89b702 100644
|
|
--- a/teamd/teamd.c
|
|
+++ b/teamd/teamd.c
|
|
@@ -309,16 +309,28 @@ static void teamd_run_loop_set_fds(struct list_item *lcb_list,
|
|
}
|
|
}
|
|
|
|
+static int teamd_check_ctrl(struct teamd_context *ctx)
|
|
+{
|
|
+ int ctrl_fd = ctx->run_loop.ctrl_pipe_r;
|
|
+ struct timeval tv;
|
|
+ fd_set rfds;
|
|
+
|
|
+ FD_ZERO(&rfds);
|
|
+ FD_SET(ctrl_fd, &rfds);
|
|
+ tv.tv_sec = tv.tv_usec = 0;
|
|
+
|
|
+ return select(ctrl_fd + 1, &rfds, NULL, NULL, &tv);
|
|
+}
|
|
+
|
|
static int teamd_run_loop_do_callbacks(struct list_item *lcb_list, fd_set *fds,
|
|
struct teamd_context *ctx)
|
|
{
|
|
struct teamd_loop_callback *lcb;
|
|
- struct teamd_loop_callback *tmp;
|
|
int i;
|
|
int events;
|
|
int err;
|
|
|
|
- list_for_each_node_entry_safe(lcb, tmp, lcb_list, list) {
|
|
+ list_for_each_node_entry(lcb, lcb_list, list) {
|
|
for (i = 0; i < 3; i++) {
|
|
if (!(lcb->fd_event & (1 << i)))
|
|
continue;
|
|
@@ -339,6 +351,16 @@ static int teamd_run_loop_do_callbacks(struct list_item *lcb_list, fd_set *fds,
|
|
teamd_log_dbg(ctx, "Failed loop callback: %s, %p",
|
|
lcb->name, lcb->priv);
|
|
}
|
|
+
|
|
+ /*
|
|
+ * If there's a control byte ready, it's possible that
|
|
+ * one or more entries have been removed from the
|
|
+ * callback list and restart has been requested. In any
|
|
+ * case, let the main loop deal with it first so that
|
|
+ * we know we're safe to proceed.
|
|
+ */
|
|
+ if (teamd_check_ctrl(ctx))
|
|
+ return 0;
|
|
}
|
|
}
|
|
return 0;
|
|
--
|
|
2.31.1
|
|
|