libteam/libteam-teamd-stop-iterating-callbacks-when-a-loop-restart-i.patch

From ffc6a52bd285a476b547312012078af69220574b Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Mon, 10 Oct 2022 18:37:31 +0200
Subject: [PATCH] teamd: stop iterating callbacks when a loop restart is
 requested

A crash was observed:

  Added loop callback: dbus_watch, 0x560d279e4530
  Added loop callback: dbus_watch, 0x560d279e4580
  ...
  Removed loop callback: dbus_watch, 0x560d279e4580
  Removed loop callback: dbus_watch, 0x560d279e4530
  Aug 31 11:54:11 holaprdel kernel: traps: teamd[557] general protection fault ip:560d26469a55 sp:7ffd43ca9650 error:0 in teamd[560d26463000+16000]

Traceback (from a different run than above):

  Core was generated by `/usr/bin/teamd -o -n -U -D -N -t team0 -gg'.
  Program terminated with signal SIGSEGV, Segmentation fault.
  #0  0x00005600ac090a55 in teamd_run_loop_do_callbacks (ctx=0x5600ad9bac70, fds=0x7fff40861250, lcb_list=0x5600ad9bad58) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:321
  321		list_for_each_node_entry_safe(lcb, tmp, lcb_list, list) {
  (gdb) bt
  #0  0x00005600ac090a55 in teamd_run_loop_do_callbacks (ctx=0x5600ad9bac70, fds=0x7fff40861250, lcb_list=0x5600ad9bad58) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:321
  #1  teamd_run_loop_run (ctx=0x5600ad9bac70) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:415
  #2  0x00005600ac08d8cb in teamd_start (p_ret=<synthetic pointer>, ctx=<optimized out>) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:1557
  #3  main (argc=<optimized out>, argv=<optimized out>) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:1876
  (gdb) print *lcb
  Cannot access memory at address 0x9dd29944fb7e97fc
  (gdb) print *tmp
  Cannot access memory at address 0x9dd29944fb7e9804
  (gdb)

What has happened is that libdbus called the remove_watch callback twice
in a single go, causing two callbacks being destroyed in one iteration
of teamd_run_loop_do_callbacks()'s list_for_each_node_entry_safe().

This basically turns the _safe() variant of the macro unhelpful, as tmp
points to stale data anyway.

Let's use the unsafe variant then, and terminate the loop once
teamd_loop_callback_del() asks for main loop's attention via
teamd_run_loop_restart(). If there are other callbacks pending an action,
they will get their turn in the next main loop iteration.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
 teamd/teamd.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/teamd/teamd.c b/teamd/teamd.c
index b310140..a89b702 100644
--- a/teamd/teamd.c
+++ b/teamd/teamd.c
@@ -309,16 +309,28 @@ static void teamd_run_loop_set_fds(struct list_item *lcb_list,
 	}
 }
 
+static int teamd_check_ctrl(struct teamd_context *ctx)
+{
+	int ctrl_fd = ctx->run_loop.ctrl_pipe_r;
+	struct timeval tv;
+	fd_set rfds;
+
+	FD_ZERO(&rfds);
+	FD_SET(ctrl_fd, &rfds);
+	tv.tv_sec = tv.tv_usec = 0;
+
+	return select(ctrl_fd + 1, &rfds, NULL, NULL, &tv);
+}
+
 static int teamd_run_loop_do_callbacks(struct list_item *lcb_list, fd_set *fds,
 				       struct teamd_context *ctx)
 {
 	struct teamd_loop_callback *lcb;
-	struct teamd_loop_callback *tmp;
 	int i;
 	int events;
 	int err;
 
-	list_for_each_node_entry_safe(lcb, tmp, lcb_list, list) {
+	list_for_each_node_entry(lcb, lcb_list, list) {
 		for (i = 0; i < 3; i++) {
 			if (!(lcb->fd_event & (1 << i)))
 				continue;
@@ -339,6 +351,16 @@ static int teamd_run_loop_do_callbacks(struct list_item *lcb_list, fd_set *fds,
 				teamd_log_dbg(ctx, "Failed loop callback: %s, %p",
 					      lcb->name, lcb->priv);
 			}
+
+			/*
+			 * If there's a control byte ready, it's possible that
+			 * one or more entries have been removed from the
+			 * callback list and restart has been requested. In any
+			 * case, let the main loop deal with it first so that
+			 * we know we're safe to proceed.
+			 */
+			if (teamd_check_ctrl(ctx))
+				return 0;
 		}
 	}
 	return 0;
-- 
2.31.1
Auto sync2gitlab import of libteam-1.31-4.el8.src.rpm 2022-12-08 12:12:16 +00:00			`From ffc6a52bd285a476b547312012078af69220574b Mon Sep 17 00:00:00 2001`
			`From: Lubomir Rintel <lkundrak@v3.sk>`
			`Date: Mon, 10 Oct 2022 18:37:31 +0200`
			`Subject: [PATCH] teamd: stop iterating callbacks when a loop restart is`
			`requested`

			`A crash was observed:`

			`Added loop callback: dbus_watch, 0x560d279e4530`
			`Added loop callback: dbus_watch, 0x560d279e4580`
			`...`
			`Removed loop callback: dbus_watch, 0x560d279e4580`
			`Removed loop callback: dbus_watch, 0x560d279e4530`
			`Aug 31 11:54:11 holaprdel kernel: traps: teamd[557] general protection fault ip:560d26469a55 sp:7ffd43ca9650 error:0 in teamd[560d26463000+16000]`

			`Traceback (from a different run than above):`

			Core was generated by `/usr/bin/teamd -o -n -U -D -N -t team0 -gg'.
			`Program terminated with signal SIGSEGV, Segmentation fault.`
			`#0 0x00005600ac090a55 in teamd_run_loop_do_callbacks (ctx=0x5600ad9bac70, fds=0x7fff40861250, lcb_list=0x5600ad9bad58) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:321`
			`321 list_for_each_node_entry_safe(lcb, tmp, lcb_list, list) {`
			`(gdb) bt`
			`#0 0x00005600ac090a55 in teamd_run_loop_do_callbacks (ctx=0x5600ad9bac70, fds=0x7fff40861250, lcb_list=0x5600ad9bad58) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:321`
			`#1 teamd_run_loop_run (ctx=0x5600ad9bac70) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:415`
			`#2 0x00005600ac08d8cb in teamd_start (p_ret=<synthetic pointer>, ctx=<optimized out>) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:1557`
			`#3 main (argc=<optimized out>, argv=<optimized out>) at /usr/src/debug/libteam-1.31-14.el9.x86_64/teamd/teamd.c:1876`
			`(gdb) print *lcb`
			`Cannot access memory at address 0x9dd29944fb7e97fc`
			`(gdb) print *tmp`
			`Cannot access memory at address 0x9dd29944fb7e9804`
			`(gdb)`

			`What has happened is that libdbus called the remove_watch callback twice`
			`in a single go, causing two callbacks being destroyed in one iteration`
			`of teamd_run_loop_do_callbacks()'s list_for_each_node_entry_safe().`

			`This basically turns the _safe() variant of the macro unhelpful, as tmp`
			`points to stale data anyway.`

			`Let's use the unsafe variant then, and terminate the loop once`
			`teamd_loop_callback_del() asks for main loop's attention via`
			`teamd_run_loop_restart(). If there are other callbacks pending an action,`
			`they will get their turn in the next main loop iteration.`

			`Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>`
			`Signed-off-by: Jiri Pirko <jiri@nvidia.com>`
			`---`
			`teamd/teamd.c \| 26 ++++++++++++++++++++++++--`
			`1 file changed, 24 insertions(+), 2 deletions(-)`

			`diff --git a/teamd/teamd.c b/teamd/teamd.c`
			`index b310140..a89b702 100644`
			`--- a/teamd/teamd.c`
			`+++ b/teamd/teamd.c`
			`@@ -309,16 +309,28 @@ static void teamd_run_loop_set_fds(struct list_item *lcb_list,`
			`}`
			`}`

			`+static int teamd_check_ctrl(struct teamd_context *ctx)`
			`+{`
			`+ int ctrl_fd = ctx->run_loop.ctrl_pipe_r;`
			`+ struct timeval tv;`
			`+ fd_set rfds;`
			`+`
			`+ FD_ZERO(&rfds);`
			`+ FD_SET(ctrl_fd, &rfds);`
			`+ tv.tv_sec = tv.tv_usec = 0;`
			`+`
			`+ return select(ctrl_fd + 1, &rfds, NULL, NULL, &tv);`
			`+}`
			`+`
			`static int teamd_run_loop_do_callbacks(struct list_item lcb_list, fd_set fds,`
			`struct teamd_context *ctx)`
			`{`
			`struct teamd_loop_callback *lcb;`
			`- struct teamd_loop_callback *tmp;`
			`int i;`
			`int events;`
			`int err;`

			`- list_for_each_node_entry_safe(lcb, tmp, lcb_list, list) {`
			`+ list_for_each_node_entry(lcb, lcb_list, list) {`
			`for (i = 0; i < 3; i++) {`
			`if (!(lcb->fd_event & (1 << i)))`
			`continue;`
			`@@ -339,6 +351,16 @@ static int teamd_run_loop_do_callbacks(struct list_item lcb_list, fd_set fds,`
			`teamd_log_dbg(ctx, "Failed loop callback: %s, %p",`
			`lcb->name, lcb->priv);`
			`}`
			`+`
			`+ /*`
			`+ * If there's a control byte ready, it's possible that`
			`+ * one or more entries have been removed from the`
			`+ * callback list and restart has been requested. In any`
			`+ * case, let the main loop deal with it first so that`
			`+ * we know we're safe to proceed.`
			`+ */`
			`+ if (teamd_check_ctrl(ctx))`
			`+ return 0;`
			`}`
			`}`
			`return 0;`
			`--`
			`2.31.1`