import CS linuxptp-3.1.1-6.el8

2023-09-27 13:36:56 +00:00 · 2023-09-27 13:36:56 +00:00 · 6f93014f35
commit 6f93014f35
parent fe3b903284
4 changed files with 185 additions and 1 deletions
--- a/SOURCES/linuxptp-eintr.patch
+++ b/SOURCES/linuxptp-eintr.patch
@ -0,0 +1,30 @@
+commit b8ecf4ddd6533437020c4589a6f7bcd9a62307ad
+Author: Miroslav Lichvar <mlichvar@redhat.com>
+Date:   Tue Sep 13 13:44:10 2022 +0200
+
+    sk: Handle EINTR when waiting for transmit timestamp.
+    
+    If ptp4l received a signal in poll() waiting for a transmit timestamp,
+    multiple (possibly confusing) error messages are logged before exit.
+    
+    If poll() returns with EINTR, call it once again to get the timestamp
+    and avoid logging the errors. Don't call it in a loop to avoid getting
+    stuck in case the timestamp is lost and the signal is repeated before
+    the poll timeout can be reached.
+    
+    Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
+
+diff --git a/sk.c b/sk.c
+index 8be0708..3595649 100644
+--- a/sk.c
+++ b/sk.c
+@@ -349,6 +349,9 @@ int sk_receive(int fd, void *buf, int buflen,
+ 	if (flags == MSG_ERRQUEUE) {
+ 		struct pollfd pfd = { fd, sk_events, 0 };
+ 		res = poll(&pfd, 1, sk_tx_timeout);
+		/* Retry once on EINTR to avoid logging errors before exit */
+		if (res < 0 && errno == EINTR)
+			res = poll(&pfd, 1, sk_tx_timeout);
+ 		if (res < 1) {
+ 			pr_err(res ? "poll for tx timestamp failed: %m" :
+ 			             "timed out while polling for tx timestamp");
--- a/SOURCES/linuxptp-faultrearm.patch
+++ b/SOURCES/linuxptp-faultrearm.patch
@ -0,0 +1,52 @@
+commit 134dc3c4655fcd9f314a5e56cd50db2f87366f5a
+Author: davidjm via Linuxptp-devel <linuxptp-devel@lists.sourceforge.net>
+Date:   Wed Nov 23 15:50:30 2022 -0800
+
+    Don't re-arm fault clearing timer on unrelated netlink events
+    
+    Set the timer only when an event causes the port to transition to the
+    FAULTY state, rather than potentially re-arming the timeout when an
+    event occurs while the port was already FAULTY.
+    
+    Concretely, this occurs when a port is in fault, perhaps due to a
+    single timeout while polling for tx-timestamp, and any other port in the
+    system (including unrelated ones ptp4l does not even know about) causes
+    netlink messages to be sent. As it stands, clock_poll() will note that
+    the port is in fault (from before, not due to the current event) and
+    reset the timeout to its original value.
+    
+    If such unrelated netlink messages arrive at a regular enough cadence
+    the timeout may be repeatedly reset, not trigger on time (if at all) and
+    the port may not get a chance to clear its fault, perhaps indefinitely.
+    
+    Signed-off-by: David Mirabito <davidjm@arista.com>
+
+diff --git a/clock.c b/clock.c
+index eea7983..451473e 100644
+--- a/clock.c
+++ b/clock.c
+@@ -1586,6 +1586,7 @@ void clock_set_sde(struct clock *c, int sde)
+ int clock_poll(struct clock *c)
+ {
+ 	int cnt, i;
+	enum port_state prior_state;
+ 	enum fsm_event event;
+ 	struct pollfd *cur;
+ 	struct port *p;
+@@ -1609,6 +1610,7 @@ int clock_poll(struct clock *c)
+ 		/* Let the ports handle their events. */
+ 		for (i = 0; i < N_POLLFD; i++) {
+ 			if (cur[i].revents & (POLLIN|POLLPRI|POLLERR)) {
+				prior_state = port_state(p);
+ 				if (cur[i].revents & POLLERR) {
+ 					pr_err("port %d: unexpected socket error",
+ 					       port_number(p));
+@@ -1624,7 +1626,7 @@ int clock_poll(struct clock *c)
+ 				}
+ 				port_dispatch(p, event, 0);
+ 				/* Clear any fault after a little while. */
+-				if (PS_FAULTY == port_state(p)) {
+				if ((PS_FAULTY == port_state(p)) && (prior_state != PS_FAULTY)) {
+ 					clock_fault_timeout(p, 1);
+ 					break;
+ 				}
--- a/SOURCES/linuxptp-soerror.patch
+++ b/SOURCES/linuxptp-soerror.patch
@ -0,0 +1,84 @@
+commit 2db8da6d1e3db074c01516c74899d42089039bc8
+Author: Miroslav Lichvar <mlichvar@redhat.com>
+Date:   Wed Apr 26 13:45:41 2023 +0200
+
+    Clear pending errors on sockets.
+    
+    When the netlink socket of a port (used for receiving link up/down
+    events) had an error (e.g. ENOBUFS due to the kernel sending too many
+    messages), ptp4l switched the port to the faulty state, but it kept
+    getting POLLERR on the socket and logged "port 1: unexpected socket
+    error" in an infinite loop.
+    
+    Unlike the PTP event and general sockets, the netlink sockets cannot be
+    closed in the faulty state as they are needed to receive the link up event.
+    
+    Instead, receive and clear the error on all descriptors getting POLLERR
+    with getsockopt(SO_ERROR). Include the error in the log message together
+    with the descriptor index to make it easier to debug issues like this in
+    future.
+    
+    (Rebased to 3.1.1)
+    
+    Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
+
+diff --git a/clock.c b/clock.c
+index 469aab6..2821fc4 100644
+--- a/clock.c
+++ b/clock.c
+@@ -1611,8 +1611,10 @@ int clock_poll(struct clock *c)
+ 			if (cur[i].revents & (POLLIN|POLLPRI|POLLERR)) {
+ 				prior_state = port_state(p);
+ 				if (cur[i].revents & POLLERR) {
+-					pr_err("port %d: unexpected socket error",
+-					       port_number(p));
+					int error = sk_get_error(cur[i].fd);
+					pr_err("port %d: error on fda[%d]: %s",
+					       port_number(p), i,
+					       strerror(error));
+ 					event = EV_FAULT_DETECTED;
+ 				} else {
+ 					event = port_event(p, i);
+diff --git a/sk.c b/sk.c
+index 3595649..47d8c3b 100644
+--- a/sk.c
+++ b/sk.c
+@@ -413,6 +413,20 @@ int sk_receive(int fd, void *buf, int buflen,
+ 	return cnt < 0 ? -errno : cnt;
+ }
+ 
+int sk_get_error(int fd)
+{
+	socklen_t len;
+	int error;
+
+	len = sizeof (error);
+	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &error, &len) < 0) {
+		pr_err("getsockopt SO_ERROR failed: %m");
+		return -1;
+	}
+
+	return error;
+}
+
+ int sk_set_priority(int fd, int family, uint8_t dscp)
+ {
+ 	int level, optname, tos;
+diff --git a/sk.h b/sk.h
+index 04d26ee..ba88e2f 100644
+--- a/sk.h
+++ b/sk.h
+@@ -109,6 +109,13 @@ int sk_interface_addr(const char *name, int family, struct address *addr);
+ int sk_receive(int fd, void *buf, int buflen,
+ 	       struct address *addr, struct hw_timestamp *hwts, int flags);
+ 
+/**
+ * Get and clear a pending socket error.
+ * @param fd      An open socket.
+ * @return        The pending error, or -1 if it could not be read.
+ */
+int sk_get_error(int fd);
+
+ /**
+  * Set DSCP value for socket.
+  * @param fd     An open socket.
--- a/SPECS/linuxptp.spec
+++ b/SPECS/linuxptp.spec
@ -4,7 +4,7 @@

 Name:		linuxptp
 Version:	3.1.1
-Release:	3%{?dist}
+Release:	6%{?dist}
 Summary:	PTP implementation for Linux

 Group:		System Environment/Base
@ -41,6 +41,12 @@ Patch9:		linuxptp-zerolength.patch
 Patch10:	linuxptp-clockcheck.patch
 # handle PHC read failing with EBUSY in phc2sys
 Patch11:	linuxptp-phcerr.patch
+# handle EINTR when waiting for transmit timestamp
+Patch15:	linuxptp-eintr.patch
+# don't re-arm fault clearing timer on unrelated netlink events
+Patch17:	linuxptp-faultrearm.patch
+# clear pending errors on sockets
+Patch18:	linuxptp-soerror.patch

 BuildRequires:	kernel-headers > 4.18.0-87
 BuildRequires:	systemd
@ -66,6 +72,9 @@ Supporting legacy APIs and other platforms is not a goal.
 %patch9 -p1 -b .zerolength
 %patch10 -p1 -b .clockcheck
 %patch11 -p1 -b .phcerr
+%patch15 -p1 -b .eintr
+%patch17 -p1 -b .faultrearm
+%patch18 -p1 -b .soerror
 mv linuxptp-testsuite-%{testsuite_ver}* testsuite
 mv clknetsim-%{clknetsim_ver}* testsuite/clknetsim

@ -129,6 +138,15 @@ PATH=..:$PATH ./run
 %{_mandir}/man8/*.8*

 %changelog
+* Wed May 03 2023 Miroslav Lichvar <mlichvar@redhat.com> 3.1.1-6
+- clear pending errors on sockets (#2192560)
+
+* Wed Apr 12 2023 Miroslav Lichvar <mlichvar@redhat.com> 3.1.1-5
+- handle EINTR when waiting for transmit timestamp (#2123224)
+
+* Mon Mar 20 2023 Miroslav Lichvar <mlichvar@redhat.com> 3.1.1-4
+- don't re-arm fault clearing timer on unrelated netlink events (#2174900)
+
 * Wed Jun 29 2022 Miroslav Lichvar <mlichvar@redhat.com> 3.1.1-3
 - handle PHC read failing with EBUSY in phc2sys (#2079129)