rasdaemon: fix excessive block event messages

Only track block error events instead of logging successful ones. The
upstream patch (9c86f6255f67a8) had to be modified since it relies on
the kernel version to determine if the new trace point is available or
not. Such tests are meaningless in RHEL as we backport a lot of patches
while keeping the kernel version intact.

Resolves: RHEL-8708

Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
This commit is contained in:
Aristeu Rozanski 2024-05-23 11:26:07 -04:00
parent c355651dbd
commit 5cde7234ff
4 changed files with 173 additions and 1 deletions

View File

@ -0,0 +1,22 @@
commit 885e546add918457c453bd3f753ac7df90b39e36
Author: weidongkl <weidongkl@sina.com>
Date: Tue Sep 19 16:29:21 2023 +0800
Add a space between "diskerror_event" and "store"
Signed-off-by: weidongkl <weidongkl@sina.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
diff --git a/ras-record.c b/ras-record.c
index a5f99ae..6b050bb 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -484,7 +484,7 @@ int ras_store_diskerror_event(struct ras_events *ras, struct diskerror_event *ev
if (!priv || !priv->stmt_diskerror_event)
return 0;
- log(TERM, LOG_INFO, "diskerror_eventstore: %p\n", priv->stmt_diskerror_event);
+ log(TERM, LOG_INFO, "diskerror_event store: %p\n", priv->stmt_diskerror_event);
sqlite3_bind_text(priv->stmt_diskerror_event, 1, ev->timestamp, -1, NULL);
sqlite3_bind_text(priv->stmt_diskerror_event, 2, ev->dev, -1, NULL);

View File

@ -0,0 +1,24 @@
commit 9bd84aef87978b806178a73ed33c39d6c442fc1f
Author: weidong <weidongkl@sina.com>
Date: Tue Aug 8 08:59:12 2023 +0000
add ':' before error output
All prints except disk are preceded by a colon
Signed-off-by: weidong <weidongkl@sina.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index dc326d3..13078c2 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1469,7 +1469,7 @@ sub errors
$out .= "\n";
}
if ($out ne "") {
- print "Disk errors\n$out\n";
+ print "Disk errors:\n$out\n";
} else {
print "No disk errors.\n\n";
}

View File

@ -0,0 +1,117 @@
commit 9c86f6255f67a8bae28cd46c54500fc16bfc7a30
Author: Yang Shi <shy828301@gmail.com>
Date: Mon Apr 4 16:34:05 2022 -0700
rasdaemon: use the new block_rq_error tracepoint
Since Linux 5.18-rc1 a new block tracepoint called block_rq_error is
available for tracing disk error events dedicatedly. Currently
rasdaemon is using block_rq_complete which also traces successful cases.
It incurs excessive tracing logs and somehow overhead since the event is
triggered quite often.
Use the new tracepoint for disk error reporting, and the new trace point
has the same format as block_rq_complete.
Signed-off-by: Yang Shi <shy828301@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
ras-events.c | 53 ++++++++++-------------------------------------------
ras-record.c | 2 +-
2 files changed, 11 insertions(+), 44 deletions(-)
--- rasdaemon-0.6.7.orig/ras-events.c 2024-05-14 11:05:40.020599541 -0400
+++ rasdaemon-0.6.7/ras-events.c 2024-05-14 11:06:38.831067957 -0400
@@ -27,6 +27,7 @@ * Foundation, Inc., 51 Franklin Street,
#include <sys/poll.h>
#include <signal.h>
#include <sys/signalfd.h>
+#include <linux/version.h>
#include "libtrace/kbuffer.h"
#include "libtrace/event-parse.h"
#include "ras-mc-handler.h"
@@ -229,7 +230,7 @@ if (rc < 0) {
#endif
#ifdef HAVE_DISKERROR
- rc |= __toggle_ras_mc_event(ras, "block", "block_rq_complete", enable);
+ rc |= __toggle_ras_mc_event(ras, "block", "block_rq_error", enable);
#endif
#ifdef HAVE_MEMORY_FAILURE
@@ -241,37 +242,6 @@ free_ras:
return rc;
}
-/*
- * Set kernel filter. libtrace doesn't provide an API for setting filters
- * in kernel, we have to implement it here.
- */
-static int filter_ras_mc_event(struct ras_events *ras, char *group, char *event,
- const char *filter_str)
-{
- int fd, rc;
- char fname[MAX_PATH + 1];
-
- snprintf(fname, sizeof(fname), "events/%s/%s/filter", group, event);
- fd = open_trace(ras, fname, O_RDWR | O_APPEND);
- if (fd < 0) {
- log(ALL, LOG_WARNING, "Can't open filter file\n");
- return errno;
- }
-
- rc = write(fd, filter_str ,strlen(filter_str));
- if (rc < 0) {
- log(ALL, LOG_WARNING, "Can't write to filter file\n");
- close(fd);
- return rc;
- }
- close(fd);
- if (!rc) {
- log(ALL, LOG_WARNING, "Nothing was written on filter file\n");
- return EIO;
- }
-
- return 0;
-}
/*
* Tracing read code
@@ -901,17 +871,14 @@ (void)open("/sys/kernel/debug/ras/daemon
#endif
#ifdef HAVE_DISKERROR
- rc = filter_ras_mc_event(ras, "block", "block_rq_complete", "error != 0");
- if (!rc) {
- rc = add_event_handler(ras, pevent, page_size, "block",
- "block_rq_complete", ras_diskerror_event_handler,
- NULL, DISKERROR_EVENT);
- if (!rc)
- num_events++;
- else
- log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
- "block", "block_rq_complete");
- }
+ rc = add_event_handler(ras, pevent, page_size, "block",
+ "block_rq_error", ras_diskerror_event_handler,
+ NULL, DISKERROR_EVENT);
+ if (!rc)
+ num_events++;
+ else
+ log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+ "block", "block_rq_error");
#endif
#ifdef HAVE_MEMORY_FAILURE
--- rasdaemon-0.6.7.orig/ras-record.c 2024-05-14 11:07:24.573654494 -0400
+++ rasdaemon-0.6.7/ras-record.c 2024-05-14 11:07:07.626807674 -0400
@@ -456,7 +456,7 @@ return 0;
#endif
/*
- * Table and functions to handle block:block_rq_complete
+ * Table and functions to handle block:block_rq_error
*/
#ifdef HAVE_DISKERROR

View File

@ -1,6 +1,6 @@
Name: rasdaemon
Version: 0.6.7
Release: 10%{?dist}
Release: 11%{?dist}
Summary: Utility to receive RAS error tracings
License: GPL-2.0-only
URL: http://git.infradead.org/users/mchehab/rasdaemon.git
@ -33,6 +33,9 @@ Patch24: 1f74a59ee33b7448b00d7ba13d5ecd4918b9853c.patch
Patch25: 2d15882a0cbfce0b905039bebc811ac8311cd739.patch
Patch26: c785d309dcbdeb7ecd219975244f3944a8d047e9.patch
Patch27: b6a64416ab31b66ce92cabcc7fa1f3c5e9db2e87.patch
Patch28: 9c86f6255f67a8bae28cd46c54500fc16bfc7a30.patch
Patch29: 9bd84aef87978b806178a73ed33c39d6c442fc1f.patch
Patch30: 885e546add918457c453bd3f753ac7df90b39e36.patch
ExcludeArch: s390 s390x
BuildRequires: make
@ -95,6 +98,9 @@ an utility for reporting current error counts from the EDAC sysfs files.
%patch25 -p1
%patch26 -p1
%patch27 -p1
%patch28 -p1
%patch29 -p1
%patch30 -p1
# The tarball is locked in time the first time aclocal was ran and will keep
# requiring an older version of automake
@ -130,6 +136,9 @@ sed -i "s/^PAGE_CE_ACTION=.*/PAGE_CE_ACTION=account/" %{buildroot}/%{_sysconfdir
%{_sysconfdir}/sysconfig/rasdaemon
%changelog
* Wed May 08 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-11
- Fix excessive block messages [RHEL-8708]
* Wed Jan 10 2024 Aristeu Rozanski <aris@redhat.com> 0.6.7-10
- Update License string to use SPDX [RHELMISC-1262]