diff --git a/0001-ppc64-update-the-NR_CPUS-to-8192.patch b/0001-ppc64-update-the-NR_CPUS-to-8192.patch new file mode 100644 index 0000000..c5608f6 --- /dev/null +++ b/0001-ppc64-update-the-NR_CPUS-to-8192.patch @@ -0,0 +1,31 @@ +From ae52398a13fa9a238279114ed671c7c514c154ee Mon Sep 17 00:00:00 2001 +From: Sourabh Jain +Date: Mon, 9 May 2022 12:49:56 +0530 +Subject: [PATCH 01/18] ppc64: update the NR_CPUS to 8192 + +Since the kernel commit 2d8ae638bb86 ("powerpc: Make the NR_CPUS max 8192") +the NR_CPUS on Linux kernel ranges from 1-8192. So let's match NR_CPUS with +the max NR_CPUS count on the Linux kernel. + +Signed-off-by: Sourabh Jain +Signed-off-by: Lianbo Jiang +--- + defs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/defs.h b/defs.h +index 1e8360d65a3b..a6735d07b32f 100644 +--- a/defs.h ++++ b/defs.h +@@ -136,7 +136,7 @@ + #define NR_CPUS (4096) + #endif + #ifdef PPC64 +-#define NR_CPUS (2048) ++#define NR_CPUS (8192) + #endif + #ifdef S390 + #define NR_CPUS (512) +-- +2.30.2 + diff --git a/0002-sbitmapq-remove-struct-and-member-validation-in-sbit.patch b/0002-sbitmapq-remove-struct-and-member-validation-in-sbit.patch new file mode 100644 index 0000000..5b4fc47 --- /dev/null +++ b/0002-sbitmapq-remove-struct-and-member-validation-in-sbit.patch @@ -0,0 +1,62 @@ +From 364b2e413c69daf189d2bc0238e3ba9b0dcbd937 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Mon, 23 May 2022 18:04:13 +0800 +Subject: [PATCH 02/18] sbitmapq: remove struct and member validation in + sbitmapq_init() + +Let's remove the struct and member validation from sbitmapq_init(), which +will help the crash to display the actual error when the sbitmapq fails. + +Without the patch: + crash> sbitmapq ffff8e99d0dc8010 + sbitmapq: command not supported or applicable on this architecture or kernel + +With the patch: + crash> sbitmapq ffff8e99d0dc8010 + + sbitmapq: invalid structure member offset: sbitmap_queue_alloc_hint + FILE: sbitmap.c LINE: 365 FUNCTION: sbitmap_queue_context_load() + +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 24 ------------------------ + 1 file changed, 24 deletions(-) + +diff --git a/sbitmap.c b/sbitmap.c +index 96a61e6c2c71..7693eef6cebd 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -525,30 +525,6 @@ void sbitmapq_init(void) + MEMBER_OFFSET_INIT(sbq_wait_state_wait_cnt, "sbq_wait_state", "wait_cnt"); + MEMBER_OFFSET_INIT(sbq_wait_state_wait, "sbq_wait_state", "wait"); + +- if (!VALID_SIZE(sbitmap_word) || +- !VALID_SIZE(sbitmap) || +- !VALID_SIZE(sbitmap_queue) || +- !VALID_SIZE(sbq_wait_state) || +- INVALID_MEMBER(sbitmap_word_depth) || +- INVALID_MEMBER(sbitmap_word_word) || +- INVALID_MEMBER(sbitmap_word_cleared) || +- INVALID_MEMBER(sbitmap_depth) || +- INVALID_MEMBER(sbitmap_shift) || +- INVALID_MEMBER(sbitmap_map_nr) || +- INVALID_MEMBER(sbitmap_map) || +- INVALID_MEMBER(sbitmap_queue_sb) || +- INVALID_MEMBER(sbitmap_queue_alloc_hint) || +- INVALID_MEMBER(sbitmap_queue_wake_batch) || +- INVALID_MEMBER(sbitmap_queue_wake_index) || +- INVALID_MEMBER(sbitmap_queue_ws) || +- INVALID_MEMBER(sbitmap_queue_ws_active) || +- INVALID_MEMBER(sbitmap_queue_round_robin) || +- INVALID_MEMBER(sbitmap_queue_min_shallow_depth) || +- INVALID_MEMBER(sbq_wait_state_wait_cnt) || +- INVALID_MEMBER(sbq_wait_state_wait)) { +- command_not_supported(); +- } +- + sb_flags |= SB_FLAG_INIT; + } + +-- +2.30.2 + diff --git a/0003-sbitmapq-fix-invalid-offset-for-sbitmap_queue_alloc_.patch b/0003-sbitmapq-fix-invalid-offset-for-sbitmap_queue_alloc_.patch new file mode 100644 index 0000000..eb1d17f --- /dev/null +++ b/0003-sbitmapq-fix-invalid-offset-for-sbitmap_queue_alloc_.patch @@ -0,0 +1,118 @@ +From a295cb40cd5d24fb5995cc78d29c5def3843d285 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Mon, 23 May 2022 18:04:14 +0800 +Subject: [PATCH 03/18] sbitmapq: fix invalid offset for + "sbitmap_queue_alloc_hint" on Linux v5.13-rc1 + +Kernel commit c548e62bcf6a ("scsi: sbitmap: Move allocation hint +into sbitmap") moved the alloc_hint member from struct sbitmap_queue +to struct sbitmap. Without the patch, the sbitmapq will fail: + + crash> sbitmapq 0xffff8e99d0dc8010 + + sbitmapq: invalid structure member offset: sbitmap_queue_alloc_hint + FILE: sbitmap.c LINE: 365 FUNCTION: sbitmap_queue_context_load() + +Signed-off-by: Lianbo Jiang +--- + defs.h | 2 ++ + sbitmap.c | 14 ++++++++++++-- + symbols.c | 2 ++ + 3 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/defs.h b/defs.h +index a6735d07b32f..0aeb98c4f654 100644 +--- a/defs.h ++++ b/defs.h +@@ -2168,6 +2168,7 @@ struct offset_table { /* stash of commonly-used offsets */ + long sbitmap_queue_min_shallow_depth; + long sbq_wait_state_wait_cnt; + long sbq_wait_state_wait; ++ long sbitmap_alloc_hint; + }; + + struct size_table { /* stash of commonly-used sizes */ +@@ -5907,6 +5908,7 @@ struct sbitmap_context { + unsigned shift; + unsigned map_nr; + ulong map_addr; ++ ulong alloc_hint; + }; + + typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p); +diff --git a/sbitmap.c b/sbitmap.c +index 7693eef6cebd..2921d5447c65 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -285,6 +285,7 @@ void sbitmap_for_each_set(const struct sbitmap_context *sc, + static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + const struct sbitmap_context *sc) + { ++ ulong alloc_hint_addr = 0; + int cpus = get_cpus_possible(); + int sbq_wait_state_size, wait_cnt_off, wait_off, list_head_off; + char *sbq_wait_state_buf; +@@ -297,6 +298,11 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + fprintf(fp, "bits_per_word = %u\n", 1U << sc->shift); + fprintf(fp, "map_nr = %u\n", sc->map_nr); + ++ if (VALID_MEMBER(sbitmap_queue_alloc_hint)) ++ alloc_hint_addr = sqc->alloc_hint; ++ else if (VALID_MEMBER(sbitmap_alloc_hint)) /* 5.13 and later */ ++ alloc_hint_addr = sc->alloc_hint; ++ + fputs("alloc_hint = {", fp); + first = true; + for (i = 0; i < cpus; i++) { +@@ -307,7 +313,7 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + fprintf(fp, ", "); + first = false; + +- ptr = kt->__per_cpu_offset[i] + sqc->alloc_hint; ++ ptr = kt->__per_cpu_offset[i] + alloc_hint_addr; + readmem(ptr, KVADDR, &val, sizeof(val), "alloc_hint", FAULT_ON_ERROR); + + fprintf(fp, "%u", val); +@@ -362,7 +368,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context + error(FATAL, "cannot read sbitmap_queue\n"); + } + +- sqc->alloc_hint = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_alloc_hint)); ++ if (VALID_MEMBER(sbitmap_queue_alloc_hint)) ++ sqc->alloc_hint = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_alloc_hint)); + sqc->wake_batch = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_batch)); + sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index)); + sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws)); +@@ -387,6 +394,8 @@ void sbitmap_context_load(ulong addr, struct sbitmap_context *sc) + sc->shift = UINT(sbitmap_buf + OFFSET(sbitmap_shift)); + sc->map_nr = UINT(sbitmap_buf + OFFSET(sbitmap_map_nr)); + sc->map_addr = ULONG(sbitmap_buf + OFFSET(sbitmap_map)); ++ if (VALID_MEMBER(sbitmap_alloc_hint)) ++ sc->alloc_hint = ULONG(sbitmap_buf + OFFSET(sbitmap_alloc_hint)); + + FREEBUF(sbitmap_buf); + } +@@ -512,6 +521,7 @@ void sbitmapq_init(void) + MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift"); + MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr"); + MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map"); ++ MEMBER_OFFSET_INIT(sbitmap_alloc_hint, "sbitmap", "alloc_hint"); + + MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb"); + MEMBER_OFFSET_INIT(sbitmap_queue_alloc_hint, "sbitmap_queue", "alloc_hint"); +diff --git a/symbols.c b/symbols.c +index ba5e2741347d..fd0eb06899f0 100644 +--- a/symbols.c ++++ b/symbols.c +@@ -10708,6 +10708,8 @@ dump_offset_table(char *spec, ulong makestruct) + OFFSET(sbitmap_map_nr)); + fprintf(fp, " sbitmap_map: %ld\n", + OFFSET(sbitmap_map)); ++ fprintf(fp, " sbitmap_alloc_hint: %ld\n", ++ OFFSET(sbitmap_alloc_hint)); + fprintf(fp, " sbitmap_queue_sb: %ld\n", + OFFSET(sbitmap_queue_sb)); + fprintf(fp, " sbitmap_queue_alloc_hint: %ld\n", +-- +2.30.2 + diff --git a/0004-sbitmapq-fix-invalid-offset-for-sbitmap_queue_round_.patch b/0004-sbitmapq-fix-invalid-offset-for-sbitmap_queue_round_.patch new file mode 100644 index 0000000..1a2b611 --- /dev/null +++ b/0004-sbitmapq-fix-invalid-offset-for-sbitmap_queue_round_.patch @@ -0,0 +1,103 @@ +From 530fe6ad7e4d7ff6254596c1219d25ed929e3867 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Mon, 23 May 2022 18:04:15 +0800 +Subject: [PATCH 04/18] sbitmapq: fix invalid offset for + "sbitmap_queue_round_robin" on Linux v5.13-rc1 + +Kernel commit efe1f3a1d583 ("scsi: sbitmap: Maintain allocation +round_robin in sbitmap") moved the round_robin member from struct +sbitmap_queue to struct sbitmap. Without the patch, the sbitmapq +will fail: + + crash> sbitmapq 0xffff8e99d0dc8010 + + sbitmapq: invalid structure member offset: sbitmap_queue_round_robin + FILE: sbitmap.c LINE: 378 FUNCTION: sbitmap_queue_context_load() + +Signed-off-by: Lianbo Jiang +--- + defs.h | 2 ++ + sbitmap.c | 12 ++++++++++-- + symbols.c | 2 ++ + 3 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/defs.h b/defs.h +index 0aeb98c4f654..ecbced24d2e3 100644 +--- a/defs.h ++++ b/defs.h +@@ -2169,6 +2169,7 @@ struct offset_table { /* stash of commonly-used offsets */ + long sbq_wait_state_wait_cnt; + long sbq_wait_state_wait; + long sbitmap_alloc_hint; ++ long sbitmap_round_robin; + }; + + struct size_table { /* stash of commonly-used sizes */ +@@ -5909,6 +5910,7 @@ struct sbitmap_context { + unsigned map_nr; + ulong map_addr; + ulong alloc_hint; ++ bool round_robin; + }; + + typedef bool (*sbitmap_for_each_fn)(unsigned int idx, void *p); +diff --git a/sbitmap.c b/sbitmap.c +index 2921d5447c65..7b318b533702 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -352,7 +352,11 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + + FREEBUF(sbq_wait_state_buf); + +- fprintf(fp, "round_robin = %d\n", sqc->round_robin); ++ if (VALID_MEMBER(sbitmap_queue_round_robin)) ++ fprintf(fp, "round_robin = %d\n", sqc->round_robin); ++ else if (VALID_MEMBER(sbitmap_round_robin)) /* 5.13 and later */ ++ fprintf(fp, "round_robin = %d\n", sc->round_robin); ++ + fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth); + } + +@@ -374,7 +378,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context + sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index)); + sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws)); + sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active)); +- sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin)); ++ if (VALID_MEMBER(sbitmap_queue_round_robin)) ++ sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin)); + sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth)); + + FREEBUF(sbitmap_queue_buf); +@@ -396,6 +401,8 @@ void sbitmap_context_load(ulong addr, struct sbitmap_context *sc) + sc->map_addr = ULONG(sbitmap_buf + OFFSET(sbitmap_map)); + if (VALID_MEMBER(sbitmap_alloc_hint)) + sc->alloc_hint = ULONG(sbitmap_buf + OFFSET(sbitmap_alloc_hint)); ++ if (VALID_MEMBER(sbitmap_round_robin)) ++ sc->round_robin = BOOL(sbitmap_buf + OFFSET(sbitmap_round_robin)); + + FREEBUF(sbitmap_buf); + } +@@ -522,6 +529,7 @@ void sbitmapq_init(void) + MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr"); + MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map"); + MEMBER_OFFSET_INIT(sbitmap_alloc_hint, "sbitmap", "alloc_hint"); ++ MEMBER_OFFSET_INIT(sbitmap_round_robin, "sbitmap", "round_robin"); + + MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb"); + MEMBER_OFFSET_INIT(sbitmap_queue_alloc_hint, "sbitmap_queue", "alloc_hint"); +diff --git a/symbols.c b/symbols.c +index fd0eb06899f0..5d12a021c769 100644 +--- a/symbols.c ++++ b/symbols.c +@@ -10710,6 +10710,8 @@ dump_offset_table(char *spec, ulong makestruct) + OFFSET(sbitmap_map)); + fprintf(fp, " sbitmap_alloc_hint: %ld\n", + OFFSET(sbitmap_alloc_hint)); ++ fprintf(fp, " sbitmap_round_robin: %ld\n", ++ OFFSET(sbitmap_round_robin)); + fprintf(fp, " sbitmap_queue_sb: %ld\n", + OFFSET(sbitmap_queue_sb)); + fprintf(fp, " sbitmap_queue_alloc_hint: %ld\n", +-- +2.30.2 + diff --git a/0005-sbitmapq-fix-invalid-offset-for-sbitmap_word_depth-o.patch b/0005-sbitmapq-fix-invalid-offset-for-sbitmap_word_depth-o.patch new file mode 100644 index 0000000..838fb5b --- /dev/null +++ b/0005-sbitmapq-fix-invalid-offset-for-sbitmap_word_depth-o.patch @@ -0,0 +1,101 @@ +From 3750803f6ae5f5ad071f86ca916dbbb17b7a83a5 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Mon, 23 May 2022 18:04:16 +0800 +Subject: [PATCH 05/18] sbitmapq: fix invalid offset for "sbitmap_word_depth" + on Linux v5.18-rc1 + +Kernel commit 3301bc53358a ("lib/sbitmap: kill 'depth' from sbitmap_word") +removed the depth member from struct sbitmap_word. Without the patch, the +sbitmapq will fail: + + crash> sbitmapq 0xffff8e99d0dc8010 + + sbitmapq: invalid structure member offset: sbitmap_word_depth + FILE: sbitmap.c LINE: 84 FUNCTION: __sbitmap_weight() + +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/sbitmap.c b/sbitmap.c +index 7b318b533702..e8ebd62fe01c 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -78,10 +78,16 @@ static unsigned long bitmap_weight(unsigned long bitmap, unsigned int bits) + return w; + } + ++static inline unsigned int __map_depth(const struct sbitmap_context *sc, int index) ++{ ++ if (index == sc->map_nr - 1) ++ return sc->depth - (index << sc->shift); ++ return 1U << sc->shift; ++} ++ + static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set) + { + const ulong sbitmap_word_size = SIZE(sbitmap_word); +- const ulong w_depth_off = OFFSET(sbitmap_word_depth); + const ulong w_word_off = OFFSET(sbitmap_word_word); + const ulong w_cleared_off = OFFSET(sbitmap_word_cleared); + +@@ -99,7 +105,7 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set) + error(FATAL, "cannot read sbitmap_word\n"); + } + +- depth = ULONG(sbitmap_word_buf + w_depth_off); ++ depth = __map_depth(sc, i); + + if (set) { + word = ULONG(sbitmap_word_buf + w_word_off); +@@ -142,7 +148,6 @@ static void sbitmap_emit_byte(unsigned int offset, uint8_t byte) + static void sbitmap_bitmap_show(const struct sbitmap_context *sc) + { + const ulong sbitmap_word_size = SIZE(sbitmap_word); +- const ulong w_depth_off = OFFSET(sbitmap_word_depth); + const ulong w_word_off = OFFSET(sbitmap_word_word); + const ulong w_cleared_off = OFFSET(sbitmap_word_cleared); + +@@ -165,7 +170,7 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc) + + word = ULONG(sbitmap_word_buf + w_word_off); + cleared = ULONG(sbitmap_word_buf + w_cleared_off); +- word_bits = ULONG(sbitmap_word_buf + w_depth_off); ++ word_bits = __map_depth(sc, i); + + word &= ~cleared; + +@@ -213,7 +218,6 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc, + unsigned int start, sbitmap_for_each_fn fn, void *data) + { + const ulong sbitmap_word_size = SIZE(sbitmap_word); +- const ulong w_depth_off = OFFSET(sbitmap_word_depth); + const ulong w_word_off = OFFSET(sbitmap_word_word); + const ulong w_cleared_off = OFFSET(sbitmap_word_cleared); + +@@ -232,7 +236,7 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc, + + while (scanned < sc->depth) { + unsigned long w_addr = sc->map_addr + (sbitmap_word_size * index); +- unsigned long w_depth, w_word, w_cleared; ++ unsigned long w_word, w_cleared; + unsigned long word, depth; + + if (!readmem(w_addr, KVADDR, sbitmap_word_buf, sbitmap_word_size, "sbitmap_word", RETURN_ON_ERROR)) { +@@ -240,11 +244,10 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc, + error(FATAL, "cannot read sbitmap_word\n"); + } + +- w_depth = ULONG(sbitmap_word_buf + w_depth_off); + w_word = ULONG(sbitmap_word_buf + w_word_off); + w_cleared = ULONG(sbitmap_word_buf + w_cleared_off); + +- depth = min(w_depth - nr, sc->depth - scanned); ++ depth = min(__map_depth(sc, index) - nr, sc->depth - scanned); + + scanned += depth; + word = w_word & ~w_cleared; +-- +2.30.2 + diff --git a/0007-bt-x86_64-filter-out-idle-task-stack.patch b/0007-bt-x86_64-filter-out-idle-task-stack.patch new file mode 100644 index 0000000..ebbc530 --- /dev/null +++ b/0007-bt-x86_64-filter-out-idle-task-stack.patch @@ -0,0 +1,205 @@ +From 6833262bf87177d8affe4f91b2e7d2c76ecdf636 Mon Sep 17 00:00:00 2001 +From: Qi Zheng +Date: Tue, 24 May 2022 20:25:53 +0800 +Subject: [PATCH 07/18] bt: x86_64: filter out idle task stack + +When we use crash to troubleshoot softlockup and other problems, +we often use the 'bt -a' command to print the stacks of running +processes on all CPUs. But now some servers have hundreds of CPUs +(such as AMD machines), which causes the 'bt -a' command to output +a lot of process stacks. And many of these stacks are the stacks +of the idle process, which are not needed by us. + +Therefore, in order to reduce this part of the interference information, +this patch adds the -n option to the bt command. When we specify +'-n idle' (meaning no idle), the stack of the idle process will be +filtered out, thus speeding up our troubleshooting. + +And the option works only for crash dumps captured by kdump. + +The command output is as follows: +crash> bt -a -n idle +[...] +PID: 0 TASK: ffff889ff8c34380 CPU: 8 COMMAND: "swapper/8" + +PID: 0 TASK: ffff889ff8c32d00 CPU: 9 COMMAND: "swapper/9" + +PID: 0 TASK: ffff889ff8c31680 CPU: 10 COMMAND: "swapper/10" + +PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: "swapper/11" + +PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: "swapper/12" + +PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: "bash" + #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407 + #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d + #2 [ffffc9000d35bdf0] panic at ffffffff81081930 + #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1 + #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175 + #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b + #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86 + #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5 + #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579 + #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259 + RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246 + RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274 + RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001 + RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80 + R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760 + R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002 + ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b +[...] + +Signed-off-by: Qi Zheng +Acked-by: Kazuhito Hagio +Acked-by: Lianbo Jiang +Signed-off-by: Lianbo Jiang +--- + defs.h | 1 + + help.c | 33 ++++++++++++++++++++++++++++++++- + kernel.c | 13 ++++++++++++- + x86_64.c | 8 ++++++++ + 4 files changed, 53 insertions(+), 2 deletions(-) + +diff --git a/defs.h b/defs.h +index ecbced24d2e3..c8444b4e54eb 100644 +--- a/defs.h ++++ b/defs.h +@@ -5832,6 +5832,7 @@ ulong cpu_map_addr(const char *type); + #define BT_SHOW_ALL_REGS (0x2000000000000ULL) + #define BT_REGS_NOT_FOUND (0x4000000000000ULL) + #define BT_OVERFLOW_STACK (0x8000000000000ULL) ++#define BT_SKIP_IDLE (0x10000000000000ULL) + #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS) + + #define BT_REF_HEXVAL (0x1) +diff --git a/help.c b/help.c +index 51a0fe3d687c..e1bbc5abe029 100644 +--- a/help.c ++++ b/help.c +@@ -1909,12 +1909,14 @@ char *help_bt[] = { + "bt", + "backtrace", + "[-a|-c cpu(s)|-g|-r|-t|-T|-l|-e|-E|-f|-F|-o|-O|-v|-p] [-R ref] [-s [-x|d]]" +-"\n [-I ip] [-S sp] [pid | task]", ++"\n [-I ip] [-S sp] [-n idle] [pid | task]", + " Display a kernel stack backtrace. If no arguments are given, the stack", + " trace of the current context will be displayed.\n", + " -a displays the stack traces of the active task on each CPU.", + " (only applicable to crash dumps)", + " -A same as -a, but also displays vector registers (S390X only).", ++" -n idle filter the stack of idle tasks (x86_64).", ++" (only applicable to crash dumps)", + " -p display the stack trace of the panic task only.", + " (only applicable to crash dumps)", + " -c cpu display the stack trace of the active task on one or more CPUs,", +@@ -2004,6 +2006,35 @@ char *help_bt[] = { + " DS: 002b ESI: bfffc8a0 ES: 002b EDI: 00000000 ", + " SS: 002b ESP: bfffc82c EBP: bfffd224 ", + " CS: 0023 EIP: 400d032e ERR: 0000008e EFLAGS: 00000246 ", ++" ", ++" Display the stack trace of the active task(s) when the kernel panicked,", ++" and filter out the stack of the idle tasks:", ++" ", ++" %s> bt -a -n idle", ++" ...", ++" PID: 0 TASK: ffff889ff8c35a00 CPU: 11 COMMAND: \"swapper/11\"", ++" ", ++" PID: 0 TASK: ffff889ff8c3c380 CPU: 12 COMMAND: \"swapper/12\"", ++" ", ++" PID: 150773 TASK: ffff889fe85a1680 CPU: 13 COMMAND: \"bash\"", ++" #0 [ffffc9000d35bcd0] machine_kexec at ffffffff8105a407", ++" #1 [ffffc9000d35bd28] __crash_kexec at ffffffff8113033d", ++" #2 [ffffc9000d35bdf0] panic at ffffffff81081930", ++" #3 [ffffc9000d35be70] sysrq_handle_crash at ffffffff814e38d1", ++" #4 [ffffc9000d35be78] __handle_sysrq.cold.12 at ffffffff814e4175", ++" #5 [ffffc9000d35bea8] write_sysrq_trigger at ffffffff814e404b", ++" #6 [ffffc9000d35beb8] proc_reg_write at ffffffff81330d86", ++" #7 [ffffc9000d35bed0] vfs_write at ffffffff812a72d5", ++" #8 [ffffc9000d35bf00] ksys_write at ffffffff812a7579", ++" #9 [ffffc9000d35bf38] do_syscall_64 at ffffffff81004259", ++" RIP: 00007fa7abcdc274 RSP: 00007fffa731f678 RFLAGS: 00000246", ++" RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa7abcdc274", ++" RDX: 0000000000000002 RSI: 0000563ca51ee6d0 RDI: 0000000000000001", ++" RBP: 0000563ca51ee6d0 R8: 000000000000000a R9: 00007fa7abd6be80", ++" R10: 000000000000000a R11: 0000000000000246 R12: 00007fa7abdad760", ++" R13: 0000000000000002 R14: 00007fa7abda8760 R15: 0000000000000002", ++" ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b", ++" ...", + "\n Display the stack trace of the active task on CPU 0 and 1:\n", + " %s> bt -c 0,1", + " PID: 0 TASK: ffffffff81a8d020 CPU: 0 COMMAND: \"swapper\"", +diff --git a/kernel.c b/kernel.c +index d0921cf567d9..411e9da1e54f 100644 +--- a/kernel.c ++++ b/kernel.c +@@ -2503,7 +2503,7 @@ cmd_bt(void) + if (kt->flags & USE_OPT_BT) + bt->flags |= BT_OPT_BACK_TRACE; + +- while ((c = getopt(argcnt, args, "D:fFI:S:c:aAloreEgstTdxR:Ovp")) != EOF) { ++ while ((c = getopt(argcnt, args, "D:fFI:S:c:n:aAloreEgstTdxR:Ovp")) != EOF) { + switch (c) + { + case 'f': +@@ -2672,6 +2672,13 @@ cmd_bt(void) + active++; + break; + ++ case 'n': ++ if (machine_type("X86_64") && STREQ(optarg, "idle")) ++ bt->flags |= BT_SKIP_IDLE; ++ else ++ option_not_supported(c); ++ break; ++ + case 'r': + bt->flags |= BT_RAW; + break; +@@ -3092,6 +3099,10 @@ back_trace(struct bt_info *bt) + } else + machdep->get_stack_frame(bt, &eip, &esp); + ++ /* skip idle task stack */ ++ if (bt->flags & BT_SKIP_IDLE) ++ return; ++ + if (bt->flags & BT_KSTACKP) { + bt->stkptr = esp; + return; +diff --git a/x86_64.c b/x86_64.c +index ecaefd2f46a8..cfafbcc4dabe 100644 +--- a/x86_64.c ++++ b/x86_64.c +@@ -4918,6 +4918,9 @@ x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp) + if (bt->flags & BT_DUMPFILE_SEARCH) + return x86_64_get_dumpfile_stack_frame(bt, pcp, spp); + ++ if (bt->flags & BT_SKIP_IDLE) ++ bt->flags &= ~BT_SKIP_IDLE; ++ + if (pcp) + *pcp = x86_64_get_pc(bt); + if (spp) +@@ -4960,6 +4963,9 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp) + estack = -1; + panic = FALSE; + ++ if (bt_in->flags & BT_SKIP_IDLE) ++ bt_in->flags &= ~BT_SKIP_IDLE; ++ + panic_task = tt->panic_task == bt->task ? TRUE : FALSE; + + if (panic_task && bt->machdep) { +@@ -5098,6 +5104,8 @@ next_sysrq: + if (!panic_task && STREQ(sym, "crash_nmi_callback")) { + *rip = *up; + *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf); ++ if ((bt->flags & BT_SKIP_IDLE) && is_idle_thread(bt->task)) ++ bt_in->flags |= BT_SKIP_IDLE; + return; + } + +-- +2.30.2 + diff --git a/0008-bt-arm64-add-support-for-bt-n-idle.patch b/0008-bt-arm64-add-support-for-bt-n-idle.patch new file mode 100644 index 0000000..4712ac4 --- /dev/null +++ b/0008-bt-arm64-add-support-for-bt-n-idle.patch @@ -0,0 +1,96 @@ +From 0f162febebc4d11a165dd40cee00f3b0ba691a52 Mon Sep 17 00:00:00 2001 +From: Qi Zheng +Date: Tue, 24 May 2022 20:25:54 +0800 +Subject: [PATCH 08/18] bt: arm64: add support for 'bt -n idle' + +The '-n idle' option of bt command can help us filter the +stack of the idle process when debugging the dumpfiles +captured by kdump. + +This patch supports this feature on ARM64. + +Signed-off-by: Qi Zheng +Signed-off-by: Lianbo Jiang +--- + arm64.c | 19 ++++++++++++++++--- + help.c | 2 +- + kernel.c | 3 ++- + 3 files changed, 19 insertions(+), 5 deletions(-) + +diff --git a/arm64.c b/arm64.c +index 65f6cdf69fa6..0f615cf52bef 100644 +--- a/arm64.c ++++ b/arm64.c +@@ -3681,6 +3681,12 @@ arm64_get_dumpfile_stackframe(struct bt_info *bt, struct arm64_stackframe *frame + { + struct machine_specific *ms = machdep->machspec; + struct arm64_pt_regs *ptregs; ++ bool skip = false; ++ ++ if (bt->flags & BT_SKIP_IDLE) { ++ skip = true; ++ bt->flags &= ~BT_SKIP_IDLE; ++ } + + if (!ms->panic_task_regs || + (!ms->panic_task_regs[bt->tc->processor].sp && +@@ -3713,8 +3719,11 @@ try_kernel: + } + + if (arm64_in_kdump_text(bt, frame) || +- arm64_in_kdump_text_on_irq_stack(bt)) ++ arm64_in_kdump_text_on_irq_stack(bt)) { + bt->flags |= BT_KDUMP_ADJUST; ++ if (skip && is_idle_thread(bt->task)) ++ bt->flags |= BT_SKIP_IDLE; ++ } + + return TRUE; + } +@@ -3738,10 +3747,14 @@ arm64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp) + int ret; + struct arm64_stackframe stackframe = { 0 }; + +- if (DUMPFILE() && is_task_active(bt->task)) ++ if (DUMPFILE() && is_task_active(bt->task)) { + ret = arm64_get_dumpfile_stackframe(bt, &stackframe); +- else ++ } else { ++ if (bt->flags & BT_SKIP_IDLE) ++ bt->flags &= ~BT_SKIP_IDLE; ++ + ret = arm64_get_stackframe(bt, &stackframe); ++ } + + if (!ret) + error(WARNING, +diff --git a/help.c b/help.c +index e1bbc5abe029..99214c1590fa 100644 +--- a/help.c ++++ b/help.c +@@ -1915,7 +1915,7 @@ char *help_bt[] = { + " -a displays the stack traces of the active task on each CPU.", + " (only applicable to crash dumps)", + " -A same as -a, but also displays vector registers (S390X only).", +-" -n idle filter the stack of idle tasks (x86_64).", ++" -n idle filter the stack of idle tasks (x86_64, arm64).", + " (only applicable to crash dumps)", + " -p display the stack trace of the panic task only.", + " (only applicable to crash dumps)", +diff --git a/kernel.c b/kernel.c +index 411e9da1e54f..a521ef30cdb0 100644 +--- a/kernel.c ++++ b/kernel.c +@@ -2673,7 +2673,8 @@ cmd_bt(void) + break; + + case 'n': +- if (machine_type("X86_64") && STREQ(optarg, "idle")) ++ if ((machine_type("X86_64") || machine_type("ARM64")) && ++ STREQ(optarg, "idle")) + bt->flags |= BT_SKIP_IDLE; + else + option_not_supported(c); +-- +2.30.2 + diff --git a/0010-Enhance-dev-d-D-options-to-support-blk-mq-sbitmap.patch b/0010-Enhance-dev-d-D-options-to-support-blk-mq-sbitmap.patch new file mode 100644 index 0000000..6e8b008 --- /dev/null +++ b/0010-Enhance-dev-d-D-options-to-support-blk-mq-sbitmap.patch @@ -0,0 +1,392 @@ +From 7095c8fd029e3a33117e3b67de73f504686ebfe2 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Thu, 2 Jun 2022 20:12:55 +0800 +Subject: [PATCH 10/18] Enhance "dev -d|-D" options to support blk-mq sbitmap + +Since Linux 5.16-rc1, which kernel commit 9a14d6ce4135 ("block: remove +debugfs blk_mq_ctx dispatched/merged/completed attributes") removed the +members from struct blk_mq_ctx, crash has not displayed disk I/O statistics +for multiqueue (blk-mq) devices. + +Let's parse the sbitmap in blk-mq layer to support it. + +Signed-off-by: Lianbo Jiang +Signed-off-by: Kazuhito Hagio +--- + defs.h | 11 +++ + dev.c | 244 +++++++++++++++++++++++++++++++++++++++++++++--------- + symbols.c | 22 +++++ + 3 files changed, 238 insertions(+), 39 deletions(-) + +diff --git a/defs.h b/defs.h +index c8444b4e54eb..2681586a33dc 100644 +--- a/defs.h ++++ b/defs.h +@@ -2170,6 +2170,16 @@ struct offset_table { /* stash of commonly-used offsets */ + long sbq_wait_state_wait; + long sbitmap_alloc_hint; + long sbitmap_round_robin; ++ long request_cmd_flags; ++ long request_q; ++ long request_state; ++ long request_queue_queue_hw_ctx; ++ long request_queue_nr_hw_queues; ++ long blk_mq_hw_ctx_tags; ++ long blk_mq_tags_bitmap_tags; ++ long blk_mq_tags_breserved_tags; ++ long blk_mq_tags_nr_reserved_tags; ++ long blk_mq_tags_rqs; + }; + + struct size_table { /* stash of commonly-used sizes */ +@@ -2339,6 +2349,7 @@ struct size_table { /* stash of commonly-used sizes */ + long sbitmap; + long sbitmap_queue; + long sbq_wait_state; ++ long blk_mq_tags; + }; + + struct array_table { +diff --git a/dev.c b/dev.c +index a493e51ac95c..4be4c96df8b0 100644 +--- a/dev.c ++++ b/dev.c +@@ -4238,19 +4238,176 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io) + io->write = (dispatch[1] - comp[1]); + } + ++typedef bool (busy_tag_iter_fn)(ulong rq, void *data); ++ ++struct mq_inflight { ++ ulong q; ++ struct diskio *dio; ++}; ++ ++struct bt_iter_data { ++ ulong tags; ++ uint reserved; ++ uint nr_reserved_tags; ++ busy_tag_iter_fn *fn; ++ void *data; ++}; ++ ++/* ++ * See the include/linux/blk_types.h and include/linux/blk-mq.h ++ */ ++#define MQ_RQ_IN_FLIGHT 1 ++#define REQ_OP_BITS 8 ++#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) ++ ++static uint op_is_write(uint op) ++{ ++ return (op & REQ_OP_MASK) & 1; ++} ++ ++static bool mq_check_inflight(ulong rq, void *data) ++{ ++ uint cmd_flags = 0, state = 0; ++ ulong addr = 0, queue = 0; ++ struct mq_inflight *mi = data; ++ ++ if (!IS_KVADDR(rq)) ++ return TRUE; ++ ++ addr = rq + OFFSET(request_q); ++ if (!readmem(addr, KVADDR, &queue, sizeof(ulong), "request.q", RETURN_ON_ERROR)) ++ return FALSE; ++ ++ addr = rq + OFFSET(request_cmd_flags); ++ if (!readmem(addr, KVADDR, &cmd_flags, sizeof(uint), "request.cmd_flags", RETURN_ON_ERROR)) ++ return FALSE; ++ ++ addr = rq + OFFSET(request_state); ++ if (!readmem(addr, KVADDR, &state, sizeof(uint), "request.state", RETURN_ON_ERROR)) ++ return FALSE; ++ ++ if (queue == mi->q && state == MQ_RQ_IN_FLIGHT) { ++ if (op_is_write(cmd_flags)) ++ mi->dio->write++; ++ else ++ mi->dio->read++; ++ } ++ ++ return TRUE; ++} ++ ++static bool bt_iter(uint bitnr, void *data) ++{ ++ ulong addr = 0, rqs_addr = 0, rq = 0; ++ struct bt_iter_data *iter_data = data; ++ ulong tag = iter_data->tags; ++ ++ if (!iter_data->reserved) ++ bitnr += iter_data->nr_reserved_tags; ++ ++ /* rqs */ ++ addr = tag + OFFSET(blk_mq_tags_rqs); ++ if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.rqs", RETURN_ON_ERROR)) ++ return FALSE; ++ ++ addr = rqs_addr + bitnr * sizeof(ulong); /* rqs[bitnr] */ ++ if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.rqs[]", RETURN_ON_ERROR)) ++ return FALSE; ++ ++ return iter_data->fn(rq, iter_data->data); ++} ++ ++static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_resvd_tags, struct diskio *dio) ++{ ++ struct sbitmap_context sc = {0}; ++ struct mq_inflight mi = { ++ .q = q, ++ .dio = dio, ++ }; ++ struct bt_iter_data iter_data = { ++ .tags = tags, ++ .reserved = reserved, ++ .nr_reserved_tags = nr_resvd_tags, ++ .fn = mq_check_inflight, ++ .data = &mi, ++ }; ++ ++ sbitmap_context_load(sbq + OFFSET(sbitmap_queue_sb), &sc); ++ sbitmap_for_each_set(&sc, bt_iter, &iter_data); ++} ++ ++static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio) ++{ ++ uint i; ++ ++ for (i = 0; i < cnt; i++) { ++ ulong addr = 0, tags = 0; ++ uint nr_reserved_tags = 0; ++ ++ /* Tags owned by the block driver */ ++ addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags); ++ if (!readmem(addr, KVADDR, &tags, sizeof(ulong), ++ "blk_mq_hw_ctx.tags", RETURN_ON_ERROR)) ++ break; ++ ++ addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags); ++ if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint), ++ "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR)) ++ break; ++ ++ if (nr_reserved_tags) { ++ addr = tags + OFFSET(blk_mq_tags_breserved_tags); ++ bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio); ++ } ++ addr = tags + OFFSET(blk_mq_tags_bitmap_tags); ++ bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio); ++ } ++} ++ ++static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio) ++{ ++ uint cnt = 0; ++ ulong addr = 0, hctx_addr = 0; ++ ulong *hctx_array = NULL; ++ ++ addr = q + OFFSET(request_queue_nr_hw_queues); ++ readmem(addr, KVADDR, &cnt, sizeof(uint), ++ "request_queue.nr_hw_queues", FAULT_ON_ERROR); ++ ++ addr = q + OFFSET(request_queue_queue_hw_ctx); ++ readmem(addr, KVADDR, &hctx_addr, sizeof(void *), ++ "request_queue.queue_hw_ctx", FAULT_ON_ERROR); ++ ++ hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt); ++ if (!hctx_array) ++ error(FATAL, "fail to get memory for the hctx_array\n"); ++ ++ if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt, ++ "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) { ++ FREEBUF(hctx_array); ++ return; ++ } ++ ++ queue_for_each_hw_ctx(q, hctx_array, cnt, dio); ++ ++ FREEBUF(hctx_array); ++} ++ + static void + get_mq_diskio(unsigned long q, unsigned long *mq_count) + { + int cpu; + unsigned long queue_ctx; + unsigned long mctx_addr; +- struct diskio tmp; ++ struct diskio tmp = {0}; + + if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) || +- INVALID_MEMBER(blk_mq_ctx_rq_completed)) ++ INVALID_MEMBER(blk_mq_ctx_rq_completed)) { ++ get_mq_diskio_from_hw_queues(q, &tmp); ++ mq_count[0] = tmp.read; ++ mq_count[1] = tmp.write; + return; +- +- memset(&tmp, 0x00, sizeof(struct diskio)); ++ } + + readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx, + sizeof(ulong), "request_queue.queue_ctx", +@@ -4479,41 +4636,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk, ulong flags) + && (io.read + io.write == 0)) + return; + +- if (use_mq_interface(queue_addr) && +- (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) || +- INVALID_MEMBER(blk_mq_ctx_rq_completed))) +- fprintf(fp, "%s%s%s %s%s%s%s %s%s%s", +- mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major), +- space(MINSPACE), +- mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk), +- space(MINSPACE), +- mkstring(buf2, 10, LJUST, disk_name), +- space(MINSPACE), +- mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN, +- LJUST|LONG_HEX, (char *)queue_addr), +- space(MINSPACE), +- mkstring(buf4, 17, RJUST, "(not supported)"), +- space(MINSPACE)); +- +- else +- fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s", +- mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major), +- space(MINSPACE), +- mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk), +- space(MINSPACE), +- mkstring(buf2, 10, LJUST, disk_name), +- space(MINSPACE), +- mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN, +- LJUST|LONG_HEX, (char *)queue_addr), +- space(MINSPACE), +- io.read + io.write, +- space(MINSPACE), +- mkstring(buf4, 5, RJUST|INT_DEC, +- (char *)(unsigned long)io.read), +- space(MINSPACE), +- mkstring(buf5, 5, RJUST|INT_DEC, +- (char *)(unsigned long)io.write), +- space(MINSPACE)); ++ fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s", ++ mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major), ++ space(MINSPACE), ++ mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk), ++ space(MINSPACE), ++ mkstring(buf2, 10, LJUST, disk_name), ++ space(MINSPACE), ++ mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN, ++ LJUST|LONG_HEX, (char *)queue_addr), ++ space(MINSPACE), ++ io.read + io.write, ++ space(MINSPACE), ++ mkstring(buf4, 5, RJUST|INT_DEC, ++ (char *)(unsigned long)io.read), ++ space(MINSPACE), ++ mkstring(buf5, 5, RJUST|INT_DEC, ++ (char *)(unsigned long)io.write), ++ space(MINSPACE)); + + if (VALID_MEMBER(request_queue_in_flight)) { + if (!use_mq_interface(queue_addr)) { +@@ -4597,6 +4737,9 @@ void diskio_init(void) + MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry"); + MEMBER_OFFSET_INIT(kset_list, "kset", "list"); + MEMBER_OFFSET_INIT(request_list_count, "request_list", "count"); ++ MEMBER_OFFSET_INIT(request_cmd_flags, "request", "cmd_flags"); ++ MEMBER_OFFSET_INIT(request_q, "request", "q"); ++ MEMBER_OFFSET_INIT(request_state, "request", "state"); + MEMBER_OFFSET_INIT(request_queue_in_flight, "request_queue", + "in_flight"); + if (MEMBER_EXISTS("request_queue", "rq")) +@@ -4608,10 +4751,33 @@ void diskio_init(void) + "mq_ops"); + ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx, + "request_queue", "queue_ctx"); ++ MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx, ++ "request_queue", "queue_hw_ctx"); ++ MEMBER_OFFSET_INIT(request_queue_nr_hw_queues, ++ "request_queue", "nr_hw_queues"); + MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx", + "rq_dispatched"); + MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx", + "rq_completed"); ++ MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx", "tags"); ++ MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags", ++ "bitmap_tags"); ++ MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags", ++ "breserved_tags"); ++ MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags", ++ "nr_reserved_tags"); ++ MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags", "rqs"); ++ STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags"); ++ STRUCT_SIZE_INIT(sbitmap, "sbitmap"); ++ STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word"); ++ MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word"); ++ MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared"); ++ MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth"); ++ MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift"); ++ MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr"); ++ MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map"); ++ MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb"); ++ + } + MEMBER_OFFSET_INIT(subsys_private_klist_devices, "subsys_private", + "klist_devices"); +diff --git a/symbols.c b/symbols.c +index 5d12a021c769..c1f09556d710 100644 +--- a/symbols.c ++++ b/symbols.c +@@ -10385,6 +10385,12 @@ dump_offset_table(char *spec, ulong makestruct) + OFFSET(kset_list)); + fprintf(fp, " request_list_count: %ld\n", + OFFSET(request_list_count)); ++ fprintf(fp, " request_cmd_flags: %ld\n", ++ OFFSET(request_cmd_flags)); ++ fprintf(fp, " request_q: %ld\n", ++ OFFSET(request_q)); ++ fprintf(fp, " request_state: %ld\n", ++ OFFSET(request_state)); + fprintf(fp, " request_queue_in_flight: %ld\n", + OFFSET(request_queue_in_flight)); + fprintf(fp, " request_queue_rq: %ld\n", +@@ -10393,10 +10399,25 @@ dump_offset_table(char *spec, ulong makestruct) + OFFSET(request_queue_mq_ops)); + fprintf(fp, " request_queue_queue_ctx: %ld\n", + OFFSET(request_queue_queue_ctx)); ++ fprintf(fp, " request_queue_queue_hw_ctx: %ld\n", ++ OFFSET(request_queue_queue_hw_ctx)); ++ fprintf(fp, " request_queue_nr_hw_queues: %ld\n", ++ OFFSET(request_queue_nr_hw_queues)); + fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n", + OFFSET(blk_mq_ctx_rq_dispatched)); + fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n", + OFFSET(blk_mq_ctx_rq_completed)); ++ fprintf(fp, " blk_mq_hw_ctx_tags: %ld\n", ++ OFFSET(blk_mq_hw_ctx_tags)); ++ fprintf(fp, " blk_mq_tags_bitmap_tags: %ld\n", ++ OFFSET(blk_mq_tags_bitmap_tags)); ++ fprintf(fp, " blk_mq_tags_breserved_tags: %ld\n", ++ OFFSET(blk_mq_tags_breserved_tags)); ++ fprintf(fp, " blk_mq_tags_nr_reserved_tags: %ld\n", ++ OFFSET(blk_mq_tags_nr_reserved_tags)); ++ fprintf(fp, " blk_mq_tags_rqs: %ld\n", ++ OFFSET(blk_mq_tags_rqs)); ++ + fprintf(fp, " subsys_private_klist_devices: %ld\n", + OFFSET(subsys_private_klist_devices)); + fprintf(fp, " subsystem_kset: %ld\n", +@@ -11003,6 +11024,7 @@ dump_offset_table(char *spec, ulong makestruct) + fprintf(fp, " sbitmap: %ld\n", SIZE(sbitmap)); + fprintf(fp, " sbitmap_queue: %ld\n", SIZE(sbitmap_queue)); + fprintf(fp, " sbq_wait_state: %ld\n", SIZE(sbq_wait_state)); ++ fprintf(fp, " blk_mq_tags: %ld\n", SIZE(blk_mq_tags)); + + fprintf(fp, "\n array_table:\n"); + /* +-- +2.30.2 + diff --git a/0011-Fix-for-dev-d-D-options-to-support-blk-mq-change-on-.patch b/0011-Fix-for-dev-d-D-options-to-support-blk-mq-change-on-.patch new file mode 100644 index 0000000..6167735 --- /dev/null +++ b/0011-Fix-for-dev-d-D-options-to-support-blk-mq-change-on-.patch @@ -0,0 +1,121 @@ +From 68ce0b9a35d77d767872dd1a729c50e4695a30a8 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Thu, 2 Jun 2022 20:12:56 +0800 +Subject: [PATCH 11/18] Fix for "dev -d|-D" options to support blk-mq change on + Linux v5.18-rc1 + +Kernel commit 4e5cc99e1e48 ("blk-mq: manage hctx map via xarray") removed +the "queue_hw_ctx" member from struct request_queue at Linux v5.18-rc1, +and replaced it with a struct xarray "hctx_table". Without the patch, the +"dev -d|-D" options will print an error: + + crash> dev -d + MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE + + dev: invalid structure member offset: request_queue_queue_hw_ctx + +With the patch: + crash> dev -d + MAJOR GENDISK NAME REQUEST_QUEUE TOTAL READ WRITE + 8 ffff8e99d0a1ae00 sda ffff8e9c14c59980 10 6 4 + +Signed-off-by: Lianbo Jiang +--- + defs.h | 1 + + dev.c | 42 +++++++++++++++++++++++++++++++++--------- + symbols.c | 2 ++ + 3 files changed, 36 insertions(+), 9 deletions(-) + +diff --git a/defs.h b/defs.h +index 2681586a33dc..7d3b73422f48 100644 +--- a/defs.h ++++ b/defs.h +@@ -2180,6 +2180,7 @@ struct offset_table { /* stash of commonly-used offsets */ + long blk_mq_tags_breserved_tags; + long blk_mq_tags_nr_reserved_tags; + long blk_mq_tags_rqs; ++ long request_queue_hctx_table; + }; + + struct size_table { /* stash of commonly-used sizes */ +diff --git a/dev.c b/dev.c +index 4be4c96df8b0..0172c83ffaea 100644 +--- a/dev.c ++++ b/dev.c +@@ -4369,20 +4369,42 @@ static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio) + uint cnt = 0; + ulong addr = 0, hctx_addr = 0; + ulong *hctx_array = NULL; ++ struct list_pair *lp = NULL; ++ ++ if (VALID_MEMBER(request_queue_hctx_table)) { ++ addr = q + OFFSET(request_queue_hctx_table); ++ cnt = do_xarray(addr, XARRAY_COUNT, NULL); ++ lp = (struct list_pair *)GETBUF(sizeof(struct list_pair) * (cnt + 1)); ++ if (!lp) ++ error(FATAL, "fail to get memory for list_pair.\n"); ++ lp[0].index = cnt; ++ cnt = do_xarray(addr, XARRAY_GATHER, lp); ++ } else { ++ addr = q + OFFSET(request_queue_nr_hw_queues); ++ readmem(addr, KVADDR, &cnt, sizeof(uint), ++ "request_queue.nr_hw_queues", FAULT_ON_ERROR); + +- addr = q + OFFSET(request_queue_nr_hw_queues); +- readmem(addr, KVADDR, &cnt, sizeof(uint), +- "request_queue.nr_hw_queues", FAULT_ON_ERROR); +- +- addr = q + OFFSET(request_queue_queue_hw_ctx); +- readmem(addr, KVADDR, &hctx_addr, sizeof(void *), +- "request_queue.queue_hw_ctx", FAULT_ON_ERROR); ++ addr = q + OFFSET(request_queue_queue_hw_ctx); ++ readmem(addr, KVADDR, &hctx_addr, sizeof(void *), ++ "request_queue.queue_hw_ctx", FAULT_ON_ERROR); ++ } + + hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt); +- if (!hctx_array) ++ if (!hctx_array) { ++ if (lp) ++ FREEBUF(lp); + error(FATAL, "fail to get memory for the hctx_array\n"); ++ } ++ ++ if (lp && hctx_array) { ++ uint i; ++ ++ /* copy it from list_pair to hctx_array */ ++ for (i = 0; i < cnt; i++) ++ hctx_array[i] = (ulong)lp[i].value; + +- if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt, ++ FREEBUF(lp); ++ } else if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt, + "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) { + FREEBUF(hctx_array); + return; +@@ -4755,6 +4777,8 @@ void diskio_init(void) + "request_queue", "queue_hw_ctx"); + MEMBER_OFFSET_INIT(request_queue_nr_hw_queues, + "request_queue", "nr_hw_queues"); ++ MEMBER_OFFSET_INIT(request_queue_hctx_table, ++ "request_queue", "hctx_table"); + MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx", + "rq_dispatched"); + MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx", +diff --git a/symbols.c b/symbols.c +index c1f09556d710..bee1faf92c83 100644 +--- a/symbols.c ++++ b/symbols.c +@@ -10403,6 +10403,8 @@ dump_offset_table(char *spec, ulong makestruct) + OFFSET(request_queue_queue_hw_ctx)); + fprintf(fp, " request_queue_nr_hw_queues: %ld\n", + OFFSET(request_queue_nr_hw_queues)); ++ fprintf(fp, " request_queue_hctx_table: %ld\n", ++ OFFSET(request_queue_hctx_table)); + fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n", + OFFSET(blk_mq_ctx_rq_dispatched)); + fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n", +-- +2.30.2 + diff --git a/0012-Doc-update-man-page-for-the-bpf-and-sbitmapq-command.patch b/0012-Doc-update-man-page-for-the-bpf-and-sbitmapq-command.patch new file mode 100644 index 0000000..fc436ae --- /dev/null +++ b/0012-Doc-update-man-page-for-the-bpf-and-sbitmapq-command.patch @@ -0,0 +1,43 @@ +From c672d7a4c290712b32c54329cbdc1e74d122e813 Mon Sep 17 00:00:00 2001 +From: Lianbo Jiang +Date: Mon, 6 Jun 2022 19:09:16 +0800 +Subject: [PATCH 12/18] Doc: update man page for the "bpf" and "sbitmapq" + commands + +The information of the "bpf" and "sbitmapq" commands is missing in the man +page of the crash utility. Let's add it to the man page. + +Signed-off-by: Lianbo Jiang +--- + crash.8 | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/crash.8 b/crash.8 +index 1f3657b11e4c..e553a0b4adb3 100644 +--- a/crash.8 ++++ b/crash.8 +@@ -584,6 +584,9 @@ creates a single-word alias for a command. + .I ascii + displays an ascii chart or translates a numeric value into its ascii components. + .TP ++.I bpf ++provides information on currently-loaded eBPF programs and maps. ++.TP + .I bt + displays a task's kernel-stack backtrace. If it is given the + .I \-a +@@ -706,6 +709,11 @@ number of seconds between each command execution. + .I runq + displays the tasks on the run queue. + .TP ++.I sbitmapq ++dumps the contents of the sbitmap_queue structure and the used ++bits in the bitmap. Also, it shows the dump of a structure array ++associated with the sbitmap_queue. ++.TP + .I search + searches a range of user or kernel memory space for given value. + .TP +-- +2.30.2 + diff --git a/0013-sbitmapq-Fix-for-sbitmap_queue-without-ws_active-mem.patch b/0013-sbitmapq-Fix-for-sbitmap_queue-without-ws_active-mem.patch new file mode 100644 index 0000000..c9d7b95 --- /dev/null +++ b/0013-sbitmapq-Fix-for-sbitmap_queue-without-ws_active-mem.patch @@ -0,0 +1,48 @@ +From 9ce31a14d1083cbb2beb4a8e6eb7b88234b79a99 Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Fri, 10 Jun 2022 11:49:47 +0900 +Subject: [PATCH 13/18] sbitmapq: Fix for sbitmap_queue without ws_active + member + +The sbitmap_queue.ws_active member was added by kernel commit 5d2ee7122c73 +("sbitmap: optimize wakeup check") at Linux 5.0. Without the patch, on +earlier kernels the "sbitmapq" command fails with the following error: + + crash> sbitmapq ffff8f1a3611cf10 + + sbitmapq: invalid structure member offset: sbitmap_queue_ws_active + FILE: sbitmap.c LINE: 393 FUNCTION: sbitmap_queue_context_load() + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/sbitmap.c b/sbitmap.c +index e8ebd62fe01c..152c28e6875f 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -325,7 +325,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + + fprintf(fp, "wake_batch = %u\n", sqc->wake_batch); + fprintf(fp, "wake_index = %d\n", sqc->wake_index); +- fprintf(fp, "ws_active = %d\n", sqc->ws_active); ++ if (VALID_MEMBER(sbitmap_queue_ws_active)) /* 5.0 and later */ ++ fprintf(fp, "ws_active = %d\n", sqc->ws_active); + + sbq_wait_state_size = SIZE(sbq_wait_state); + wait_cnt_off = OFFSET(sbq_wait_state_wait_cnt); +@@ -380,7 +381,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context + sqc->wake_batch = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_batch)); + sqc->wake_index = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_wake_index)); + sqc->ws_addr = ULONG(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws)); +- sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active)); ++ if (VALID_MEMBER(sbitmap_queue_ws_active)) ++ sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active)); + if (VALID_MEMBER(sbitmap_queue_round_robin)) + sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin)); + sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth)); +-- +2.30.2 + diff --git a/0014-sbitmapq-Fix-for-sbitmap_word-without-cleared-member.patch b/0014-sbitmapq-Fix-for-sbitmap_word-without-cleared-member.patch new file mode 100644 index 0000000..6018402 --- /dev/null +++ b/0014-sbitmapq-Fix-for-sbitmap_word-without-cleared-member.patch @@ -0,0 +1,110 @@ +From 0d3e86fee5eead93b521a0e20a0e099ede4ab72b Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Fri, 10 Jun 2022 11:49:47 +0900 +Subject: [PATCH 14/18] sbitmapq: Fix for sbitmap_word without cleared member + +The sbitmap_word.cleared member was added by kernel commit ea86ea2cdced +("sbitmap: ammortize cost of clearing bits") at Linux 5.0. Without the +patch, on earlier kernels the "sbitmapq" command fails with the +following error: + + crash> sbitmapq ffff8f1a3611cf10 + + sbitmapq: invalid structure member offset: sbitmap_word_cleared + FILE: sbitmap.c LINE: 92 FUNCTION: __sbitmap_weight() + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 26 ++++++++++++++++++-------- + 1 file changed, 18 insertions(+), 8 deletions(-) + +diff --git a/sbitmap.c b/sbitmap.c +index 152c28e6875f..c9f7209f9e3e 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -89,7 +89,6 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set) + { + const ulong sbitmap_word_size = SIZE(sbitmap_word); + const ulong w_word_off = OFFSET(sbitmap_word_word); +- const ulong w_cleared_off = OFFSET(sbitmap_word_cleared); + + unsigned int weight = 0; + ulong addr = sc->map_addr; +@@ -111,7 +110,10 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set) + word = ULONG(sbitmap_word_buf + w_word_off); + weight += bitmap_weight(word, depth); + } else { +- cleared = ULONG(sbitmap_word_buf + w_cleared_off); ++ if (VALID_MEMBER(sbitmap_word_cleared)) ++ cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared)); ++ else ++ cleared = 0; + weight += bitmap_weight(cleared, depth); + } + +@@ -130,7 +132,10 @@ static unsigned int sbitmap_weight(const struct sbitmap_context *sc) + + static unsigned int sbitmap_cleared(const struct sbitmap_context *sc) + { +- return __sbitmap_weight(sc, false); ++ if (VALID_MEMBER(sbitmap_word_cleared)) /* 5.0 and later */ ++ return __sbitmap_weight(sc, false); ++ ++ return 0; + } + + static void sbitmap_emit_byte(unsigned int offset, uint8_t byte) +@@ -149,7 +154,6 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc) + { + const ulong sbitmap_word_size = SIZE(sbitmap_word); + const ulong w_word_off = OFFSET(sbitmap_word_word); +- const ulong w_cleared_off = OFFSET(sbitmap_word_cleared); + + uint8_t byte = 0; + unsigned int byte_bits = 0; +@@ -169,7 +173,10 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc) + } + + word = ULONG(sbitmap_word_buf + w_word_off); +- cleared = ULONG(sbitmap_word_buf + w_cleared_off); ++ if (VALID_MEMBER(sbitmap_word_cleared)) ++ cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared)); ++ else ++ cleared = 0; + word_bits = __map_depth(sc, i); + + word &= ~cleared; +@@ -219,7 +226,6 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc, + { + const ulong sbitmap_word_size = SIZE(sbitmap_word); + const ulong w_word_off = OFFSET(sbitmap_word_word); +- const ulong w_cleared_off = OFFSET(sbitmap_word_cleared); + + unsigned int index; + unsigned int nr; +@@ -245,7 +251,10 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc, + } + + w_word = ULONG(sbitmap_word_buf + w_word_off); +- w_cleared = ULONG(sbitmap_word_buf + w_cleared_off); ++ if (VALID_MEMBER(sbitmap_word_cleared)) ++ w_cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared)); ++ else ++ w_cleared = 0; + + depth = min(__map_depth(sc, index) - nr, sc->depth - scanned); + +@@ -297,7 +306,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + + fprintf(fp, "depth = %u\n", sc->depth); + fprintf(fp, "busy = %u\n", sbitmap_weight(sc) - sbitmap_cleared(sc)); +- fprintf(fp, "cleared = %u\n", sbitmap_cleared(sc)); ++ if (VALID_MEMBER(sbitmap_word_cleared)) /* 5.0 and later */ ++ fprintf(fp, "cleared = %u\n", sbitmap_cleared(sc)); + fprintf(fp, "bits_per_word = %u\n", 1U << sc->shift); + fprintf(fp, "map_nr = %u\n", sc->map_nr); + +-- +2.30.2 + diff --git a/0015-sbitmapq-Fix-for-sbitmap_queue-without-min_shallow_d.patch b/0015-sbitmapq-Fix-for-sbitmap_queue-without-min_shallow_d.patch new file mode 100644 index 0000000..3871a6f --- /dev/null +++ b/0015-sbitmapq-Fix-for-sbitmap_queue-without-min_shallow_d.patch @@ -0,0 +1,49 @@ +From 12fe6c7cdd768f87ce6e903a2bbfb0c0591585c5 Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Fri, 10 Jun 2022 11:49:47 +0900 +Subject: [PATCH 15/18] sbitmapq: Fix for sbitmap_queue without + min_shallow_depth member + +The sbitmap_queue.min_shallow_depth member was added by kernel commit +a327553965de ("sbitmap: fix missed wakeups caused by sbitmap_queue_get_shallow()") +at Linux 4.18. Without the patch, on earlier kernels the "sbitmapq" +command fails with the following error: + + crash> sbitmapq ffff89bb7638ee50 + + sbitmapq: invalid structure member offset: sbitmap_queue_min_shallow_depth + FILE: sbitmap.c LINE: 398 FUNCTION: sbitmap_queue_context_load() + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/sbitmap.c b/sbitmap.c +index c9f7209f9e3e..bb2f19e6207b 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -371,7 +371,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + else if (VALID_MEMBER(sbitmap_round_robin)) /* 5.13 and later */ + fprintf(fp, "round_robin = %d\n", sc->round_robin); + +- fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth); ++ if (VALID_MEMBER(sbitmap_queue_min_shallow_depth)) /* 4.18 and later */ ++ fprintf(fp, "min_shallow_depth = %u\n", sqc->min_shallow_depth); + } + + static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context *sqc) +@@ -395,7 +396,8 @@ static void sbitmap_queue_context_load(ulong addr, struct sbitmap_queue_context + sqc->ws_active = INT(sbitmap_queue_buf + OFFSET(sbitmap_queue_ws_active)); + if (VALID_MEMBER(sbitmap_queue_round_robin)) + sqc->round_robin = BOOL(sbitmap_queue_buf + OFFSET(sbitmap_queue_round_robin)); +- sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth)); ++ if (VALID_MEMBER(sbitmap_queue_min_shallow_depth)) ++ sqc->min_shallow_depth = UINT(sbitmap_queue_buf + OFFSET(sbitmap_queue_min_shallow_depth)); + + FREEBUF(sbitmap_queue_buf); + } +-- +2.30.2 + diff --git a/0016-Make-dev-d-D-options-parse-sbitmap-on-Linux-4.18-and.patch b/0016-Make-dev-d-D-options-parse-sbitmap-on-Linux-4.18-and.patch new file mode 100644 index 0000000..cc88afd --- /dev/null +++ b/0016-Make-dev-d-D-options-parse-sbitmap-on-Linux-4.18-and.patch @@ -0,0 +1,84 @@ +From c07068266b41450ca6821ee0a1a3adf34206015f Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Fri, 10 Jun 2022 15:21:53 +0900 +Subject: [PATCH 16/18] Make "dev -d|-D" options parse sbitmap on Linux 4.18 + and later + +There have been a few reports that the "dev -d|-D" options displayed +incorrect I/O stats due to racy blk_mq_ctx.rq_* counters. To fix it, +make the options parse sbitmap to count I/O stats on Linux 4.18 and +later kernels, which include RHEL8 ones. + +To do this, adjust to the blk_mq_tags structure of Linux 5.10 through +5.15 kernels, which contain kernel commit 222a5ae03cdd ("blk-mq: Use +pointers for blk_mq_tags bitmap tags") and do not contain ae0f1a732f4a +("blk-mq: Stop using pointers for blk_mq_tags bitmap tags"). + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + dev.c | 25 +++++++++++++++++++++++-- + 1 file changed, 23 insertions(+), 2 deletions(-) + +diff --git a/dev.c b/dev.c +index 0172c83ffaea..db97f8aebdc2 100644 +--- a/dev.c ++++ b/dev.c +@@ -4339,6 +4339,10 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_r + static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio) + { + uint i; ++ int bitmap_tags_is_ptr = 0; ++ ++ if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") == TYPE_CODE_PTR) ++ bitmap_tags_is_ptr = 1; + + for (i = 0; i < cnt; i++) { + ulong addr = 0, tags = 0; +@@ -4357,9 +4361,17 @@ static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio + + if (nr_reserved_tags) { + addr = tags + OFFSET(blk_mq_tags_breserved_tags); ++ if (bitmap_tags_is_ptr && ++ !readmem(addr, KVADDR, &addr, sizeof(ulong), ++ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR)) ++ break; + bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio); + } + addr = tags + OFFSET(blk_mq_tags_bitmap_tags); ++ if (bitmap_tags_is_ptr && ++ !readmem(addr, KVADDR, &addr, sizeof(ulong), ++ "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR)) ++ break; + bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio); + } + } +@@ -4423,14 +4435,23 @@ get_mq_diskio(unsigned long q, unsigned long *mq_count) + unsigned long mctx_addr; + struct diskio tmp = {0}; + +- if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) || +- INVALID_MEMBER(blk_mq_ctx_rq_completed)) { ++ /* ++ * Currently this function does not support old blk-mq implementation ++ * before 12f5b9314545 ("blk-mq: Remove generation seqeunce"), so ++ * filter them out. ++ */ ++ if (VALID_MEMBER(request_state)) { ++ if (CRASHDEBUG(1)) ++ fprintf(fp, "mq: using sbitmap\n"); + get_mq_diskio_from_hw_queues(q, &tmp); + mq_count[0] = tmp.read; + mq_count[1] = tmp.write; + return; + } + ++ if (CRASHDEBUG(1)) ++ fprintf(fp, "mq: using blk_mq_ctx.rq_{completed,dispatched} counters\n"); ++ + readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx, + sizeof(ulong), "request_queue.queue_ctx", + FAULT_ON_ERROR); +-- +2.30.2 + diff --git a/0017-sbitmapq-Fix-for-kernels-without-struct-wait_queue_h.patch b/0017-sbitmapq-Fix-for-kernels-without-struct-wait_queue_h.patch new file mode 100644 index 0000000..fe4a544 --- /dev/null +++ b/0017-sbitmapq-Fix-for-kernels-without-struct-wait_queue_h.patch @@ -0,0 +1,44 @@ +From 6bc3b74c6e2b0aaebe1bc164594e53b010efef56 Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Fri, 10 Jun 2022 15:52:34 +0900 +Subject: [PATCH 17/18] sbitmapq: Fix for kernels without struct + wait_queue_head + +The current struct wait_queue_head was renamed by kernel commit +9d9d676f595b ("sched/wait: Standardize internal naming of wait-queue heads") +at Linux 4.13. Without the patch, on earlier kernels the "sbitmapq" +command fails with the following error: + + crash> sbitmapq ffff8801790b3b50 + depth = 128 + busy = 0 + bits_per_word = 32 + ... + sbitmapq: invalid structure member offset: wait_queue_head_head + FILE: sbitmap.c LINE: 344 FUNCTION: sbitmap_queue_show() + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/sbitmap.c b/sbitmap.c +index bb2f19e6207b..be5d30a8ea88 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -341,7 +341,10 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc, + sbq_wait_state_size = SIZE(sbq_wait_state); + wait_cnt_off = OFFSET(sbq_wait_state_wait_cnt); + wait_off = OFFSET(sbq_wait_state_wait); +- list_head_off = OFFSET(wait_queue_head_head); ++ if (VALID_MEMBER(wait_queue_head_head)) /* 4.13 and later */ ++ list_head_off = OFFSET(wait_queue_head_head); ++ else ++ list_head_off = OFFSET(__wait_queue_head_task_list); + + sbq_wait_state_buf = GETBUF(sbq_wait_state_size); + +-- +2.30.2 + diff --git a/0018-sbitmapq-Limit-kernels-without-sbitmap-again.patch b/0018-sbitmapq-Limit-kernels-without-sbitmap-again.patch new file mode 100644 index 0000000..791e5ab --- /dev/null +++ b/0018-sbitmapq-Limit-kernels-without-sbitmap-again.patch @@ -0,0 +1,43 @@ +From b8f2ae6b494d706b1e4855b439c4930a6a6a2f5c Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Fri, 10 Jun 2022 16:00:14 +0900 +Subject: [PATCH 18/18] sbitmapq: Limit kernels without sbitmap again + +commit 364b2e413c69 ("sbitmapq: remove struct and member validation +in sbitmapq_init()") allowed the use of the "sbitmapq" command +unconditionally. Without the patch, the command fails with the +following error on kernels without sbitmap: + + crash> sbitmapq ffff88015796e550 + + sbitmapq: invalid structure member offset: sbitmap_queue_sb + FILE: sbitmap.c LINE: 385 FUNCTION: sbitmap_queue_context_load() + +Now the command supports Linux 4.9 and later kernels since it was +abstracted out, so it can be limited by the non-existence of the +sbitmap structure. + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + sbitmap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/sbitmap.c b/sbitmap.c +index be5d30a8ea88..12d6512a1e4d 100644 +--- a/sbitmap.c ++++ b/sbitmap.c +@@ -540,6 +540,10 @@ void sbitmapq_init(void) + STRUCT_SIZE_INIT(sbitmap_queue, "sbitmap_queue"); + STRUCT_SIZE_INIT(sbq_wait_state, "sbq_wait_state"); + ++ /* sbitmap was abstracted out by commit 88459642cba4 on Linux 4.9. */ ++ if (INVALID_SIZE(sbitmap)) ++ command_not_supported(); ++ + MEMBER_OFFSET_INIT(sbitmap_word_depth, "sbitmap_word", "depth"); + MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word"); + MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared"); +-- +2.30.2 + diff --git a/crash.spec b/crash.spec index bea23d0..607b80f 100644 --- a/crash.spec +++ b/crash.spec @@ -4,7 +4,7 @@ Summary: Kernel analysis utility for live systems, netdump, diskdump, kdump, LKCD or mcore dumpfiles Name: crash Version: 7.3.2 -Release: 1%{?dist} +Release: 2%{?dist} License: GPLv3 Group: Development/Debuggers Source0: https://github.com/crash-utility/crash/archive/crash-%{version}.tar.gz @@ -19,6 +19,22 @@ Provides: bundled(gdb) = 7.6 Patch0: lzo_snappy_zstd.patch Patch1: rhel8_build.patch Patch2: rhel8_freepointer.patch +Patch3: 0001-ppc64-update-the-NR_CPUS-to-8192.patch +Patch4: 0002-sbitmapq-remove-struct-and-member-validation-in-sbit.patch +Patch5: 0003-sbitmapq-fix-invalid-offset-for-sbitmap_queue_alloc_.patch +Patch6: 0004-sbitmapq-fix-invalid-offset-for-sbitmap_queue_round_.patch +Patch7: 0005-sbitmapq-fix-invalid-offset-for-sbitmap_word_depth-o.patch +Patch8: 0007-bt-x86_64-filter-out-idle-task-stack.patch +Patch9: 0008-bt-arm64-add-support-for-bt-n-idle.patch +Patch10: 0010-Enhance-dev-d-D-options-to-support-blk-mq-sbitmap.patch +Patch11: 0011-Fix-for-dev-d-D-options-to-support-blk-mq-change-on-.patch +Patch12: 0012-Doc-update-man-page-for-the-bpf-and-sbitmapq-command.patch +Patch13: 0013-sbitmapq-Fix-for-sbitmap_queue-without-ws_active-mem.patch +Patch14: 0014-sbitmapq-Fix-for-sbitmap_word-without-cleared-member.patch +Patch15: 0015-sbitmapq-Fix-for-sbitmap_queue-without-min_shallow_d.patch +Patch16: 0016-Make-dev-d-D-options-parse-sbitmap-on-Linux-4.18-and.patch +Patch17: 0017-sbitmapq-Fix-for-kernels-without-struct-wait_queue_h.patch +Patch18: 0018-sbitmapq-Limit-kernels-without-sbitmap-again.patch %description The core analysis suite is a self-contained tool that can be used to @@ -42,6 +58,22 @@ offered by Mission Critical Linux, or the LKCD kernel patch. %patch0 -p1 -b lzo_snappy_zstd.patch %patch1 -p1 -b rhel8_build.patch %patch2 -p1 -b rhel8_freepointer.patch +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 %build cp %{SOURCE1} . @@ -72,6 +104,9 @@ rm -rf %{buildroot} %{_includedir}/* %changelog +* Thu Jun 16 2022 Lianbo Jiang - 7.3.2-2 +- Enhance "dev -d|-D" options to support blk-mq sbitmap + * Mon May 16 2022 Lianbo Jiang - 7.3.2-1 - Rebase to upstream crash 7.3.2