393 lines
13 KiB
Diff
393 lines
13 KiB
Diff
|
From 7095c8fd029e3a33117e3b67de73f504686ebfe2 Mon Sep 17 00:00:00 2001
|
||
|
From: Lianbo Jiang <lijiang@redhat.com>
|
||
|
Date: Thu, 2 Jun 2022 20:12:55 +0800
|
||
|
Subject: [PATCH 10/18] Enhance "dev -d|-D" options to support blk-mq sbitmap
|
||
|
|
||
|
Since Linux 5.16-rc1, which kernel commit 9a14d6ce4135 ("block: remove
|
||
|
debugfs blk_mq_ctx dispatched/merged/completed attributes") removed the
|
||
|
members from struct blk_mq_ctx, crash has not displayed disk I/O statistics
|
||
|
for multiqueue (blk-mq) devices.
|
||
|
|
||
|
Let's parse the sbitmap in blk-mq layer to support it.
|
||
|
|
||
|
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
|
||
|
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
|
||
|
---
|
||
|
defs.h | 11 +++
|
||
|
dev.c | 244 +++++++++++++++++++++++++++++++++++++++++++++---------
|
||
|
symbols.c | 22 +++++
|
||
|
3 files changed, 238 insertions(+), 39 deletions(-)
|
||
|
|
||
|
diff --git a/defs.h b/defs.h
|
||
|
index c8444b4e54eb..2681586a33dc 100644
|
||
|
--- a/defs.h
|
||
|
+++ b/defs.h
|
||
|
@@ -2170,6 +2170,16 @@ struct offset_table { /* stash of commonly-used offsets */
|
||
|
long sbq_wait_state_wait;
|
||
|
long sbitmap_alloc_hint;
|
||
|
long sbitmap_round_robin;
|
||
|
+ long request_cmd_flags;
|
||
|
+ long request_q;
|
||
|
+ long request_state;
|
||
|
+ long request_queue_queue_hw_ctx;
|
||
|
+ long request_queue_nr_hw_queues;
|
||
|
+ long blk_mq_hw_ctx_tags;
|
||
|
+ long blk_mq_tags_bitmap_tags;
|
||
|
+ long blk_mq_tags_breserved_tags;
|
||
|
+ long blk_mq_tags_nr_reserved_tags;
|
||
|
+ long blk_mq_tags_rqs;
|
||
|
};
|
||
|
|
||
|
struct size_table { /* stash of commonly-used sizes */
|
||
|
@@ -2339,6 +2349,7 @@ struct size_table { /* stash of commonly-used sizes */
|
||
|
long sbitmap;
|
||
|
long sbitmap_queue;
|
||
|
long sbq_wait_state;
|
||
|
+ long blk_mq_tags;
|
||
|
};
|
||
|
|
||
|
struct array_table {
|
||
|
diff --git a/dev.c b/dev.c
|
||
|
index a493e51ac95c..4be4c96df8b0 100644
|
||
|
--- a/dev.c
|
||
|
+++ b/dev.c
|
||
|
@@ -4238,19 +4238,176 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io)
|
||
|
io->write = (dispatch[1] - comp[1]);
|
||
|
}
|
||
|
|
||
|
+typedef bool (busy_tag_iter_fn)(ulong rq, void *data);
|
||
|
+
|
||
|
+struct mq_inflight {
|
||
|
+ ulong q;
|
||
|
+ struct diskio *dio;
|
||
|
+};
|
||
|
+
|
||
|
+struct bt_iter_data {
|
||
|
+ ulong tags;
|
||
|
+ uint reserved;
|
||
|
+ uint nr_reserved_tags;
|
||
|
+ busy_tag_iter_fn *fn;
|
||
|
+ void *data;
|
||
|
+};
|
||
|
+
|
||
|
+/*
|
||
|
+ * See the include/linux/blk_types.h and include/linux/blk-mq.h
|
||
|
+ */
|
||
|
+#define MQ_RQ_IN_FLIGHT 1
|
||
|
+#define REQ_OP_BITS 8
|
||
|
+#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1)
|
||
|
+
|
||
|
+static uint op_is_write(uint op)
|
||
|
+{
|
||
|
+ return (op & REQ_OP_MASK) & 1;
|
||
|
+}
|
||
|
+
|
||
|
+static bool mq_check_inflight(ulong rq, void *data)
|
||
|
+{
|
||
|
+ uint cmd_flags = 0, state = 0;
|
||
|
+ ulong addr = 0, queue = 0;
|
||
|
+ struct mq_inflight *mi = data;
|
||
|
+
|
||
|
+ if (!IS_KVADDR(rq))
|
||
|
+ return TRUE;
|
||
|
+
|
||
|
+ addr = rq + OFFSET(request_q);
|
||
|
+ if (!readmem(addr, KVADDR, &queue, sizeof(ulong), "request.q", RETURN_ON_ERROR))
|
||
|
+ return FALSE;
|
||
|
+
|
||
|
+ addr = rq + OFFSET(request_cmd_flags);
|
||
|
+ if (!readmem(addr, KVADDR, &cmd_flags, sizeof(uint), "request.cmd_flags", RETURN_ON_ERROR))
|
||
|
+ return FALSE;
|
||
|
+
|
||
|
+ addr = rq + OFFSET(request_state);
|
||
|
+ if (!readmem(addr, KVADDR, &state, sizeof(uint), "request.state", RETURN_ON_ERROR))
|
||
|
+ return FALSE;
|
||
|
+
|
||
|
+ if (queue == mi->q && state == MQ_RQ_IN_FLIGHT) {
|
||
|
+ if (op_is_write(cmd_flags))
|
||
|
+ mi->dio->write++;
|
||
|
+ else
|
||
|
+ mi->dio->read++;
|
||
|
+ }
|
||
|
+
|
||
|
+ return TRUE;
|
||
|
+}
|
||
|
+
|
||
|
+static bool bt_iter(uint bitnr, void *data)
|
||
|
+{
|
||
|
+ ulong addr = 0, rqs_addr = 0, rq = 0;
|
||
|
+ struct bt_iter_data *iter_data = data;
|
||
|
+ ulong tag = iter_data->tags;
|
||
|
+
|
||
|
+ if (!iter_data->reserved)
|
||
|
+ bitnr += iter_data->nr_reserved_tags;
|
||
|
+
|
||
|
+ /* rqs */
|
||
|
+ addr = tag + OFFSET(blk_mq_tags_rqs);
|
||
|
+ if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.rqs", RETURN_ON_ERROR))
|
||
|
+ return FALSE;
|
||
|
+
|
||
|
+ addr = rqs_addr + bitnr * sizeof(ulong); /* rqs[bitnr] */
|
||
|
+ if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.rqs[]", RETURN_ON_ERROR))
|
||
|
+ return FALSE;
|
||
|
+
|
||
|
+ return iter_data->fn(rq, iter_data->data);
|
||
|
+}
|
||
|
+
|
||
|
+static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_resvd_tags, struct diskio *dio)
|
||
|
+{
|
||
|
+ struct sbitmap_context sc = {0};
|
||
|
+ struct mq_inflight mi = {
|
||
|
+ .q = q,
|
||
|
+ .dio = dio,
|
||
|
+ };
|
||
|
+ struct bt_iter_data iter_data = {
|
||
|
+ .tags = tags,
|
||
|
+ .reserved = reserved,
|
||
|
+ .nr_reserved_tags = nr_resvd_tags,
|
||
|
+ .fn = mq_check_inflight,
|
||
|
+ .data = &mi,
|
||
|
+ };
|
||
|
+
|
||
|
+ sbitmap_context_load(sbq + OFFSET(sbitmap_queue_sb), &sc);
|
||
|
+ sbitmap_for_each_set(&sc, bt_iter, &iter_data);
|
||
|
+}
|
||
|
+
|
||
|
+static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
|
||
|
+{
|
||
|
+ uint i;
|
||
|
+
|
||
|
+ for (i = 0; i < cnt; i++) {
|
||
|
+ ulong addr = 0, tags = 0;
|
||
|
+ uint nr_reserved_tags = 0;
|
||
|
+
|
||
|
+ /* Tags owned by the block driver */
|
||
|
+ addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
|
||
|
+ if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
|
||
|
+ "blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
|
||
|
+ break;
|
||
|
+
|
||
|
+ addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
|
||
|
+ if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
|
||
|
+ "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
|
||
|
+ break;
|
||
|
+
|
||
|
+ if (nr_reserved_tags) {
|
||
|
+ addr = tags + OFFSET(blk_mq_tags_breserved_tags);
|
||
|
+ bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
|
||
|
+ }
|
||
|
+ addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
|
||
|
+ bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio)
|
||
|
+{
|
||
|
+ uint cnt = 0;
|
||
|
+ ulong addr = 0, hctx_addr = 0;
|
||
|
+ ulong *hctx_array = NULL;
|
||
|
+
|
||
|
+ addr = q + OFFSET(request_queue_nr_hw_queues);
|
||
|
+ readmem(addr, KVADDR, &cnt, sizeof(uint),
|
||
|
+ "request_queue.nr_hw_queues", FAULT_ON_ERROR);
|
||
|
+
|
||
|
+ addr = q + OFFSET(request_queue_queue_hw_ctx);
|
||
|
+ readmem(addr, KVADDR, &hctx_addr, sizeof(void *),
|
||
|
+ "request_queue.queue_hw_ctx", FAULT_ON_ERROR);
|
||
|
+
|
||
|
+ hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt);
|
||
|
+ if (!hctx_array)
|
||
|
+ error(FATAL, "fail to get memory for the hctx_array\n");
|
||
|
+
|
||
|
+ if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt,
|
||
|
+ "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) {
|
||
|
+ FREEBUF(hctx_array);
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ queue_for_each_hw_ctx(q, hctx_array, cnt, dio);
|
||
|
+
|
||
|
+ FREEBUF(hctx_array);
|
||
|
+}
|
||
|
+
|
||
|
static void
|
||
|
get_mq_diskio(unsigned long q, unsigned long *mq_count)
|
||
|
{
|
||
|
int cpu;
|
||
|
unsigned long queue_ctx;
|
||
|
unsigned long mctx_addr;
|
||
|
- struct diskio tmp;
|
||
|
+ struct diskio tmp = {0};
|
||
|
|
||
|
if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
|
||
|
- INVALID_MEMBER(blk_mq_ctx_rq_completed))
|
||
|
+ INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
|
||
|
+ get_mq_diskio_from_hw_queues(q, &tmp);
|
||
|
+ mq_count[0] = tmp.read;
|
||
|
+ mq_count[1] = tmp.write;
|
||
|
return;
|
||
|
-
|
||
|
- memset(&tmp, 0x00, sizeof(struct diskio));
|
||
|
+ }
|
||
|
|
||
|
readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx,
|
||
|
sizeof(ulong), "request_queue.queue_ctx",
|
||
|
@@ -4479,41 +4636,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk, ulong flags)
|
||
|
&& (io.read + io.write == 0))
|
||
|
return;
|
||
|
|
||
|
- if (use_mq_interface(queue_addr) &&
|
||
|
- (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
|
||
|
- INVALID_MEMBER(blk_mq_ctx_rq_completed)))
|
||
|
- fprintf(fp, "%s%s%s %s%s%s%s %s%s%s",
|
||
|
- mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf2, 10, LJUST, disk_name),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
|
||
|
- LJUST|LONG_HEX, (char *)queue_addr),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf4, 17, RJUST, "(not supported)"),
|
||
|
- space(MINSPACE));
|
||
|
-
|
||
|
- else
|
||
|
- fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s",
|
||
|
- mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf2, 10, LJUST, disk_name),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
|
||
|
- LJUST|LONG_HEX, (char *)queue_addr),
|
||
|
- space(MINSPACE),
|
||
|
- io.read + io.write,
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf4, 5, RJUST|INT_DEC,
|
||
|
- (char *)(unsigned long)io.read),
|
||
|
- space(MINSPACE),
|
||
|
- mkstring(buf5, 5, RJUST|INT_DEC,
|
||
|
- (char *)(unsigned long)io.write),
|
||
|
- space(MINSPACE));
|
||
|
+ fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s",
|
||
|
+ mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
|
||
|
+ space(MINSPACE),
|
||
|
+ mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
|
||
|
+ space(MINSPACE),
|
||
|
+ mkstring(buf2, 10, LJUST, disk_name),
|
||
|
+ space(MINSPACE),
|
||
|
+ mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
|
||
|
+ LJUST|LONG_HEX, (char *)queue_addr),
|
||
|
+ space(MINSPACE),
|
||
|
+ io.read + io.write,
|
||
|
+ space(MINSPACE),
|
||
|
+ mkstring(buf4, 5, RJUST|INT_DEC,
|
||
|
+ (char *)(unsigned long)io.read),
|
||
|
+ space(MINSPACE),
|
||
|
+ mkstring(buf5, 5, RJUST|INT_DEC,
|
||
|
+ (char *)(unsigned long)io.write),
|
||
|
+ space(MINSPACE));
|
||
|
|
||
|
if (VALID_MEMBER(request_queue_in_flight)) {
|
||
|
if (!use_mq_interface(queue_addr)) {
|
||
|
@@ -4597,6 +4737,9 @@ void diskio_init(void)
|
||
|
MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry");
|
||
|
MEMBER_OFFSET_INIT(kset_list, "kset", "list");
|
||
|
MEMBER_OFFSET_INIT(request_list_count, "request_list", "count");
|
||
|
+ MEMBER_OFFSET_INIT(request_cmd_flags, "request", "cmd_flags");
|
||
|
+ MEMBER_OFFSET_INIT(request_q, "request", "q");
|
||
|
+ MEMBER_OFFSET_INIT(request_state, "request", "state");
|
||
|
MEMBER_OFFSET_INIT(request_queue_in_flight, "request_queue",
|
||
|
"in_flight");
|
||
|
if (MEMBER_EXISTS("request_queue", "rq"))
|
||
|
@@ -4608,10 +4751,33 @@ void diskio_init(void)
|
||
|
"mq_ops");
|
||
|
ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx,
|
||
|
"request_queue", "queue_ctx");
|
||
|
+ MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx,
|
||
|
+ "request_queue", "queue_hw_ctx");
|
||
|
+ MEMBER_OFFSET_INIT(request_queue_nr_hw_queues,
|
||
|
+ "request_queue", "nr_hw_queues");
|
||
|
MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx",
|
||
|
"rq_dispatched");
|
||
|
MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx",
|
||
|
"rq_completed");
|
||
|
+ MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx", "tags");
|
||
|
+ MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags",
|
||
|
+ "bitmap_tags");
|
||
|
+ MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags",
|
||
|
+ "breserved_tags");
|
||
|
+ MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags",
|
||
|
+ "nr_reserved_tags");
|
||
|
+ MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags", "rqs");
|
||
|
+ STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags");
|
||
|
+ STRUCT_SIZE_INIT(sbitmap, "sbitmap");
|
||
|
+ STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
|
||
|
+ MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
|
||
|
+
|
||
|
}
|
||
|
MEMBER_OFFSET_INIT(subsys_private_klist_devices, "subsys_private",
|
||
|
"klist_devices");
|
||
|
diff --git a/symbols.c b/symbols.c
|
||
|
index 5d12a021c769..c1f09556d710 100644
|
||
|
--- a/symbols.c
|
||
|
+++ b/symbols.c
|
||
|
@@ -10385,6 +10385,12 @@ dump_offset_table(char *spec, ulong makestruct)
|
||
|
OFFSET(kset_list));
|
||
|
fprintf(fp, " request_list_count: %ld\n",
|
||
|
OFFSET(request_list_count));
|
||
|
+ fprintf(fp, " request_cmd_flags: %ld\n",
|
||
|
+ OFFSET(request_cmd_flags));
|
||
|
+ fprintf(fp, " request_q: %ld\n",
|
||
|
+ OFFSET(request_q));
|
||
|
+ fprintf(fp, " request_state: %ld\n",
|
||
|
+ OFFSET(request_state));
|
||
|
fprintf(fp, " request_queue_in_flight: %ld\n",
|
||
|
OFFSET(request_queue_in_flight));
|
||
|
fprintf(fp, " request_queue_rq: %ld\n",
|
||
|
@@ -10393,10 +10399,25 @@ dump_offset_table(char *spec, ulong makestruct)
|
||
|
OFFSET(request_queue_mq_ops));
|
||
|
fprintf(fp, " request_queue_queue_ctx: %ld\n",
|
||
|
OFFSET(request_queue_queue_ctx));
|
||
|
+ fprintf(fp, " request_queue_queue_hw_ctx: %ld\n",
|
||
|
+ OFFSET(request_queue_queue_hw_ctx));
|
||
|
+ fprintf(fp, " request_queue_nr_hw_queues: %ld\n",
|
||
|
+ OFFSET(request_queue_nr_hw_queues));
|
||
|
fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n",
|
||
|
OFFSET(blk_mq_ctx_rq_dispatched));
|
||
|
fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n",
|
||
|
OFFSET(blk_mq_ctx_rq_completed));
|
||
|
+ fprintf(fp, " blk_mq_hw_ctx_tags: %ld\n",
|
||
|
+ OFFSET(blk_mq_hw_ctx_tags));
|
||
|
+ fprintf(fp, " blk_mq_tags_bitmap_tags: %ld\n",
|
||
|
+ OFFSET(blk_mq_tags_bitmap_tags));
|
||
|
+ fprintf(fp, " blk_mq_tags_breserved_tags: %ld\n",
|
||
|
+ OFFSET(blk_mq_tags_breserved_tags));
|
||
|
+ fprintf(fp, " blk_mq_tags_nr_reserved_tags: %ld\n",
|
||
|
+ OFFSET(blk_mq_tags_nr_reserved_tags));
|
||
|
+ fprintf(fp, " blk_mq_tags_rqs: %ld\n",
|
||
|
+ OFFSET(blk_mq_tags_rqs));
|
||
|
+
|
||
|
fprintf(fp, " subsys_private_klist_devices: %ld\n",
|
||
|
OFFSET(subsys_private_klist_devices));
|
||
|
fprintf(fp, " subsystem_kset: %ld\n",
|
||
|
@@ -11003,6 +11024,7 @@ dump_offset_table(char *spec, ulong makestruct)
|
||
|
fprintf(fp, " sbitmap: %ld\n", SIZE(sbitmap));
|
||
|
fprintf(fp, " sbitmap_queue: %ld\n", SIZE(sbitmap_queue));
|
||
|
fprintf(fp, " sbq_wait_state: %ld\n", SIZE(sbq_wait_state));
|
||
|
+ fprintf(fp, " blk_mq_tags: %ld\n", SIZE(blk_mq_tags));
|
||
|
|
||
|
fprintf(fp, "\n array_table:\n");
|
||
|
/*
|
||
|
--
|
||
|
2.30.2
|
||
|
|