Compare commits

..

No commits in common. "c8s" and "c8" have entirely different histories.
c8s ... c8

51 changed files with 8 additions and 5659 deletions

9
.gitignore vendored
View File

@ -1,7 +1,2 @@
SOURCES/crash-7.3.2.tar.gz SOURCES/crash-8.0.4.tar.gz
SOURCES/gdb-7.6.tar.gz SOURCES/gdb-10.2.tar.gz
/crash-7.3.2.tar.gz
/gdb-7.6.tar.gz
/crash-8.0.3.tar.gz
/crash-8.0.4.tar.gz
/gdb-10.2.tar.gz

View File

@ -1,52 +0,0 @@
From 040a56e9f9d0df15a2f8161ed3a0a907d70dda03 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 10 May 2023 16:09:03 +0900
Subject: [PATCH 01/30] Fix kernel version macros for revision numbers over 255
The current comparison macros for kernel version shift the minor number by
only 8 bits. This can cause an unexpected result on kernels with a revision
number over 255, e.g. Linux 4.14.314.
In fact, on Linux 4.14.314 for x86_64 without CONFIG_RANDOMIZE_BASE=y
(KASLR), the following condition became false in x86_64_init().
((THIS_KERNEL_VERSION >= LINUX(4,14,84)) &&
(THIS_KERNEL_VERSION < LINUX(4,15,0)))
As a result, crash used a wrong hard-coded value for PAGE_OFFSET and
failed to start a session with the following seek error.
crash: seek error: physical address: 200e000 type: "pud page"
Shift the major and minor number by 24 and 16 bits respectively to fix
this issue.
Reported-by: Luiz Capitulino <luizcap@amazon.com>
Tested-by: Luiz Capitulino <luizcap@amazon.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 12ad6aaa0998..211fc9d55d33 100644
--- a/defs.h
+++ b/defs.h
@@ -807,10 +807,10 @@ struct kernel_table { /* kernel data */
} \
}
-#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 16) + \
- (kt->kernel_version[1] << 8) + \
+#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 24) + \
+ (kt->kernel_version[1] << 16) + \
(kt->kernel_version[2]))
-#define LINUX(x,y,z) (((uint)(x) << 16) + ((uint)(y) << 8) + (uint)(z))
+#define LINUX(x,y,z) (((uint)(x) << 24) + ((uint)(y) << 16) + (uint)(z))
#define THIS_GCC_VERSION ((kt->gcc_version[0] << 16) + \
(kt->gcc_version[1] << 8) + \
--
2.37.1

View File

@ -1,179 +0,0 @@
From 58c1816521c2e6bece3d69256b1866c9df8d93aa Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Tue, 16 May 2023 08:59:50 +0900
Subject: [PATCH 02/30] Fix failure of "dev -d|-D" options on Linux 6.4 and
later kernels
Kernel commit 2df418cf4b72 ("driver core: class: remove subsystem
private pointer from struct class"), which is contained in Linux 6.4 and
later kernels, removed the class.p member for struct subsys_private. As
a result, the "dev -d|-D" options fail with the following error.
dev: invalid structure member offset: class_p
FILE: dev.c LINE: 4689 FUNCTION: init_iter()
Search the class_kset list for the subsys_private of block class to fix
this.
As a preparation, introduce get_subsys_private() function, which is
abstracted from the same search procedure in init_memory_block().
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
dev.c | 20 +++++++++++++++++---
memory.c | 35 +++--------------------------------
tools.c | 43 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 64 insertions(+), 35 deletions(-)
diff --git a/defs.h b/defs.h
index 211fc9d55d33..21cc760444d1 100644
--- a/defs.h
+++ b/defs.h
@@ -5521,6 +5521,7 @@ struct rb_node *rb_left(struct rb_node *, struct rb_node *);
struct rb_node *rb_next(struct rb_node *);
struct rb_node *rb_last(struct rb_root *);
long percpu_counter_sum_positive(ulong fbc);
+ulong get_subsys_private(char *, char *);
/*
* symbols.c
diff --git a/dev.c b/dev.c
index 75d30bd022a1..9d38aef9b3db 100644
--- a/dev.c
+++ b/dev.c
@@ -4686,9 +4686,16 @@ init_iter(struct iter *i)
} else {
/* kernel version > 2.6.27, klist */
unsigned long class_private_addr;
- readmem(block_class_addr + OFFSET(class_p), KVADDR,
- &class_private_addr, sizeof(class_private_addr),
- "class.p", FAULT_ON_ERROR);
+
+ if (INVALID_MEMBER(class_p)) /* kernel version >= 6.4 */
+ class_private_addr = get_subsys_private("class_kset", "block");
+ else
+ readmem(block_class_addr + OFFSET(class_p), KVADDR,
+ &class_private_addr, sizeof(class_private_addr),
+ "class.p", FAULT_ON_ERROR);
+
+ if (!class_private_addr)
+ error(FATAL, "cannot determine subsys_private for block.\n");
if (VALID_STRUCT(class_private)) {
/* 2.6.27 < kernel version <= 2.6.37-rc2 */
@@ -4823,6 +4830,13 @@ void diskio_init(void)
if (INVALID_MEMBER(class_devices))
MEMBER_OFFSET_INIT(class_devices, "class", "devices");
MEMBER_OFFSET_INIT(class_p, "class", "p");
+ if (INVALID_MEMBER(class_p)) {
+ MEMBER_OFFSET_INIT(kset_list, "kset", "list");
+ MEMBER_OFFSET_INIT(kset_kobj, "kset", "kobj");
+ MEMBER_OFFSET_INIT(kobject_name, "kobject", "name");
+ MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry");
+ MEMBER_OFFSET_INIT(subsys_private_subsys, "subsys_private", "subsys");
+ }
MEMBER_OFFSET_INIT(class_private_devices, "class_private",
"class_devices");
MEMBER_OFFSET_INIT(device_knode_class, "device", "knode_class");
diff --git a/memory.c b/memory.c
index 0568f18eb9b7..953fc380c03c 100644
--- a/memory.c
+++ b/memory.c
@@ -17865,38 +17865,9 @@ init_memory_block(int *klistcnt, ulong **klistbuf)
* v6.3-rc1
* d2bf38c088e0 driver core: remove private pointer from struct bus_type
*/
- if (INVALID_MEMBER(bus_type_p)) {
- int i, cnt;
- char buf[32];
- ulong bus_kset, list, name;
-
- BZERO(ld, sizeof(struct list_data));
-
- get_symbol_data("bus_kset", sizeof(ulong), &bus_kset);
- readmem(bus_kset + OFFSET(kset_list), KVADDR, &list,
- sizeof(ulong), "bus_kset.list", FAULT_ON_ERROR);
-
- ld->flags |= LIST_ALLOCATE;
- ld->start = list;
- ld->end = bus_kset + OFFSET(kset_list);
- ld->list_head_offset = OFFSET(kobject_entry);
-
- cnt = do_list(ld);
- for (i = 0; i < cnt; i++) {
- readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name,
- sizeof(ulong), "kobject.name", FAULT_ON_ERROR);
- read_string(name, buf, sizeof(buf)-1);
- if (CRASHDEBUG(1))
- fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf);
- if (STREQ(buf, "memory")) {
- /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */
- private = ld->list_ptr[i] - OFFSET(kset_kobj)
- - OFFSET(subsys_private_subsys);
- break;
- }
- }
- FREEBUF(ld->list_ptr);
- } else {
+ if (INVALID_MEMBER(bus_type_p))
+ private = get_subsys_private("bus_kset", "memory");
+ else {
ulong memory_subsys = symbol_value("memory_subsys");
readmem(memory_subsys + OFFSET(bus_type_p), KVADDR, &private,
sizeof(void *), "memory_subsys.private", FAULT_ON_ERROR);
diff --git a/tools.c b/tools.c
index c2cfa7e280bc..392a79707e61 100644
--- a/tools.c
+++ b/tools.c
@@ -6963,3 +6963,46 @@ percpu_counter_sum_positive(ulong fbc)
return (ret < 0) ? 0 : ret;
}
+
+ulong
+get_subsys_private(char *kset_name, char *target_name)
+{
+ ulong kset_addr, kset_list, name_addr, private = 0;
+ struct list_data list_data, *ld;
+ char buf[32];
+ int i, cnt;
+
+ if (!symbol_exists(kset_name))
+ return 0;
+
+ ld = &list_data;
+ BZERO(ld, sizeof(struct list_data));
+
+ get_symbol_data(kset_name, sizeof(ulong), &kset_addr);
+ readmem(kset_addr + OFFSET(kset_list), KVADDR, &kset_list,
+ sizeof(ulong), "kset.list", FAULT_ON_ERROR);
+
+ ld->flags |= LIST_ALLOCATE;
+ ld->start = kset_list;
+ ld->end = kset_addr + OFFSET(kset_list);
+ ld->list_head_offset = OFFSET(kobject_entry);
+
+ cnt = do_list(ld);
+
+ for (i = 0; i < cnt; i++) {
+ readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name_addr,
+ sizeof(ulong), "kobject.name", FAULT_ON_ERROR);
+ read_string(name_addr, buf, sizeof(buf)-1);
+ if (CRASHDEBUG(1))
+ fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf);
+ if (STREQ(buf, target_name)) {
+ /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */
+ private = ld->list_ptr[i] - OFFSET(kset_kobj)
+ - OFFSET(subsys_private_subsys);
+ break;
+ }
+ }
+ FREEBUF(ld->list_ptr);
+
+ return private;
+}
--
2.37.1

View File

@ -1,86 +0,0 @@
From 342cf340ed0386880fe2a3115d6bef32eabb511b Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 18 May 2023 11:48:28 +0900
Subject: [PATCH 03/30] Fix "kmem -v" option displaying no regions on Linux 6.3
and later
Kernel commit 869176a09606 ("mm/vmalloc.c: add flags to mark vm_map_ram
area"), which is contained in Linux 6.3 and later, added "flags" member
to struct vmap_area. This was the revival of the "flags" member as
kernel commit 688fcbfc06e4 had eliminated it before.
As a result, crash started to use the old procedure using the member and
displays no vmalloc'd regions, because it does not have the same flag
value as the old one.
crash> kmem -v
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
crash>
To fix this, also check if vmap_area.purge_list exists, which was
introduced with the flags and removed later, to determine that the flags
member is the old one.
Related vmap_area history:
v2.6.28 db64fe02258f introduced vmap_area with flags and purge_list
v5.4 688fcbfc06e4 removed flags
v5.11 96e2db456135 removed purge_list
v6.3 869176a09606 added flags again
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 4 +++-
symbols.c | 1 +
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 21cc760444d1..bfa07c3f5150 100644
--- a/defs.h
+++ b/defs.h
@@ -2216,6 +2216,7 @@ struct offset_table { /* stash of commonly-used offsets */
long in6_addr_in6_u;
long kset_kobj;
long subsys_private_subsys;
+ long vmap_area_purge_list;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index 953fc380c03c..15fa8b2f08f1 100644
--- a/memory.c
+++ b/memory.c
@@ -429,6 +429,7 @@ vm_init(void)
MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "vm");
if (INVALID_MEMBER(vmap_area_vm))
MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "private");
+ MEMBER_OFFSET_INIT(vmap_area_purge_list, "vmap_area", "purge_list");
STRUCT_SIZE_INIT(vmap_area, "vmap_area");
if (VALID_MEMBER(vmap_area_va_start) &&
VALID_MEMBER(vmap_area_va_end) &&
@@ -9063,7 +9064,8 @@ dump_vmap_area(struct meminfo *vi)
readmem(ld->list_ptr[i], KVADDR, vmap_area_buf,
SIZE(vmap_area), "vmap_area struct", FAULT_ON_ERROR);
- if (VALID_MEMBER(vmap_area_flags)) {
+ if (VALID_MEMBER(vmap_area_flags) &&
+ VALID_MEMBER(vmap_area_purge_list)) {
flags = ULONG(vmap_area_buf + OFFSET(vmap_area_flags));
if (flags != VM_VM_AREA)
continue;
diff --git a/symbols.c b/symbols.c
index f0721023816d..7b1d59203b90 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9169,6 +9169,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(vmap_area_vm));
fprintf(fp, " vmap_area_flags: %ld\n",
OFFSET(vmap_area_flags));
+ fprintf(fp, " vmap_area_purge_list: %ld\n", OFFSET(vmap_area_purge_list));
fprintf(fp, " module_size_of_struct: %ld\n",
OFFSET(module_size_of_struct));
--
2.37.1

View File

@ -1,225 +0,0 @@
From a0eceb041dfa248d66f9f9a455106184b7823bec Mon Sep 17 00:00:00 2001
From: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Date: Mon, 29 May 2023 19:55:51 +0800
Subject: [PATCH 04/30] arm64/x86_64: Enhance "vtop" command to show zero_pfn
information
Enhance the "vtop" command to show "ZERO PAGE" information when PTE or
PMD has attached to {huge_}zero_pfn. For example:
crash> vtop -c 13674 ffff8917e000
VIRTUAL PHYSICAL
ffff8917e000 836e71000
PAGE DIRECTORY: ffff000802f8d000
PGD: ffff000802f8dff8 => 884e29003
PUD: ffff000844e29ff0 => 884e93003
PMD: ffff000844e93240 => 840413003
PTE: ffff000800413bf0 => 160000836e71fc3
PAGE: 836e71000 (ZERO PAGE)
...
Hugepage case:
crash> vtop -c 14538 ffff95800000
VIRTUAL PHYSICAL
ffff95800000 910c00000
PAGE DIRECTORY: ffff000801fa0000
PGD: ffff000801fa0ff8 => 884f53003
PUD: ffff000844f53ff0 => 8426cb003
PMD: ffff0008026cb560 => 60000910c00fc1
PAGE: 910c00000 (2MB, ZERO PAGE)
...
Note that
1. support displaying zero page only for THP (except for 1G THP)
2. do not support hugetlb cases.
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 24 ++++++++++++++++--------
defs.h | 5 +++++
memory.c | 23 +++++++++++++++++++++++
x86_64.c | 9 +++++----
4 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/arm64.c b/arm64.c
index 56fb841f43f8..efbdccbec9d3 100644
--- a/arm64.c
+++ b/arm64.c
@@ -1787,7 +1787,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pgd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pgd_val & SECTION_PAGE_MASK_512MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pgd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
@@ -1806,7 +1807,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -1859,7 +1861,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = PTE_TO_PHYS(pmd_val) & SECTION_PAGE_MASK_512MB;
if (verbose) {
- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
@@ -1878,7 +1881,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = PTE_TO_PHYS(pte_val) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -1940,7 +1944,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
@@ -1959,7 +1964,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -2029,7 +2035,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
@@ -2048,7 +2055,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
diff --git a/defs.h b/defs.h
index bfa07c3f5150..7d8bb8ab3de1 100644
--- a/defs.h
+++ b/defs.h
@@ -2619,6 +2619,8 @@ struct vm_table { /* kernel VM-related data */
char *name;
} *pageflags_data;
ulong max_mem_section_nr;
+ ulong zero_paddr;
+ ulong huge_zero_paddr;
};
#define NODES (0x1)
@@ -3000,6 +3002,9 @@ struct load_module {
#define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask)
#define PHYSPAGEBASE(X) (((physaddr_t)(X)) & (physaddr_t)machdep->pagemask)
+#define IS_ZEROPAGE(paddr) ((paddr) == vt->zero_paddr || \
+ (paddr) == vt->huge_zero_paddr)
+
/*
* Sparse memory stuff
* These must follow the definitions in the kernel mmzone.h
diff --git a/memory.c b/memory.c
index 15fa8b2f08f1..ea3005a5c01f 100644
--- a/memory.c
+++ b/memory.c
@@ -1209,6 +1209,27 @@ vm_init(void)
machdep->memory_size()));
vt->paddr_prlen = strlen(buf);
+ vt->zero_paddr = ~0UL;
+ if (kernel_symbol_exists("zero_pfn")) {
+ ulong zero_pfn;
+
+ if (readmem(symbol_value("zero_pfn"), KVADDR,
+ &zero_pfn, sizeof(zero_pfn),
+ "read zero_pfn", QUIET|RETURN_ON_ERROR))
+ vt->zero_paddr = zero_pfn << PAGESHIFT();
+ }
+
+ vt->huge_zero_paddr = ~0UL;
+ if (kernel_symbol_exists("huge_zero_pfn")) {
+ ulong huge_zero_pfn;
+
+ if (readmem(symbol_value("huge_zero_pfn"), KVADDR,
+ &huge_zero_pfn, sizeof(huge_zero_pfn),
+ "read huge_zero_pfn", QUIET|RETURN_ON_ERROR) &&
+ huge_zero_pfn != ~0UL)
+ vt->huge_zero_paddr = huge_zero_pfn << PAGESHIFT();
+ }
+
if (vt->flags & PERCPU_KMALLOC_V1)
vt->dump_kmem_cache = dump_kmem_cache_percpu_v1;
else if (vt->flags & PERCPU_KMALLOC_V2)
@@ -14065,6 +14086,8 @@ dump_vm_table(int verbose)
} else {
fprintf(fp, " node_online_map: (unused)\n");
}
+ fprintf(fp, " zero_paddr: %lx\n", vt->zero_paddr);
+ fprintf(fp, " huge_zero_paddr: %lx\n", vt->huge_zero_paddr);
fprintf(fp, " nr_vm_stat_items: %d\n", vt->nr_vm_stat_items);
fprintf(fp, " vm_stat_items: %s", (vt->flags & VM_STAT) ?
"\n" : "(not used)\n");
diff --git a/x86_64.c b/x86_64.c
index 5019c69e452e..693a08bea758 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -2114,8 +2114,9 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in
goto no_upage;
if (pmd_pte & _PAGE_PSE) {
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n",
- PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n",
+ PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK,
+ IS_ZEROPAGE(PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK) ? ", ZERO PAGE" : "");
x86_64_translate_pte(pmd_pte, 0, 0);
}
@@ -2143,8 +2144,8 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in
*paddr = (PAGEBASE(pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n",
- PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK);
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK,
+ IS_ZEROPAGE(PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK) ? "(ZERO PAGE)" : "");
x86_64_translate_pte(pte, 0, 0);
}
--
2.37.1

View File

@ -1,165 +0,0 @@
From db8c030857b4e318728c51c20da687906c109d0d Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Tue, 30 May 2023 19:38:34 +0900
Subject: [PATCH 05/30] diskdump/netdump: fix segmentation fault caused by
failure of stopping CPUs
There's no NMI on ARM. Hence, stopping the non-panicking CPUs from the
panicking CPU via IPI can fail easily if interrupts are being masked
at that moment. Moreover, crash_notes are not initialized for such
unstopped CPUs and the corresponding NT_PRSTATUS notes are not
attached to vmcore. However, the crash utility never takes such
uninitialized crash_notes into consideration and then ends up
mapping different NT_PRSTATUS notes to actually unstopped CPUs. This corrupt
mapping can cause the crash utility to hit a segmentation fault in the
operations where register values in NT_PRSTATUS notes are used.
For example:
crash> bt 1408
PID: 1408 TASK: ffff000003e22200 CPU: 2 COMMAND: "repro"
Segmentation fault (core dumped)
crash> help -D
diskdump_data:
filename: 127.0.0.1-2023-05-26-02:21:27/vmcore-ld1
flags: 46 (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED|LZO_SUPPORTED)
...snip...
notes_buf: 1815df0
num_vmcoredd_notes: 0
num_prstatus_notes: 5
notes[0]: 1815df0 (NT_PRSTATUS)
si.signo: 0 si.code: 0 si.errno: 0
...snip...
PSTATE: 80400005 FPVALID: 00000000
notes[4]: 1808f10 (NT_PRSTATUS)
Segmentation fault (core dumped)
To fix this issue, let's map NT_PRSTATUS to some CPU only if the
corresponding crash_notes is checked to be initialized.
[ kh: moved existence check for crash_notes out of the loop ]
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
diskdump.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
netdump.c | 7 ++++++-
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 7d8bb8ab3de1..6520d2f13f48 100644
--- a/defs.h
+++ b/defs.h
@@ -7118,6 +7118,7 @@ int dumpfile_is_split(void);
void show_split_dumpfiles(void);
void x86_process_elf_notes(void *, unsigned long);
void *diskdump_get_prstatus_percpu(int);
+int have_crash_notes(int cpu);
void map_cpus_to_prstatus_kdump_cmprs(void);
void diskdump_display_regs(int, FILE *);
void process_elf32_notes(void *, ulong);
diff --git a/diskdump.c b/diskdump.c
index 94bca4ded572..2c284ff3f97f 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -101,12 +101,54 @@ int dumpfile_is_split(void)
return KDUMP_SPLIT();
}
+int have_crash_notes(int cpu)
+{
+ ulong crash_notes, notes_ptr;
+ char *buf, *p;
+ Elf64_Nhdr *note = NULL;
+
+ if (!readmem(symbol_value("crash_notes"), KVADDR, &crash_notes,
+ sizeof(crash_notes), "crash_notes", RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read \"crash_notes\"\n");
+ return FALSE;
+ }
+
+ if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF))
+ notes_ptr = crash_notes + kt->__per_cpu_offset[cpu];
+ else
+ notes_ptr = crash_notes;
+
+ buf = GETBUF(SIZE(note_buf));
+
+ if (!readmem(notes_ptr, KVADDR, buf,
+ SIZE(note_buf), "note_buf_t", RETURN_ON_ERROR)) {
+ error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", cpu);
+ return FALSE;
+ }
+
+ note = (Elf64_Nhdr *)buf;
+ p = buf + sizeof(Elf64_Nhdr);
+
+ if (note->n_type != NT_PRSTATUS) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n", cpu);
+ return FALSE;
+ }
+
+ if (!STRNEQ(p, "CORE")) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name != \"CORE\")\n", cpu);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
void
map_cpus_to_prstatus_kdump_cmprs(void)
{
void **nt_ptr;
int online, i, j, nrcpus;
size_t size;
+ int crash_notes_exists;
if (pc->flags2 & QEMU_MEM_DUMP_COMPRESSED) /* notes exist for all cpus */
goto resize_note_pointers;
@@ -129,9 +171,10 @@ map_cpus_to_prstatus_kdump_cmprs(void)
* Re-populate the array with the notes mapping to online cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
+ crash_notes_exists = kernel_symbol_exists("crash_notes");
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i)) {
+ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
dd->nt_prstatus_percpu[i] = nt_ptr[j++];
dd->num_prstatus_notes =
MAX(dd->num_prstatus_notes, i+1);
diff --git a/netdump.c b/netdump.c
index 4eba66cecb55..61ddeaa08831 100644
--- a/netdump.c
+++ b/netdump.c
@@ -75,6 +75,7 @@ map_cpus_to_prstatus(void)
void **nt_ptr;
int online, i, j, nrcpus;
size_t size;
+ int crash_notes_exists;
if (pc->flags2 & QEMU_MEM_DUMP_ELF) /* notes exist for all cpus */
return;
@@ -97,10 +98,14 @@ map_cpus_to_prstatus(void)
* Re-populate the array with the notes mapping to online cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
+ crash_notes_exists = kernel_symbol_exists("crash_notes");
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i))
+ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
nd->nt_prstatus_percpu[i] = nt_ptr[j++];
+ nd->num_prstatus_notes =
+ MAX(nd->num_prstatus_notes, i+1);
+ }
}
FREEBUF(nt_ptr);
--
2.37.1

View File

@ -1,62 +0,0 @@
From 9868ebc8e648e5791764a51567a23efae7170d9b Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Tue, 30 May 2023 19:38:35 +0900
Subject: [PATCH 06/30] Fix segfault in arm64_is_kernel_exception_frame() when
corrupt stack pointer address is given
Due to the corrupted mapping fixed by the previous commit,
arm64_is_kernel_exception_frame() can receive invalid stack pointer
address via the 2nd argument; different NT_PRSTATUS contains different
task's stack pointer address. However, macro STACK_OFFSET_TYPE() never
checks if a given address is within the range of the kernel stack of
the corresponding task and hence can result in referring to outside of
bt->stackbuf.
static int
arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
{
struct arm64_pt_regs *regs;
struct machine_specific *ms = machdep->machspec;
regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
=> if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) &&
!(regs->pstate & (0xffffffff00000000ULL | PSR_MODE32_BIT)) &&
is_kernel_text(regs->pc) &&
is_kernel_text(regs->regs[30] | ms->CONFIG_ARM64_KERNELPACMASK)) {
To fix this issue, check if the given stack pointer address points to
the range of the kernel stack of the corresponding task, and abort if
it turns out to be invalid.
Although the corrupted mapping has already been fixed, this fix is
still needed because a corrupt stack pointer address can still be passed
here for other reasons. Consider, for example, that data on the
kernel stack can be modified abnormally due to any kernel bugs or
hardware issues.
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 6520d2f13f48..11fdc17e60d0 100644
--- a/defs.h
+++ b/defs.h
@@ -976,7 +976,10 @@ struct bt_info {
#define STACK_OFFSET_TYPE(OFF) \
(((ulong)(OFF) > STACKSIZE()) ? \
- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF))
+ (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? \
+ error(FATAL, "invalid stack pointer is given\n") : \
+ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \
+ (ulong)(OFF))
#define GET_STACK_ULONG(OFF) \
*((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))])))
--
2.37.1

View File

@ -1,71 +0,0 @@
From 8527bbff71cbdfd90a67d5cec4a1d94156e6bf13 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Wed, 31 May 2023 14:01:36 +0800
Subject: [PATCH 07/30] Output prompt when stdin is not a TTY
When stdin is not a TTY, the prompt ("crash> ") won't be displayed. If
another process interacts with crash with piped stdin/stdout, it will not
get the prompt as a delimiter.
Compared to other debugger like gdb, crash seems intended to give a
prompt in this case in the beginning of process_command_line(). It
checks if pc->flags does NOT have any of
READLINE|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE, a
prompt should be printed. The check will never be true since READLINE is
set in setup_environment() unconditionally.
It makes more sense to change the READLINE flag in the check to TTY
instead. Besides this change, the prompt in process_command_line() should
only be printed when it's not in the middle of processing the input file
recovering from a previous FATAL command, because the prompt will be
displayed by the exec_input_file().
Additionally, when stdin is not TTY, repeat the command line from user
after prompt, which can give more context.
The prompt and command line can be suppressed by using the silent (-s) flag.
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
cmdline.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/cmdline.c b/cmdline.c
index ded6551c2597..b7f919ae2279 100644
--- a/cmdline.c
+++ b/cmdline.c
@@ -64,8 +64,8 @@ process_command_line(void)
fp = stdout;
BZERO(pc->command_line, BUFSIZE);
- if (!(pc->flags &
- (READLINE|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE)))
+ if (!pc->ifile_in_progress && !(pc->flags &
+ (TTY|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE)))
fprintf(fp, "%s", pc->prompt);
fflush(fp);
@@ -136,12 +136,16 @@ process_command_line(void)
add_history(pc->command_line);
check_special_handling(pc->command_line);
- } else {
- if (fgets(pc->command_line, BUFSIZE-1, stdin) == NULL)
+ } else {
+ if (fgets(pc->command_line, BUFSIZE-1, stdin) == NULL)
clean_exit(1);
+ if (!(pc->flags & SILENT)) {
+ fprintf(fp, "%s", pc->command_line);
+ fflush(fp);
+ }
clean_line(pc->command_line);
strcpy(pc->orig_line, pc->command_line);
- }
+ }
/*
* First clean out all linefeeds and leading/trailing spaces.
--
2.37.1

View File

@ -1,345 +0,0 @@
From 77d8621876c1c6a3a25b91e464ba588a542485fb Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 18 May 2023 16:53:54 +0900
Subject: [PATCH 08/30] x86_64: Fix "bt" command printing stale entries on
Linux 6.4 and later
Kernel commit fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in
two"), which is contained in Linux 6.4 and later kernels, changed
ORC_TYPE_CALL macro from 0 to 2. As a result, the "bt" command cannot
use ORC entries, and can display stale entries in a call trace.
crash> bt 1
PID: 1 TASK: ffff93cd06294180 CPU: 51 COMMAND: "systemd"
#0 [ffffb72bc00cbc98] __schedule at ffffffff86e52aae
#1 [ffffb72bc00cbd00] schedule at ffffffff86e52f6a
#2 [ffffb72bc00cbd18] schedule_hrtimeout_range_clock at ffffffff86e58ef5
#3 [ffffb72bc00cbd88] ep_poll at ffffffff8669624d
#4 [ffffb72bc00cbe28] do_epoll_wait at ffffffff86696371
#5 [ffffb72bc00cbe30] do_timerfd_settime at ffffffff8669902b <<
#6 [ffffb72bc00cbe60] __x64_sys_epoll_wait at ffffffff86696bf0
#7 [ffffb72bc00cbeb0] do_syscall_64 at ffffffff86e3feb9
#8 [ffffb72bc00cbee0] __task_pid_nr_ns at ffffffff863330d7 <<
#9 [ffffb72bc00cbf08] syscall_exit_to_user_mode at ffffffff86e466b2 << stale entries
#10 [ffffb72bc00cbf18] do_syscall_64 at ffffffff86e3fec9 <<
#11 [ffffb72bc00cbf50] entry_SYSCALL_64_after_hwframe at ffffffff870000aa
Also, kernel commit ffb1b4a41016 added a member to struct orc_entry.
Although this does not affect the crash's unwinder, its debugging
information can be displayed incorrectly.
To fix these,
(1) introduce "kernel_orc_entry_6_4" structure corresponding to 6.4 and
abstraction layer "orc_entry" structure in crash,
(2) switch ORC_TYPE_CALL to 2 or 0 with kernel's orc_entry structure.
Related orc_entry history:
v4.14 39358a033b2e introduced struct orc_entry
v4.19 d31a580266ee added orc_entry.end member
v6.3 ffb1b4a41016 added orc_entry.signal member
v6.4 fb799447ae29 removed end member and changed type member to 3 bits
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 28 ++++++++++++-
x86_64.c | 119 +++++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 119 insertions(+), 28 deletions(-)
diff --git a/defs.h b/defs.h
index 11fdc17e60d0..bfda0c48d37b 100644
--- a/defs.h
+++ b/defs.h
@@ -6363,9 +6363,29 @@ typedef struct __attribute__((__packed__)) {
unsigned int sp_reg:4;
unsigned int bp_reg:4;
unsigned int type:2;
+ unsigned int signal:1;
unsigned int end:1;
} kernel_orc_entry;
+typedef struct __attribute__((__packed__)) {
+ signed short sp_offset;
+ signed short bp_offset;
+ unsigned int sp_reg:4;
+ unsigned int bp_reg:4;
+ unsigned int type:3;
+ unsigned int signal:1;
+} kernel_orc_entry_6_4;
+
+typedef struct orc_entry {
+ signed short sp_offset;
+ signed short bp_offset;
+ unsigned int sp_reg;
+ unsigned int bp_reg;
+ unsigned int type;
+ unsigned int signal;
+ unsigned int end;
+} orc_entry;
+
struct ORC_data {
int module_ORC;
uint lookup_num_blocks;
@@ -6376,10 +6396,13 @@ struct ORC_data {
ulong orc_lookup;
ulong ip_entry;
ulong orc_entry;
- kernel_orc_entry kernel_orc_entry;
+ orc_entry orc_entry_data;
+ int has_signal;
+ int has_end;
};
-#define ORC_TYPE_CALL 0
+#define ORC_TYPE_CALL ((machdep->flags & ORC_6_4) ? 2 : 0)
+/* The below entries are not used and must be updated if we use them. */
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
#define UNWIND_HINT_TYPE_SAVE 3
@@ -6456,6 +6479,7 @@ struct machine_specific {
#define ORC (0x4000)
#define KPTI (0x8000)
#define L1TF (0x10000)
+#define ORC_6_4 (0x20000)
#define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4|VM_5LEVEL)
diff --git a/x86_64.c b/x86_64.c
index 693a08bea758..87e87ae6e1e8 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -132,9 +132,9 @@ static void GART_init(void);
static void x86_64_exception_stacks_init(void);
static int in_START_KERNEL_map(ulong);
static ulong orc_ip(ulong);
-static kernel_orc_entry *__orc_find(ulong, ulong, uint, ulong);
-static kernel_orc_entry *orc_find(ulong);
-static kernel_orc_entry *orc_module_find(ulong);
+static orc_entry *__orc_find(ulong, ulong, uint, ulong);
+static orc_entry *orc_find(ulong);
+static orc_entry *orc_module_find(ulong);
static ulong ip_table_to_vaddr(ulong);
static void orc_dump(ulong);
@@ -806,6 +806,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, "%sFRAMESIZE_DEBUG", others++ ? "|" : "");
if (machdep->flags & ORC)
fprintf(fp, "%sORC", others++ ? "|" : "");
+ if (machdep->flags & ORC_6_4)
+ fprintf(fp, "%sORC_6_4", others++ ? "|" : "");
if (machdep->flags & FRAMEPOINTER)
fprintf(fp, "%sFRAMEPOINTER", others++ ? "|" : "");
if (machdep->flags & GART_REGION)
@@ -980,6 +982,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " ORC_data: %s", machdep->flags & ORC ? "\n" : "(unused)\n");
if (machdep->flags & ORC) {
fprintf(fp, " module_ORC: %s\n", ms->orc.module_ORC ? "TRUE" : "FALSE");
+ fprintf(fp, " has_signal: %s\n", ms->orc.has_signal ? "TRUE" : "FALSE");
+ fprintf(fp, " has_end: %s\n", ms->orc.has_end ? "TRUE" : "FALSE");
fprintf(fp, " lookup_num_blocks: %d\n", ms->orc.lookup_num_blocks);
fprintf(fp, " __start_orc_unwind_ip: %lx\n", ms->orc.__start_orc_unwind_ip);
fprintf(fp, " __stop_orc_unwind_ip: %lx\n", ms->orc.__stop_orc_unwind_ip);
@@ -988,14 +992,18 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " orc_lookup: %lx\n", ms->orc.orc_lookup);
fprintf(fp, " ip_entry: %lx\n", ms->orc.ip_entry);
fprintf(fp, " orc_entry: %lx\n", ms->orc.orc_entry);
- fprintf(fp, " kernel_orc_entry:\n");
- fprintf(fp, " sp_offset: %d\n", ms->orc.kernel_orc_entry.sp_offset);
- fprintf(fp, " bp_offset: %d\n", ms->orc.kernel_orc_entry.bp_offset);
- fprintf(fp, " sp_reg: %d\n", ms->orc.kernel_orc_entry.sp_reg);
- fprintf(fp, " bp_reg: %d\n", ms->orc.kernel_orc_entry.bp_reg);
- fprintf(fp, " type: %d\n", ms->orc.kernel_orc_entry.type);
- if (MEMBER_EXISTS("orc_entry", "end"))
- fprintf(fp, " end: %d\n", ms->orc.kernel_orc_entry.end);
+ fprintf(fp, " orc_entry_data:\n");
+ fprintf(fp, " sp_offset: %d\n", ms->orc.orc_entry_data.sp_offset);
+ fprintf(fp, " bp_offset: %d\n", ms->orc.orc_entry_data.bp_offset);
+ fprintf(fp, " sp_reg: %d\n", ms->orc.orc_entry_data.sp_reg);
+ fprintf(fp, " bp_reg: %d\n", ms->orc.orc_entry_data.bp_reg);
+ fprintf(fp, " type: %d\n", ms->orc.orc_entry_data.type);
+ if (ms->orc.has_signal)
+ fprintf(fp, " signal: %d\n", ms->orc.orc_entry_data.signal);
+ else
+ fprintf(fp, " signal: (n/a)\n");
+ if (ms->orc.has_end)
+ fprintf(fp, " end: %d\n", ms->orc.orc_entry_data.end);
else
fprintf(fp, " end: (n/a)\n");
}
@@ -6440,6 +6448,12 @@ x86_64_ORC_init(void)
MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");
+ orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added at 6.3 */
+ orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed at 6.4 */
+
+ if (orc->has_signal && !orc->has_end)
+ machdep->flags |= ORC_6_4;
+
machdep->flags |= ORC;
}
@@ -8522,7 +8536,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
int reterror;
int arg_exists;
int exception;
- kernel_orc_entry *korc;
+ orc_entry *korc;
if (!(bt->flags & BT_FRAMESIZE_DEBUG)) {
if ((bt->flags & BT_FRAMESIZE_IGNORE_MASK) ||
@@ -8608,11 +8622,14 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
if (CRASHDEBUG(1)) {
+ struct ORC_data *orc = &machdep->machspec->orc;
fprintf(fp,
"rsp: %lx textaddr: %lx -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
rsp, textaddr, korc->sp_offset, korc->bp_offset,
korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
@@ -9118,7 +9135,53 @@ orc_ip(ulong ip)
return (ip + ip_entry);
}
-static kernel_orc_entry *
+static orc_entry *
+orc_get_entry(struct ORC_data *orc)
+{
+ struct orc_entry *entry = &orc->orc_entry_data;
+
+ if (machdep->flags & ORC_6_4) {
+ kernel_orc_entry_6_4 korc;
+
+ if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry_6_4),
+ "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ entry->sp_offset = korc.sp_offset;
+ entry->bp_offset = korc.bp_offset;
+ entry->sp_reg = korc.sp_reg;
+ entry->bp_reg = korc.bp_reg;
+ entry->type = korc.type;
+ entry->signal = korc.signal;
+ } else {
+ kernel_orc_entry korc;
+
+ if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry),
+ "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ entry->sp_offset = korc.sp_offset;
+ entry->bp_offset = korc.bp_offset;
+ entry->sp_reg = korc.sp_reg;
+ entry->bp_reg = korc.bp_reg;
+ entry->type = korc.type;
+ if (orc->has_end) {
+ /*
+ * orc_entry.signal was inserted before orc_entry.end.
+ * see ffb1b4a41016.
+ */
+ if (orc->has_signal) {
+ entry->signal = korc.signal;
+ entry->end = korc.end;
+ } else
+ entry->end = korc.signal; /* on purpose */
+ }
+ }
+
+ return entry;
+}
+
+static orc_entry *
__orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
{
int index;
@@ -9128,7 +9191,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
int *ip_table = (int *)ip_table_ptr;
struct ORC_data *orc = &machdep->machspec->orc;
ulong vaddr;
- kernel_orc_entry *korc;
+ orc_entry *korc;
if (CRASHDEBUG(2)) {
int i, ip_entry;
@@ -9172,18 +9235,20 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
orc->ip_entry = (ulong)found;
orc->orc_entry = u_table_ptr + (index * SIZE(orc_entry));
- if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry,
- sizeof(kernel_orc_entry), "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+
+ if (!orc_get_entry(orc))
return NULL;
- korc = &orc->kernel_orc_entry;
+ korc = &orc->orc_entry_data;
if (CRASHDEBUG(2)) {
fprintf(fp, " found: %lx index: %d\n", (ulong)found, index);
fprintf(fp,
" orc_entry: %lx sp_offset: %d bp_offset: %d sp_reg: %d bp_reg: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
@@ -9196,7 +9261,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
#define LOOKUP_START_IP (unsigned long)kt->stext
#define LOOKUP_STOP_IP (unsigned long)kt->etext
-static kernel_orc_entry *
+static orc_entry *
orc_find(ulong ip)
{
unsigned int idx, start, stop;
@@ -9266,7 +9331,7 @@ orc_find(ulong ip)
orc->__start_orc_unwind + (start * SIZE(orc_entry)), stop - start, ip);
}
-static kernel_orc_entry *
+static orc_entry *
orc_module_find(ulong ip)
{
struct load_module *lm;
@@ -9313,7 +9378,7 @@ static void
orc_dump(ulong ip)
{
struct ORC_data *orc = &machdep->machspec->orc;
- kernel_orc_entry *korc;
+ orc_entry *korc;
ulong vaddr, offset;
struct syment *sp, *orig;
@@ -9336,13 +9401,15 @@ next_in_func:
fprintf(fp, "%s+%ld -> ", sp->name, offset);
else
fprintf(fp, "(unresolved) -> ");
- if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry, sizeof(kernel_orc_entry),
- "kernel orc_entry", RETURN_ON_ERROR))
+
+ if (!orc_get_entry(orc))
error(FATAL, "cannot read orc_entry\n");
- korc = &orc->kernel_orc_entry;
+ korc = &orc->orc_entry_data;
fprintf(fp, "orc: %lx spo: %d bpo: %d spr: %d bpr: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
--
2.37.1

View File

@ -1,48 +0,0 @@
From ec1e61b33a705b8be8d116a541c7b076b0429deb Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 12 Jun 2023 18:50:05 +0800
Subject: [PATCH 09/30] Fix invalid structure size error during crash startup
on ppc64
The crash utility will fail to start a session on ppc64 with the following
error:
# crash vmlinux vmcore -s
crash: invalid structure size: note_buf
FILE: diskdump.c LINE: 121 FUNCTION: have_crash_notes()
[./crash] error trace: 101859ac => 10291798 => 10291450 => 10266038
10266038: SIZE_verify+156
10291450: have_crash_notes+308
10291798: map_cpus_to_prstatus_kdump_cmprs+448
101859ac: task_init+11980
The reason is that the size of note_buf is not initialized before using
SIZE(note_buf) in the have_crash_notes() on some architectures including
ppc64. Let's initialize it in task_init() to fix this issue.
Fixes: db8c030857b4 ("diskdump/netdump: fix segmentation fault caused by failure of stopping CPUs")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
task.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/task.c b/task.c
index 88941c7b0e4d..2b7467b4193d 100644
--- a/task.c
+++ b/task.c
@@ -675,6 +675,9 @@ task_init(void)
tt->this_task = pid_to_task(active_pid);
}
else {
+ if (INVALID_SIZE(note_buf))
+ STRUCT_SIZE_INIT(note_buf, "note_buf_t");
+
if (KDUMP_DUMPFILE())
map_cpus_to_prstatus();
else if (ELF_NOTES_VALID() && DISKDUMP_DUMPFILE())
--
2.37.1

View File

@ -1,69 +0,0 @@
From 91a76958e4a8a9fb67ac61166ff36e8dc961b3b9 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Wed, 7 Jun 2023 18:37:33 +0900
Subject: [PATCH 10/30] Revert "Fix segfault in
arm64_is_kernel_exception_frame() when corrupt stack pointer address is
given"
This reverts commit 9868ebc8e648e5791764a51567a23efae7170d9b.
The commit 9868ebc8e648e5791764a51567a23efae7170d9b causes the issue
that the bt command fails to show backtraces for the tasks that are running
in the user mode at the moment of the kernel panic as follows:
crash> bt 1734
PID: 1734 TASK: ffff000000392200 CPU: 4 COMMAND: "insmod"
bt: invalid stack pointer is given
The root cause is that while the commit added a sanity check into
STACK_OFFSET_TYPE() to validate if a given candidate address of any
interrupt or exception frame is contained within the range of the
corresponding kernel stack, the premise that the STACK_OFFSET_TYPE()
should not return out-of-the-buffer address, is wrong.
Reexamining the relevant surrounding part of the backtracing code, it
looks to me now that the STACK_OFFSET_TYPE() is originally expected to
return an out-of-the-buffer address, like the address of the top of
the corresponding kernel stack, e.g. at here:
static int
arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
{
...
if (bt->flags & BT_USER_SPACE)
start = (ulong *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(bt->stacktop))];
else {
Note that the above bt 1734 aborts here.
Hence, the current implementation policy around STACK_OFFSET_TYPE()
appears to be that the caller is responsible for knowing this in
advance and for carefully avoiding buffer overruns.
To fix this issue, revert the commit.
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index bfda0c48d37b..3e7d6cfbc6a8 100644
--- a/defs.h
+++ b/defs.h
@@ -976,10 +976,7 @@ struct bt_info {
#define STACK_OFFSET_TYPE(OFF) \
(((ulong)(OFF) > STACKSIZE()) ? \
- (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? \
- error(FATAL, "invalid stack pointer is given\n") : \
- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \
- (ulong)(OFF))
+ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF))
#define GET_STACK_ULONG(OFF) \
*((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))])))
--
2.37.1

View File

@ -1,45 +0,0 @@
From 6c8cd9b5dcf48221e5f75fc5850bb4719d77acce Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Wed, 7 Jun 2023 18:37:34 +0900
Subject: [PATCH 11/30] arm64: Fix again segfault in
arm64_is_kernel_exception_frame() when corrupt stack pointer address is given
This is the second trial from the commit
9868ebc8e648e5791764a51567a23efae7170d9b that was reverted at the
previous commit.
As described in the previous commit, result of STACK_OFFSET_TYPE() can
be an address out of bt->stackbuf and hence the address needs to be
checked prior to being referred to as a pt_regs object.
So, to fix the issue, let's check if stkptr points to within the range
of the kernel stack first.
[ kh: added a warning at Lianbo's suggestion ]
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arm64.c b/arm64.c
index efbdccbec9d3..67b1a2244810 100644
--- a/arm64.c
+++ b/arm64.c
@@ -2381,6 +2381,12 @@ arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
struct arm64_pt_regs *regs;
struct machine_specific *ms = machdep->machspec;
+ if (stkptr > STACKSIZE() && !INSTACK(stkptr, bt)) {
+ if (CRASHDEBUG(1))
+ error(WARNING, "stkptr: %lx is outside the kernel stack range\n", stkptr);
+ return FALSE;
+ }
+
regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) &&
--
2.37.1

View File

@ -1,53 +0,0 @@
From 8b24b2025fb4ae9bd6102bb054bd23987c35387e Mon Sep 17 00:00:00 2001
From: Likhitha Korrapati <likhitha@linux.ibm.com>
Date: Fri, 16 Jun 2023 17:25:19 +0530
Subject: [PATCH 12/30] ppc64: Remove redundant PTE checks
Remove redundant checks for PTE (Page Table Entry) because those
conditions are already covered.
if (!(pte & _PAGE_PRESENT)) {
...
return FALSE;
}
if (!pte)
return FALSE;
The second pte check is redundant because it holds true only when pte is
0. If pte is 0 then (!(pte & _PAGE_PRESENT)) is true and it will return
false. So there is no need for one more pte check.
Signed-off-by: Likhitha Korrapati <likhitha@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/ppc64.c b/ppc64.c
index b95a621d8fe4..fc34006f4863 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -968,9 +968,6 @@ ppc64_vtop(ulong vaddr, ulong *pgd, physaddr_t *paddr, int verbose)
return FALSE;
}
- if (!pte)
- return FALSE;
-
*paddr = PAGEBASE(PTOB(pte >> PTE_RPN_SHIFT_DEFAULT)) + PAGEOFFSET(vaddr);
if (verbose) {
@@ -1077,9 +1074,6 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose)
return FALSE;
}
- if (!pte)
- return FALSE;
-
out:
if (hugepage_type) {
if (hugepage_type == 2) {
--
2.37.1

File diff suppressed because it is too large Load Diff

View File

@ -1,119 +0,0 @@
From 88580068b7dd96bf679c82bdc05e146968ade10c Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 23 Jun 2023 16:34:35 +0900
Subject: [PATCH 14/30] Fix failure of gathering task table on Linux 6.5-rc1
and later
Kernel commit b69f0aeb0689 ("pid: Replace struct pid 1-element array
with flex-array") changed pid.numbers[1] to pid.numbers[]. With this,
the size of struct pid does not contain the size of struct upid:
(gdb) ptype /o struct pid
/* offset | size */ type = struct pid {
/* 0 | 4 */ refcount_t count;
...
/* 96 | 0 */ struct upid numbers[];
^^^^ ^^^
/* total size (bytes): 96 */
} ^^^^
As a result, in refresh_xarray_task_table(), crash does not read the
data of pid.numbers[0].ns and cannot gather the task table correctly.
$ crash vmlinux vmcore
...
WARNING: active task ffff936992ad0000 on cpu 1 not found in PID hash
...
crash> ps -S
RU: 9
crash>
Increase the size of reading struct pid by SIZE(upid) in this case.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
symbols.c | 3 +++
task.c | 10 ++++++++--
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 414853660dc1..8f7d1fa0aba6 100644
--- a/defs.h
+++ b/defs.h
@@ -2430,6 +2430,7 @@ struct array_table {
int task_struct_rlim;
int signal_struct_rlim;
int vm_numa_stat;
+ int pid_numbers;
};
/*
diff --git a/symbols.c b/symbols.c
index f161ee99e90a..82529a6785c9 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9705,6 +9705,8 @@ builtin_array_length(char *s, int len, int *two_dim)
lenptr = &array_table.signal_struct_rlim;
else if (STREQ(s, "vm_numa_stat"))
lenptr = &array_table.vm_numa_stat;
+ else if (STREQ(s, "pid.numbers"))
+ lenptr = &array_table.pid_numbers;
if (!lenptr) /* not stored */
return(len);
@@ -12107,6 +12109,7 @@ dump_offset_table(char *spec, ulong makestruct)
ARRAY_LENGTH(signal_struct_rlim));
fprintf(fp, " vm_numa_stat: %d\n",
ARRAY_LENGTH(vm_numa_stat));
+ fprintf(fp, " pid_numbers: %d\n", ARRAY_LENGTH(pid_numbers));
if (spec) {
int in_size_table, in_array_table, arrays, offsets, sizes;
diff --git a/task.c b/task.c
index 2b7467b4193d..b9076da35565 100644
--- a/task.c
+++ b/task.c
@@ -352,6 +352,7 @@ task_init(void)
MEMBER_OFFSET_INIT(upid_ns, "upid", "ns");
MEMBER_OFFSET_INIT(upid_pid_chain, "upid", "pid_chain");
MEMBER_OFFSET_INIT(pid_numbers, "pid", "numbers");
+ ARRAY_LENGTH_INIT(len, pid_numbers, "pid.numbers", NULL, 0);
MEMBER_OFFSET_INIT(pid_tasks, "pid", "tasks");
tt->init_pid_ns = symbol_value("init_pid_ns");
}
@@ -2574,6 +2575,7 @@ refresh_xarray_task_table(void)
char *tp;
struct list_pair xp;
char *pidbuf;
+ long pid_size = SIZE(pid);
if (DUMPFILE() && (tt->flags & TASK_INIT_DONE)) /* impossible */
return;
@@ -2603,8 +2605,12 @@ refresh_xarray_task_table(void)
if (CRASHDEBUG(1))
console("xarray: count: %ld\n", count);
+ /* 6.5: b69f0aeb0689 changed pid.numbers[1] to numbers[] */
+ if (ARRAY_LENGTH(pid_numbers) == 0)
+ pid_size += SIZE(upid);
+
retries = 0;
- pidbuf = GETBUF(SIZE(pid));
+ pidbuf = GETBUF(pid_size);
retry_xarray:
if (retries && DUMPFILE())
@@ -2672,7 +2678,7 @@ retry_xarray:
* - get task from address of task->pids[0]
*/
if (!readmem(next, KVADDR, pidbuf,
- SIZE(pid), "pid", RETURN_ON_ERROR|QUIET)) {
+ pid_size, "pid", RETURN_ON_ERROR|QUIET)) {
error(INFO, "\ncannot read pid struct from xarray\n");
if (DUMPFILE())
continue;
--
2.37.1

View File

@ -1,68 +0,0 @@
From 4ee56105881d7bb1da1e668ac5bb47a4e0846676 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 5 Jul 2023 10:02:59 +0800
Subject: [PATCH 15/30] Fix compilation error due to new strlcpy function that
glibc added
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The crash-utility has its own strlcpy(), but recently the latest glibc
has also implemented the strlcpy function, which is derived from
OpenBSD. Eventually this caused the following compilation error:
# make -j8 lzo
...
In file included from global_data.c:18:
defs.h:5556:8: error: conflicting types for strlcpy; have size_t(char *, char *, size_t) {aka long unsigned int(char *, char *, long unsigned int)}
5556 | size_t strlcpy(char *, char *, size_t);
| ^~~~~~~
In file included from memory.c:19:
defs.h:5556:8: error: conflicting types for strlcpy; have size_t(char *, char *, size_t) {aka long unsigned int(char *, char *, long unsigned int)}
5556 | size_t strlcpy(char *, char *, size_t);
| ^~~~~~~
...
To fix the issue, let's declare the strlcpy() as a weak function and
keep the same parameter types as the glibc function has.
Related glibc commits:
454a20c8756c ("Implement strlcpy and strlcat [BZ #178]")
d2fda60e7c40 ("manual: Manual update for strlcat, strlcpy, wcslcat, wclscpy")
388ae538ddcb ("hurd: Add strlcpy, strlcat, wcslcpy, wcslcat to libc.abilist")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
tools.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 8f7d1fa0aba6..26afe232cc3e 100644
--- a/defs.h
+++ b/defs.h
@@ -5553,7 +5553,7 @@ uint32_t swap32(uint32_t, int);
uint64_t swap64(uint64_t, int);
ulong *get_cpumask_buf(void);
int make_cpumask(char *, ulong *, int, int *);
-size_t strlcpy(char *, char *, size_t);
+size_t strlcpy(char *, const char *, size_t) __attribute__ ((__weak__));
struct rb_node *rb_first(struct rb_root *);
struct rb_node *rb_parent(struct rb_node *, struct rb_node *);
struct rb_node *rb_right(struct rb_node *, struct rb_node *);
diff --git a/tools.c b/tools.c
index 392a79707e61..0f2db108838a 100644
--- a/tools.c
+++ b/tools.c
@@ -6795,7 +6795,7 @@ make_cpumask_error:
* always be NULL-terminated.
*/
size_t
-strlcpy(char *dest, char *src, size_t size)
+strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
--
2.37.1

View File

@ -1,45 +0,0 @@
From 6d0be1316aa3666895c0a8a0d3c98c235ec03bd4 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Mon, 10 Jul 2023 10:42:08 +0900
Subject: [PATCH 16/30] Fix "irq -a" option on Linux 6.0 and later
Kernel commit f0dd891dd5a1d ("lib/cpumask: move some one-line wrappers
to header file"), which is contained in Linux 6.0 and later kernels,
inlined alloc_cpumask_var() function. As a result, the "irq -a" option
fails to determine that cpumask_var_t is a pointer, and displays wrong
CPU affinity for IRQs:
crash> irq -a
IRQ NAME AFFINITY
1 i8042 3
4 ttyS0
8 rtc0
9 acpi 3
12 i8042 3
...
Use alloc_cpumask_var_node() function symbol instead to fix it.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
kernel.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel.c b/kernel.c
index 639ed64f306a..0fc77c19f12a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -7298,7 +7298,8 @@ generic_get_irq_affinity(int irq)
tmp_addr = irq_desc_addr + \
OFFSET(irq_desc_t_affinity);
- if (symbol_exists("alloc_cpumask_var")) /* pointer member */
+ if (symbol_exists("alloc_cpumask_var_node") ||
+ symbol_exists("alloc_cpumask_var")) /* pointer member */
readmem(tmp_addr,KVADDR, &affinity_ptr, sizeof(ulong),
"irq_desc affinity", FAULT_ON_ERROR);
else /* array member */
--
2.37.1

View File

@ -1,133 +0,0 @@
From b76e116c50ffc228ebc08eb8de35019320679257 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Thu, 6 Jul 2023 10:53:18 -0400
Subject: [PATCH 17/30] vmware: Improve output when we fail to read vmware
'vmsn' file
Today if crash fails to read some structure in a vmware 'vmsn' file,
it will throw a "No such file or directory" message. Such a generic
message does not give any clue as to the problem, but instead sounds
like the file may not exist when it does, for example:
$ crash ./vmcore.vmsn ./vmlinux
crash 8.0.3
...
crash: vmw: Failed to read './vmcore.vmsn': [Error 2] No such file or directory
crash: ./vmcore.vmsn: initialization failed
$ ls -l ./vmcore.vmsn
-rwxrwxrwx. 7 myuser mygroup 12128999 Jul 4 07:21 ./vmcore.vmsn
Improve the above error message so we at least know which portion
of the file crash had difficulty reading. After this patch, the
above error looks like:
crash: vmw: Failed to read 'cptgroupdesc' from file './vmcore.vmsn': [Error 2] No such file or directory
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
vmware_guestdump.c | 8 ++++----
vmware_vmss.c | 24 ++++++++++++------------
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/vmware_guestdump.c b/vmware_guestdump.c
index cf818e588a60..5be26c8e2e90 100644
--- a/vmware_guestdump.c
+++ b/vmware_guestdump.c
@@ -117,8 +117,8 @@ is_vmware_guestdump(char *filename)
}
if (fread(&hdr, sizeof(struct guestdumpheader), 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "guestdumpheader", filename, errno, strerror(errno));
fclose(fp);
return FALSE;
}
@@ -204,8 +204,8 @@ vmware_guestdump_init(char *filename, FILE *ofp)
for (i = 0; i < vmss.num_vcpus; i++) {
if (fread(&vs, sizeof(struct vcpu_state), 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "vcpu_state", filename, errno, strerror(errno));
result = FALSE;
goto exit;
}
diff --git a/vmware_vmss.c b/vmware_vmss.c
index f6c5f32ea4c0..8121ab64a99a 100644
--- a/vmware_vmss.c
+++ b/vmware_vmss.c
@@ -39,8 +39,8 @@ is_vmware_vmss(char *filename)
}
if (fread(&hdr, sizeof(cptdumpheader), 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "cptdumpheader", filename, errno, strerror(errno));
fclose(fp);
return FALSE;
}
@@ -86,8 +86,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
}
if (fread(&hdr, sizeof(cptdumpheader), 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': %s [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "cptdumpheader", filename, errno, strerror(errno));
result = FALSE;
goto exit;
}
@@ -112,8 +112,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
}
if (fread(grps, sizeof(cptgroupdesc), grpsize, fp) != grpsize) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ "cptgroupdesc", filename, errno, strerror(errno));
result = FALSE;
goto exit;
}
@@ -225,8 +225,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
idx[0] < vmss.num_vcpus) {
int cpu = idx[0];
if (fread(vmss.regs64[cpu], VMW_GPREGS_SIZE, 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ name, filename, errno, strerror(errno));
break;
}
DEBUG_PARSE_PRINT((ofp, "\n"));
@@ -237,8 +237,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
int cpu = idx[0];
DEBUG_PARSE_PRINT((ofp, "\t=> "));
if (fread(&vmss.regs64[cpu]->cr[0], VMW_CR64_SIZE, 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ name, filename, errno, strerror(errno));
break;
}
for (j = 0; j < VMW_CR64_SIZE / 8; j++)
@@ -257,8 +257,8 @@ vmware_vmss_init(char *filename, FILE *ofp)
break;
}
if (fread(&idtr, sizeof(idtr), 1, fp) != 1) {
- error(INFO, LOGPRX"Failed to read '%s': [Error %d] %s\n",
- filename, errno, strerror(errno));
+ error(INFO, LOGPRX"Failed to read '%s' from file '%s': [Error %d] %s\n",
+ name, filename, errno, strerror(errno));
break;
}
DEBUG_PARSE_PRINT((ofp, "\n"));
--
2.37.1

View File

@ -1,33 +0,0 @@
From d17d51a92a3a1c1cce1e646c38fe52ca99406cf9 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 7 Jul 2023 15:17:18 +0900
Subject: [PATCH 18/30] Exclude zero entries from do_maple_tree() return value
While the return value of do_radix_tree() and do_xarray() does not
contain NULL entries, do_maple_tree()'s one contains NULL entries.
Make this behavior consistent with the previous tree functions to make
replacement easier, especially for the following patch.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
maple_tree.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/maple_tree.c b/maple_tree.c
index 807c17f7dfa0..eccd273105a6 100644
--- a/maple_tree.c
+++ b/maple_tree.c
@@ -287,7 +287,7 @@ static void do_mt_entry(ulong entry, ulong min, ulong max, uint depth,
static struct req_entry **e = NULL;
struct tree_data *td = ops->is_td ? (struct tree_data *)ops->private : NULL;
- if (ops->entry)
+ if (ops->entry && entry)
ops->entry(entry, entry, path, max, ops->private);
if (!td)
--
2.37.1

View File

@ -1,198 +0,0 @@
From 38d35bd1423ccafd0b8be0744155ce59ef3034ff Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 12 Jul 2023 17:55:29 +0900
Subject: [PATCH 19/30] Fix "irq [-a|-s]" options on Linux 6.5-rc1 and later
Kernel commit 721255b982 ("genirq: Use a maple tree for interrupt
descriptor management"), which is contained in Linux 6.5-rc1 and later
kernels, replaced irq_desc_tree with a maple tree sparse_irqs.
Without the patch, "irq [-a|-s]" options fail with an error, e.g. the
following on x86_64, on kernels configured with CONFIG_SPARSE_IRQ=y.
crash> irq
irq: x86_64_dump_irq: irq_desc[] or irq_desc_tree do not exist?
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 ++
ia64.c | 3 ++-
kernel.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
symbols.c | 1 +
x86_64.c | 9 ++++++---
5 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/defs.h b/defs.h
index 26afe232cc3e..358f365585cf 100644
--- a/defs.h
+++ b/defs.h
@@ -676,6 +676,7 @@ struct new_utsname {
#define IRQ_DESC_TREE_XARRAY (0x80ULL)
#define KMOD_PAX (0x100ULL)
#define KMOD_MEMORY (0x200ULL)
+#define IRQ_DESC_TREE_MAPLE (0x400ULL)
#define XEN() (kt->flags & ARCH_XEN)
#define OPENVZ() (kt->flags & ARCH_OPENVZ)
@@ -2222,6 +2223,7 @@ struct offset_table { /* stash of commonly-used offsets */
long module_mem;
long module_memory_base;
long module_memory_size;
+ long irq_data_irq;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/ia64.c b/ia64.c
index 2e1d15fe6042..d3e0a3b01869 100644
--- a/ia64.c
+++ b/ia64.c
@@ -791,7 +791,8 @@ ia64_back_trace_cmd(struct bt_info *bt)
static void
ia64_dump_irq(int irq)
{
- if (symbol_exists("irq_desc") || symbol_exists("_irq_desc") ||
+ if (kernel_symbol_exists("sparse_irqs") ||
+ symbol_exists("irq_desc") || symbol_exists("_irq_desc") ||
kernel_symbol_exists("irq_desc_ptrs")) {
machdep->dump_irq = generic_dump_irq;
return(generic_dump_irq(irq));
diff --git a/kernel.c b/kernel.c
index 0fc77c19f12a..546eed95eebd 100644
--- a/kernel.c
+++ b/kernel.c
@@ -541,7 +541,10 @@ kernel_init()
MEMBER_OFFSET_INIT(irqaction_dev_id, "irqaction", "dev_id");
MEMBER_OFFSET_INIT(irqaction_next, "irqaction", "next");
- if (kernel_symbol_exists("irq_desc_tree")) {
+ /* 6.5 and later: CONFIG_SPARSE_IRQ */
+ if (kernel_symbol_exists("sparse_irqs"))
+ kt->flags2 |= IRQ_DESC_TREE_MAPLE;
+ else if (kernel_symbol_exists("irq_desc_tree")) {
get_symbol_type("irq_desc_tree", NULL, &req);
if (STREQ(req.type_tag_name, "xarray")) {
kt->flags2 |= IRQ_DESC_TREE_XARRAY;
@@ -554,6 +557,7 @@ kernel_init()
}
STRUCT_SIZE_INIT(irq_data, "irq_data");
if (VALID_STRUCT(irq_data)) {
+ MEMBER_OFFSET_INIT(irq_data_irq, "irq_data", "irq");
MEMBER_OFFSET_INIT(irq_data_chip, "irq_data", "chip");
MEMBER_OFFSET_INIT(irq_data_affinity, "irq_data", "affinity");
MEMBER_OFFSET_INIT(irq_desc_irq_data, "irq_desc", "irq_data");
@@ -6180,6 +6184,8 @@ dump_kernel_table(int verbose)
fprintf(fp, "%sIRQ_DESC_TREE_RADIX", others++ ? "|" : "");
if (kt->flags2 & IRQ_DESC_TREE_XARRAY)
fprintf(fp, "%sIRQ_DESC_TREE_XARRAY", others++ ? "|" : "");
+ if (kt->flags2 & IRQ_DESC_TREE_MAPLE)
+ fprintf(fp, "%sIRQ_DESC_TREE_MAPLE", others++ ? "|" : "");
if (kt->flags2 & KMOD_PAX)
fprintf(fp, "%sKMOD_PAX", others++ ? "|" : "");
if (kt->flags2 & KMOD_MEMORY)
@@ -6652,6 +6658,45 @@ get_irq_desc_addr(int irq)
readmem(ptr, KVADDR, &addr,
sizeof(void *), "irq_desc_ptrs entry",
FAULT_ON_ERROR);
+ } else if (kt->flags2 & IRQ_DESC_TREE_MAPLE) {
+ unsigned int i;
+
+ if (kt->highest_irq && (irq > kt->highest_irq))
+ return addr;
+
+ cnt = do_maple_tree(symbol_value("sparse_irqs"), MAPLE_TREE_COUNT, NULL);
+
+ len = sizeof(struct list_pair) * (cnt+1);
+ lp = (struct list_pair *)GETBUF(len);
+ lp[0].index = cnt; /* maxcount */
+
+ cnt = do_maple_tree(symbol_value("sparse_irqs"), MAPLE_TREE_GATHER, lp);
+
+ /*
+ * NOTE: We cannot use lp.index like Radix Tree or XArray because
+ * it's not an absolute index and just counter in Maple Tree.
+ */
+ if (kt->highest_irq == 0) {
+ readmem((ulong)lp[cnt-1].value +
+ OFFSET(irq_desc_irq_data) + OFFSET(irq_data_irq),
+ KVADDR, &kt->highest_irq, sizeof(int), "irq_data.irq",
+ FAULT_ON_ERROR);
+ }
+
+ for (c = 0; c < cnt; c++) {
+ readmem((ulong)lp[c].value +
+ OFFSET(irq_desc_irq_data) + OFFSET(irq_data_irq),
+ KVADDR, &i, sizeof(int), "irq_data.irq", FAULT_ON_ERROR);
+ if (i == irq) {
+ if (CRASHDEBUG(1))
+ fprintf(fp, "index: %d value: %lx\n",
+ i, (ulong)lp[c].value);
+ addr = (ulong)lp[c].value;
+ break;
+ }
+ }
+ FREEBUF(lp);
+
} else if (kt->flags2 & (IRQ_DESC_TREE_RADIX|IRQ_DESC_TREE_XARRAY)) {
if (kt->highest_irq && (irq > kt->highest_irq))
return addr;
@@ -6700,8 +6745,8 @@ get_irq_desc_addr(int irq)
FREEBUF(lp);
} else {
error(FATAL,
- "neither irq_desc, _irq_desc, irq_desc_ptrs "
- "or irq_desc_tree symbols exist\n");
+ "neither irq_desc, _irq_desc, irq_desc_ptrs, "
+ "irq_desc_tree or sparse_irqs symbols exist\n");
}
return addr;
diff --git a/symbols.c b/symbols.c
index 82529a6785c9..876be7aea90e 100644
--- a/symbols.c
+++ b/symbols.c
@@ -10375,6 +10375,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(irq_desc_t_kstat_irqs));
fprintf(fp, " irq_desc_t_affinity: %ld\n",
OFFSET(irq_desc_t_affinity));
+ fprintf(fp, " irq_data_irq: %ld\n", OFFSET(irq_data_irq));
fprintf(fp, " irq_data_chip: %ld\n",
OFFSET(irq_data_chip));
fprintf(fp, " irq_data_affinity: %ld\n",
diff --git a/x86_64.c b/x86_64.c
index 87e87ae6e1e8..42ade4817ad9 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -5391,7 +5391,8 @@ get_x86_64_frame(struct bt_info *bt, ulong *getpc, ulong *getsp)
static void
x86_64_dump_irq(int irq)
{
- if (symbol_exists("irq_desc") ||
+ if (kernel_symbol_exists("sparse_irqs") ||
+ symbol_exists("irq_desc") ||
kernel_symbol_exists("irq_desc_ptrs") ||
kernel_symbol_exists("irq_desc_tree")) {
machdep->dump_irq = generic_dump_irq;
@@ -5405,7 +5406,8 @@ x86_64_dump_irq(int irq)
static void
x86_64_get_irq_affinity(int irq)
{
- if (symbol_exists("irq_desc") ||
+ if (kernel_symbol_exists("sparse_irqs") ||
+ symbol_exists("irq_desc") ||
kernel_symbol_exists("irq_desc_ptrs") ||
kernel_symbol_exists("irq_desc_tree")) {
machdep->get_irq_affinity = generic_get_irq_affinity;
@@ -5419,7 +5421,8 @@ x86_64_get_irq_affinity(int irq)
static void
x86_64_show_interrupts(int irq, ulong *cpus)
{
- if (symbol_exists("irq_desc") ||
+ if (kernel_symbol_exists("sparse_irqs") ||
+ symbol_exists("irq_desc") ||
kernel_symbol_exists("irq_desc_ptrs") ||
kernel_symbol_exists("irq_desc_tree")) {
machdep->show_interrupts = generic_show_interrupts;
--
2.37.1

View File

@ -1,53 +0,0 @@
From f0b59524624b83d634b3fa8ab4ab3acf9ccce9df Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Mon, 10 Jul 2023 15:05:36 +0900
Subject: [PATCH 20/30] Fix segmentation fault by "tree -s" option with Maple
Tree
Without the patch, do_mt_entry() can call dump_struct_members_for_tree()
with a NULL entry, and parse_for_member_extended() will cause a
segmentation fault during strncpy().
This is caused by "tree -t maple -s struct.member.member" style multiple
level member access:
crash> tree -t maple -s irq_desc.irq_data.irq sparse_irqs
ffff936980188400
irq_data.irq = 0,
ffff93698018be00
irq_data.irq = 1,
...
ffff936980f38e00
irq_data.irq = 19,
Segmentation fault (core dumped)
(gdb) bt
#0 0x00007faaf8e51635 in __strncpy_avx2 () from /lib64/libc.so.6
#1 0x00000000005e5927 in parse_for_member_extended (dm=dm@entry=0x7ffcb9e6d860, ...
#2 0x0000000000603c45 in dump_struct_member (s=s@entry=0x128cde0 <shared_bufs+1024> ...
#3 0x0000000000513cf5 in dump_struct_members_for_tree (td=td@entry=0x7ffcb9e6eeb0, ...
#4 0x0000000000651f15 in do_mt_entry (entry=0, min=min@entry=20, max=max@entry=119, ...
...
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
maple_tree.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/maple_tree.c b/maple_tree.c
index eccd273105a6..8c804d0cb80d 100644
--- a/maple_tree.c
+++ b/maple_tree.c
@@ -313,7 +313,7 @@ static void do_mt_entry(ulong entry, ulong min, ulong max, uint depth,
fprintf(fp, " index: %ld position: %s/%u\n",
++(*global_index), path, index);
- if (td->structname) {
+ if (td->structname && entry) {
if (td->flags & TREE_STRUCT_RADIX_10)
print_radix = 10;
else if (td->flags & TREE_STRUCT_RADIX_16)
--
2.37.1

View File

@ -1,49 +0,0 @@
From aa5763800d614ff6080fd1909517a3939c250e86 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Fri, 21 Jul 2023 12:36:18 +0800
Subject: [PATCH 21/30] Fix warning about kernel version inconsistency during
crash startup
Currently, the symbol ".rodata" may not be found in some vmlinux, and
the strings command will still be used to get the linux banner string,
but this gets two strings as below:
# strings vmlinux | grep "Linux version"
Linux version 6.5.0-0.rc2.17.fc39.x86_64 ... GNU ld version 2.40-9.fc39) # SMP PREEMPT_DYNAMIC
Linux version 6.5.0-0.rc2.17.fc39.x86_64 ... GNU ld version 2.40-9.fc39) #1 SMP PREEMPT_DYNAMIC Mon Jul 17 14:57:35 UTC 2023
In verify_namelist(), the while-loop only checks whether the first
linux banner string above matches and then breaks out of the loop. But
actually the second string above is the correct one. As a result, crash
starts up with the following warning:
# ./crash -s vmlinux vmcore
WARNING: kernel version inconsistency between vmlinux and dumpfile
# ./crash -s
WARNING: kernel version inconsistency between vmlinux and live memory
Let's always try to match the correct one; otherwise, still print a
warning as before.
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
kernel.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/kernel.c b/kernel.c
index 546eed95eebd..9801812387bd 100644
--- a/kernel.c
+++ b/kernel.c
@@ -1375,8 +1375,6 @@ verify_namelist()
buffer3[i++] = *p1++;
buffer3[i] = NULLCHAR;
}
-
- break;
}
pclose(pipe);
--
2.37.1

View File

@ -1,49 +0,0 @@
From c74f375e0ef7cd9b593fa1d73c47505822c8f2a0 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Mon, 24 Jul 2023 17:25:12 +0900
Subject: [PATCH 22/30] Fix get_linux_banner_from_vmlinux() for vmlinux without
".rodata" symbol
As written in the previous patch, some recent kernels do not have the
".rodata" symbol. As a result, the get_linux_banner_from_vmlinux()
returns FALSE and the slower fallback routine is used.
Use "__start_rodata" symbol if the ".rodata" symbol is not available.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
kernel.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/kernel.c b/kernel.c
index 9801812387bd..2114700eecc8 100644
--- a/kernel.c
+++ b/kernel.c
@@ -11891,8 +11891,13 @@ int get_linux_banner_from_vmlinux(char *buf, size_t size)
{
struct bfd_section *sect;
long offset;
+ ulong start_rodata;
- if (!kernel_symbol_exists(".rodata"))
+ if (kernel_symbol_exists(".rodata"))
+ start_rodata = symbol_value(".rodata");
+ else if (kernel_symbol_exists("__start_rodata"))
+ start_rodata = symbol_value("__start_rodata");
+ else
return FALSE;
sect = bfd_get_section_by_name(st->bfd, ".rodata");
@@ -11905,7 +11910,7 @@ int get_linux_banner_from_vmlinux(char *buf, size_t size)
* value in vmlinux file, but relative offset to linux_banner
* object in .rodata section is idential.
*/
- offset = symbol_value("linux_banner") - symbol_value(".rodata");
+ offset = symbol_value("linux_banner") - start_rodata;
if (!bfd_get_section_contents(st->bfd,
sect,
--
2.37.1

View File

@ -1,141 +0,0 @@
From 558aecc98987e54b122a09ce0d3c3484b034277f Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 2 Aug 2023 16:18:41 +0800
Subject: [PATCH 23/30] Fix "foreach" command with "DE" state to display only
expected tasks
Currently, the "foreach DE ps -m" command may display "DE" as well as
"ZO" state tasks as below:
crash> foreach DE ps -m
...
[0 00:00:00.040] [ZO] PID: 11458 TASK: ffff91c75680d280 CPU: 7 COMMAND: "ora_w01o_p01mci"
[0 00:00:00.044] [ZO] PID: 49118 TASK: ffff91c7bf3e8000 CPU: 19 COMMAND: "oracle_49118_p0"
[0 00:00:00.050] [ZO] PID: 28748 TASK: ffff91a7cbde3180 CPU: 2 COMMAND: "ora_imr0_p01sci"
[0 00:00:00.050] [DE] PID: 28405 TASK: ffff91a7c8eb0000 CPU: 27 COMMAND: "ora_vktm_p01sci"
[0 00:00:00.051] [ZO] PID: 31716 TASK: ffff91a7f7192100 CPU: 6 COMMAND: "ora_p001_p01sci"
...
That is not the expected behavior; the "foreach" command needs to handle
such cases. Let's add a check to determine if the task state identifier
is specified and the specified identifier is equal to the actual task
state identifier, so that it can filter out the unspecified state
tasks.
With the patch:
crash> foreach DE ps -m
[0 00:00:00.050] [DE] PID: 28405 TASK: ffff91a7c8eb0000 CPU: 27 COMMAND: "ora_vktm_p01sci"
crash>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
task.c | 52 +++++++++++++++++++---------------------------------
2 files changed, 20 insertions(+), 34 deletions(-)
diff --git a/defs.h b/defs.h
index 358f365585cf..5ee60f1eb3a5 100644
--- a/defs.h
+++ b/defs.h
@@ -1203,7 +1203,7 @@ struct foreach_data {
char *pattern;
regex_t regex;
} regex_info[MAX_REGEX_ARGS];
- ulong state;
+ const char *state;
char *reference;
int keys;
int pids;
diff --git a/task.c b/task.c
index b9076da35565..c9206f50c679 100644
--- a/task.c
+++ b/task.c
@@ -6636,39 +6636,42 @@ cmd_foreach(void)
STREQ(args[optind], "NE") ||
STREQ(args[optind], "SW")) {
+ ulong state = TASK_STATE_UNINITIALIZED;
+
if (fd->flags & FOREACH_STATE)
error(FATAL, "only one task state allowed\n");
if (STREQ(args[optind], "RU"))
- fd->state = _RUNNING_;
+ state = _RUNNING_;
else if (STREQ(args[optind], "IN"))
- fd->state = _INTERRUPTIBLE_;
+ state = _INTERRUPTIBLE_;
else if (STREQ(args[optind], "UN"))
- fd->state = _UNINTERRUPTIBLE_;
+ state = _UNINTERRUPTIBLE_;
else if (STREQ(args[optind], "ST"))
- fd->state = _STOPPED_;
+ state = _STOPPED_;
else if (STREQ(args[optind], "TR"))
- fd->state = _TRACING_STOPPED_;
+ state = _TRACING_STOPPED_;
else if (STREQ(args[optind], "ZO"))
- fd->state = _ZOMBIE_;
+ state = _ZOMBIE_;
else if (STREQ(args[optind], "DE"))
- fd->state = _DEAD_;
+ state = _DEAD_;
else if (STREQ(args[optind], "SW"))
- fd->state = _SWAPPING_;
+ state = _SWAPPING_;
else if (STREQ(args[optind], "PA"))
- fd->state = _PARKED_;
+ state = _PARKED_;
else if (STREQ(args[optind], "WA"))
- fd->state = _WAKING_;
+ state = _WAKING_;
else if (STREQ(args[optind], "ID"))
- fd->state = _UNINTERRUPTIBLE_|_NOLOAD_;
+ state = _UNINTERRUPTIBLE_|_NOLOAD_;
else if (STREQ(args[optind], "NE"))
- fd->state = _NEW_;
+ state = _NEW_;
- if (fd->state == TASK_STATE_UNINITIALIZED)
+ if (state == TASK_STATE_UNINITIALIZED)
error(FATAL,
"invalid task state for this kernel: %s\n",
args[optind]);
+ fd->state = args[optind];
fd->flags |= FOREACH_STATE;
optind++;
@@ -7039,26 +7042,9 @@ foreach(struct foreach_data *fd)
if ((fd->flags & FOREACH_KERNEL) && !is_kernel_thread(tc->task))
continue;
- if (fd->flags & FOREACH_STATE) {
- if (fd->state == _RUNNING_) {
- if (task_state(tc->task) != _RUNNING_)
- continue;
- } else if (fd->state & _UNINTERRUPTIBLE_) {
- if (!(task_state(tc->task) & _UNINTERRUPTIBLE_))
- continue;
-
- if (valid_task_state(_NOLOAD_)) {
- if (fd->state & _NOLOAD_) {
- if (!(task_state(tc->task) & _NOLOAD_))
- continue;
- } else {
- if ((task_state(tc->task) & _NOLOAD_))
- continue;
- }
- }
- } else if (!(task_state(tc->task) & fd->state))
- continue;
- }
+ if ((fd->flags & FOREACH_STATE) &&
+ (!STRNEQ(task_state_string(tc->task, buf, 0), fd->state)))
+ continue;
if (specified) {
for (j = 0; j < fd->tasks; j++) {
--
2.37.1

View File

@ -1,189 +0,0 @@
From 69f38d777450c3fe4f089eaa403434815eecdbd7 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Tue, 8 Aug 2023 21:25:31 +0800
Subject: [PATCH 24/30] Fix "ps/vm" commands to display correct memory usage
Kernel commit eca56ff906bd ("mm, shmem: add internal shmem resident
memory accounting") added shmem resident memory accounting and it's
tallied up into the mm_rss_stat counter.
As a result, the "ps/vm" commands miss the shmem pages count and fail to
show correct memory usage when a process uses an anonymous shared memory
region.
Without the patch:
crash> ps 2150
PID PPID CPU TASK ST %MEM VSZ RSS COMM
2150 2105 14 ffff8fba86d74d40 IN 0.0 10488392 444 mmap_test
^^^
Let's count the shmem pages together with regular files and anonymous
pages.
With the patch:
crash> ps 2150
PID PPID CPU TASK ST %MEM VSZ RSS COMM
2150 2105 14 ffff8fba86d74d40 IN 20.8 10488392 3659008 mmap_test
Reported-by: Buland Kumar Singh <bsingh@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 59 ++++++++++++++++++++++++++++++++++++++++++--------------
task.c | 1 +
3 files changed, 46 insertions(+), 15 deletions(-)
diff --git a/defs.h b/defs.h
index 5ee60f1eb3a5..f784d40c0b17 100644
--- a/defs.h
+++ b/defs.h
@@ -887,6 +887,7 @@ struct task_table { /* kernel/local task table data */
int callbacks;
struct task_context **context_by_task; /* task_context sorted by task addr */
ulong pid_xarray;
+ long shmempages;
};
#define TASK_INIT_DONE (0x1)
diff --git a/memory.c b/memory.c
index acbee6389472..3f524fa40eb4 100644
--- a/memory.c
+++ b/memory.c
@@ -4466,13 +4466,13 @@ in_user_stack(ulong task, ulong vaddr)
}
/*
- * Set the const value of filepages and anonpages
- * according to MM_FILEPAGES and MM_ANONPAGES.
+ * Set the const value of filepages, anonpages and shmempages
+ * according to MM_FILEPAGES, MM_ANONPAGES and MM_SHMEMPAGES.
*/
static void
rss_page_types_init(void)
{
- long anonpages, filepages;
+ long anonpages, filepages, shmempages;
if (VALID_MEMBER(mm_struct_rss))
return;
@@ -4487,6 +4487,15 @@ rss_page_types_init(void)
}
tt->filepages = filepages;
tt->anonpages = anonpages;
+
+ /*
+ * The default value(MM_SHMEMPAGES) is 3, which is introduced
+ * in linux v4.5-rc1 and later. See commit eca56ff906bd.
+ */
+ if (!enumerator_value("MM_SHMEMPAGES", &shmempages))
+ tt->shmempages = -1;
+ else
+ tt->shmempages = shmempages;
}
}
@@ -4812,10 +4821,11 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
* Latest kernels have mm_struct.mm_rss_stat[].
*/
if (VALID_MEMBER(mm_struct_rss_stat) && VALID_MEMBER(mm_rss_stat_count)) {
- long anonpages, filepages, count;
+ long anonpages, filepages, shmempages, count;
anonpages = tt->anonpages;
filepages = tt->filepages;
+ shmempages = tt->shmempages;
count = LONG(tt->mm_struct +
OFFSET(mm_struct_rss_stat) +
OFFSET(mm_rss_stat_count) +
@@ -4836,6 +4846,15 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
if (count > 0)
rss += count;
+ if (shmempages > 0) {
+ count = LONG(tt->mm_struct +
+ OFFSET(mm_struct_rss_stat) +
+ OFFSET(mm_rss_stat_count) +
+ (shmempages * sizeof(long)));
+ if (count > 0)
+ rss += count;
+ }
+
} else if (VALID_MEMBER(mm_struct_rss_stat)) {
/* 6.2: struct percpu_counter rss_stat[NR_MM_COUNTERS] */
ulong fbc;
@@ -4847,6 +4866,10 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
fbc = tc->mm_struct + OFFSET(mm_struct_rss_stat) +
(tt->anonpages * SIZE(percpu_counter));
rss += percpu_counter_sum_positive(fbc);
+
+ fbc = tc->mm_struct + OFFSET(mm_struct_rss_stat) +
+ (tt->shmempages * SIZE(percpu_counter));
+ rss += percpu_counter_sum_positive(fbc);
}
/* Check whether SPLIT_RSS_COUNTING is enabled */
@@ -4880,12 +4903,11 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
if (ACTIVE() || last->rss_cache == UNINITIALIZED) {
while (first <= last)
{
+ ulong addr = first->task + OFFSET(task_struct_rss_stat) +
+ OFFSET(task_rss_stat_count);
+
/* count 0 -> filepages */
- if (!readmem(first->task +
- OFFSET(task_struct_rss_stat) +
- OFFSET(task_rss_stat_count), KVADDR,
- &sync_rss,
- sizeof(int),
+ if (!readmem(addr, KVADDR, &sync_rss, sizeof(int),
"task_struct rss_stat MM_FILEPAGES",
RETURN_ON_ERROR))
continue;
@@ -4894,12 +4916,7 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
rss_cache += sync_rss;
/* count 1 -> anonpages */
- if (!readmem(first->task +
- OFFSET(task_struct_rss_stat) +
- OFFSET(task_rss_stat_count) +
- sizeof(int),
- KVADDR, &sync_rss,
- sizeof(int),
+ if (!readmem(addr + sizeof(int), KVADDR, &sync_rss, sizeof(int),
"task_struct rss_stat MM_ANONPAGES",
RETURN_ON_ERROR))
continue;
@@ -4907,6 +4924,18 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
if (sync_rss > 0)
rss_cache += sync_rss;
+ /* count 3 -> shmempages */
+ if (tt->shmempages >= 0) {
+ if (!readmem(addr + tt->shmempages * sizeof(int), KVADDR,
+ &sync_rss, sizeof(int),
+ "task_struct rss_stat MM_SHMEMPAGES",
+ RETURN_ON_ERROR))
+ continue;
+
+ if (sync_rss > 0)
+ rss_cache += sync_rss;
+ }
+
if (first == last)
break;
first++;
diff --git a/task.c b/task.c
index c9206f50c679..4018a543b715 100644
--- a/task.c
+++ b/task.c
@@ -7873,6 +7873,7 @@ dump_task_table(int verbose)
fprintf(fp, " init_pid_ns: %lx\n", tt->init_pid_ns);
fprintf(fp, " filepages: %ld\n", tt->filepages);
fprintf(fp, " anonpages: %ld\n", tt->anonpages);
+ fprintf(fp, " shmempages: %ld\n", tt->shmempages);
fprintf(fp, " stack_end_magic: %lx\n", tt->stack_end_magic);
fprintf(fp, " pf_kthread: %lx ", tt->pf_kthread);
switch (tt->pf_kthread)
--
2.37.1

View File

@ -1,90 +0,0 @@
From ff963b795b3f93b9d1a3cc5ec0212ebca545259f Mon Sep 17 00:00:00 2001
From: Song Shuai <suagrfillet@gmail.com>
Date: Fri, 4 Aug 2023 17:15:59 +0800
Subject: [PATCH 25/30] RISCV64: Use va_kernel_pa_offset in VTOP()
Since RISC-V Linux v6.4, the commit 3335068f8721 ("riscv: Use
PUD/P4D/PGD pages for the linear mapping") changes phys_ram_base from
the physical start of the kernel to the actual start of the DRAM.
Crash's VTOP() still uses phys_ram_base and kernel_map.virt_addr
to translate kernel virtual addresses, which makes Crash fail to start
with Linux v6.4 and later versions.
Linux exports kernel_map.va_kernel_pa_offset in v6.5 (backported to
v6.4.0 stable), so Crash can use "va_kernel_pa_offset" to translate
kernel virtual addresses in VTOP() correctly.
Signed-off-by: Song Shuai <suagrfillet@gmail.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 4 ++--
riscv64.c | 23 +++++++++++++++++++++++
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index f784d40c0b17..4883f889f169 100644
--- a/defs.h
+++ b/defs.h
@@ -3663,8 +3663,7 @@ typedef signed int s32;
ulong _X = X; \
(THIS_KERNEL_VERSION >= LINUX(5,13,0) && \
(_X) >= machdep->machspec->kernel_link_addr) ? \
- (((unsigned long)(_X)-(machdep->machspec->kernel_link_addr)) + \
- machdep->machspec->phys_base): \
+ ((unsigned long)(_X)-(machdep->machspec->va_kernel_pa_offset)): \
(((unsigned long)(_X)-(machdep->kvbase)) + \
machdep->machspec->phys_base); \
})
@@ -7022,6 +7021,7 @@ struct machine_specific {
ulong modules_vaddr;
ulong modules_end;
ulong kernel_link_addr;
+ ulong va_kernel_pa_offset;
ulong _page_present;
ulong _page_read;
diff --git a/riscv64.c b/riscv64.c
index 6b9a68840d4c..7b5dd3db7f91 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -418,6 +418,28 @@ error:
error(FATAL, "cannot get vm layout\n");
}
+static void
+riscv64_get_va_kernel_pa_offset(struct machine_specific *ms)
+{
+ unsigned long kernel_version = riscv64_get_kernel_version();
+
+ /*
+ * Since Linux v6.4 phys_base is not the physical start of the kernel,
+ * trying to use "va_kernel_pa_offset" to determine the offset between
+ * kernel virtual and physical addresses.
+ */
+ if (kernel_version >= LINUX(6,4,0)) {
+ char *string;
+ if ((string = pc->read_vmcoreinfo("NUMBER(va_kernel_pa_offset)"))) {
+ ms->va_kernel_pa_offset = htol(string, QUIET, NULL);
+ free(string);
+ } else
+ error(FATAL, "cannot read va_kernel_pa_offset\n");
+ }
+ else
+ ms->va_kernel_pa_offset = ms->kernel_link_addr - ms->phys_base;
+}
+
static int
riscv64_is_kvaddr(ulong vaddr)
{
@@ -1352,6 +1374,7 @@ riscv64_init(int when)
riscv64_get_struct_page_size(machdep->machspec);
riscv64_get_va_bits(machdep->machspec);
riscv64_get_va_range(machdep->machspec);
+ riscv64_get_va_kernel_pa_offset(machdep->machspec);
pt_level_alloc(&machdep->pgd, "cannot malloc pgd space.");
pt_level_alloc(&machdep->machspec->p4d, "cannot malloc p4d space.");
--
2.37.1

View File

@ -1,125 +0,0 @@
From bc145861bfeb8b20b77309cb477359e9d46680d6 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 14 Aug 2023 09:54:23 +0800
Subject: [PATCH 26/30] Revert "Fix "kmem -s|-S" not working properly on
RHEL8.6 and later"
This reverts commit 9253b40a0ecb2d365f89f0a5ebc28a01735c1d24.
The commit 9253b40a0ecb only handles the current issue on x86_64/x86
architectures. Furthermore the freelist_ptr_bswap_x86() depends on
disassembling a static symbol which might not be available, depending on
how the compiler decides to optimize the code, that is to say, the
compiler might generate different code eventually.
More importantly, a subsequent patch can cover the current issue on
various architectures. Given that, revert the commit.
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 -
memory.c | 49 +------------------------------------------------
2 files changed, 1 insertion(+), 49 deletions(-)
diff --git a/defs.h b/defs.h
index 4883f889f169..20b64a748d5a 100644
--- a/defs.h
+++ b/defs.h
@@ -2663,7 +2663,6 @@ struct vm_table { /* kernel VM-related data */
#define SLAB_OVERLOAD_PAGE (0x8000000)
#define SLAB_CPU_CACHE (0x10000000)
#define SLAB_ROOT_CACHES (0x20000000)
-#define FREELIST_PTR_BSWAP (0x40000000)
#define IS_FLATMEM() (vt->flags & FLATMEM)
#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM)
diff --git a/memory.c b/memory.c
index 3f524fa40eb4..39f0e0ec36d2 100644
--- a/memory.c
+++ b/memory.c
@@ -320,7 +320,6 @@ static void dump_per_cpu_offsets(void);
static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
-static void freelist_ptr_init(void);
static ulong freelist_ptr(struct meminfo *, ulong, ulong);
static ulong handle_each_vm_area(struct handle_each_vm_area_args *);
@@ -791,8 +790,6 @@ vm_init(void)
MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name");
MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags");
MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random");
- if (VALID_MEMBER(kmem_cache_random))
- freelist_ptr_init();
MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist");
MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page");
if (INVALID_MEMBER(kmem_cache_cpu_page))
@@ -13994,8 +13991,6 @@ dump_vm_table(int verbose)
fprintf(fp, "%sSLAB_CPU_CACHE", others++ ? "|" : "");\
if (vt->flags & SLAB_ROOT_CACHES)
fprintf(fp, "%sSLAB_ROOT_CACHES", others++ ? "|" : "");\
- if (vt->flags & FREELIST_PTR_BSWAP)
- fprintf(fp, "%sFREELIST_PTR_BSWAP", others++ ? "|" : "");\
if (vt->flags & USE_VMAP_AREA)
fprintf(fp, "%sUSE_VMAP_AREA", others++ ? "|" : "");\
if (vt->flags & CONFIG_NUMA)
@@ -19682,55 +19677,13 @@ count_free_objects(struct meminfo *si, ulong freelist)
return c;
}
-/*
- * With CONFIG_SLAB_FREELIST_HARDENED, freelist_ptr's are crypted with xor's,
- * and for recent release with an additionnal bswap. Some releases prio to 5.7.0
- * may be using the additionnal bswap. The only easy and reliable way to tell is
- * to inspect assembly code (eg. "__slab_free") for a bswap instruction.
- */
-static int
-freelist_ptr_bswap_x86(void)
-{
- char buf1[BUFSIZE];
- char buf2[BUFSIZE];
- char *arglist[MAXARGS];
- int found;
-
- sprintf(buf1, "disassemble __slab_free");
- open_tmpfile();
- if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
- close_tmpfile();
- return FALSE;
- }
- rewind(pc->tmpfile);
- found = FALSE;
- while (fgets(buf2, BUFSIZE, pc->tmpfile)) {
- if (parse_line(buf2, arglist) < 3)
- continue;
- if (STREQ(arglist[2], "bswap")) {
- found = TRUE;
- break;
- }
- }
- close_tmpfile();
- return found;
-}
-
-static void
-freelist_ptr_init(void)
-{
- if (THIS_KERNEL_VERSION >= LINUX(5,7,0) ||
- ((machine_type("X86_64") || machine_type("X86")) && freelist_ptr_bswap_x86()))
- vt->flags |= FREELIST_PTR_BSWAP;
-}
-
static ulong
freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
{
if (VALID_MEMBER(kmem_cache_random)) {
/* CONFIG_SLAB_FREELIST_HARDENED */
- if (vt->flags & FREELIST_PTR_BSWAP)
+ if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
: bswap_32(ptr_addr);
return (ptr ^ si->random ^ ptr_addr);
--
2.37.1

View File

@ -1,52 +0,0 @@
From eeaed479a438891fca96977cd64ae1166fddd38e Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 14 Aug 2023 09:54:24 +0800
Subject: [PATCH 27/30] Fix "kmem -s|-S" not working properly when
CONFIG_SLAB_FREELIST_HARDENED is enabled
Currently, crash-utility still depends on detecting the kernel version,
or the asm instruction 'bswap' on x86_64/x86 architectures to decide how
to deal with the freelist ptr obfuscation, when kernel option
CONFIG_SLAB_FREELIST_HARDENED is enabled.
As you know, the bit diffusion for freelist ptr obfuscation has
changed several times on the kernel side. Distributions often
backport these kernel patches from upstream, especially to older
kernels, and then 'kmem -s|-S' fails with
an error "invalid freepointer", which can be observed on ppc64le and
S390x architectures, etc. That is really not friendly.
Given that, let's fix the above issues this time, and it won't rely
on the linux version number or asm instruction 'bswap' to decide how to
dereference the freelist ptr.
Reported-by: Lucas Oakley <soakley@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
---
memory.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/memory.c b/memory.c
index 39f0e0ec36d2..5d76c5d7fe6f 100644
--- a/memory.c
+++ b/memory.c
@@ -19683,9 +19683,12 @@ freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
if (VALID_MEMBER(kmem_cache_random)) {
/* CONFIG_SLAB_FREELIST_HARDENED */
- if (THIS_KERNEL_VERSION >= LINUX(5,7,0))
- ptr_addr = (sizeof(long) == 8) ? bswap_64(ptr_addr)
- : bswap_32(ptr_addr);
+ ulong addr = (sizeof(long) == 8) ? bswap_64(ptr_addr) : bswap_32(ptr_addr);
+ addr = ptr ^ si->random ^ addr;
+
+ if (!addr || accessible(addr))
+ return addr;
+
return (ptr ^ si->random ^ ptr_addr);
} else
return ptr;
--
2.37.1

View File

@ -1,234 +0,0 @@
From f774fe0f59b45596e5165eb008845b3534f650d0 Mon Sep 17 00:00:00 2001
From: Rafael Aquini <aquini@redhat.com>
Date: Mon, 14 Aug 2023 09:41:12 -0400
Subject: [PATCH 28/30] deduplicate kernel_version open-coded parser
The code that parses kernel version from OSRELEASE/UTSRELEASE strings
and populates the global kernel table is duplicated across the codebase
for no good reason. This commit consolidates all the duplicated parsing
code into a single method to remove the unnecessary duplicated code.
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 27 +++----------------
defs.h | 2 ++
kernel.c | 77 ++++++++++++++++++++++++++-----------------------------
riscv64.c | 25 ++----------------
4 files changed, 43 insertions(+), 88 deletions(-)
diff --git a/arm64.c b/arm64.c
index 67b1a2244810..39d5f04a1263 100644
--- a/arm64.c
+++ b/arm64.c
@@ -834,35 +834,14 @@ static struct kernel_va_range_handler kernel_va_range_handlers[] = {
static unsigned long arm64_get_kernel_version(void)
{
char *string;
- char buf[BUFSIZE];
- char *p1, *p2;
if (THIS_KERNEL_VERSION)
return THIS_KERNEL_VERSION;
- string = pc->read_vmcoreinfo("OSRELEASE");
- if (string) {
- strcpy(buf, string);
-
- p1 = p2 = buf;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[0] = atoi(p1);
-
- p1 = ++p2;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[1] = atoi(p1);
-
- p1 = ++p2;
- while ((*p2 >= '0') && (*p2 <= '9'))
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[2] = atoi(p1);
+ if ((string = pc->read_vmcoreinfo("OSRELEASE"))) {
+ parse_kernel_version(string);
+ free(string);
}
- free(string);
return THIS_KERNEL_VERSION;
}
diff --git a/defs.h b/defs.h
index 20b64a748d5a..96a7a2a31471 100644
--- a/defs.h
+++ b/defs.h
@@ -6031,6 +6031,8 @@ void clone_bt_info(struct bt_info *, struct bt_info *, struct task_context *);
void dump_kernel_table(int);
void dump_bt_info(struct bt_info *, char *where);
void dump_log(int);
+void parse_kernel_version(char *);
+
#define LOG_LEVEL(v) ((v) & 0x07)
#define SHOW_LOG_LEVEL (0x1)
#define SHOW_LOG_DICT (0x2)
diff --git a/kernel.c b/kernel.c
index 2114700eecc8..988206b2e55a 100644
--- a/kernel.c
+++ b/kernel.c
@@ -104,6 +104,38 @@ static void check_vmcoreinfo(void);
static int is_pvops_xen(void);
static int get_linux_banner_from_vmlinux(char *, size_t);
+/*
+ * populate the global kernel table (kt) with kernel version
+ * information parsed from UTSNAME/OSRELEASE string
+ */
+void
+parse_kernel_version(char *str)
+{
+ char *p1, *p2, separator;
+
+ p1 = p2 = str;
+ while (*p2 != '.' && *p2 != '\0')
+ p2++;
+
+ *p2 = NULLCHAR;
+ kt->kernel_version[0] = atoi(p1);
+ p1 = ++p2;
+ while (*p2 != '.' && *p2 != '-' && *p2 != '\0')
+ p2++;
+
+ separator = *p2;
+ *p2 = NULLCHAR;
+ kt->kernel_version[1] = atoi(p1);
+
+ if (separator == '.') {
+ p1 = ++p2;
+ while ((*p2 >= '0') && (*p2 <= '9'))
+ p2++;
+
+ *p2 = NULLCHAR;
+ kt->kernel_version[2] = atoi(p1);
+ }
+}
/*
* Gather a few kernel basics.
@@ -112,7 +144,7 @@ void
kernel_init()
{
int i, c;
- char *p1, *p2, buf[BUFSIZE];
+ char buf[BUFSIZE];
struct syment *sp1, *sp2;
char *rqstruct;
char *rq_timestamp_name = NULL;
@@ -270,28 +302,7 @@ kernel_init()
if (buf[64])
buf[64] = NULLCHAR;
if (ascii_string(kt->utsname.release)) {
- char separator;
-
- p1 = p2 = buf;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[0] = atoi(p1);
- p1 = ++p2;
- while (*p2 != '.' && *p2 != '-' && *p2 != '\0')
- p2++;
- separator = *p2;
- *p2 = NULLCHAR;
- kt->kernel_version[1] = atoi(p1);
- *p2 = separator;
- if (*p2 == '.') {
- p1 = ++p2;
- while ((*p2 >= '0') && (*p2 <= '9'))
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[2] = atoi(p1);
- } else
- kt->kernel_version[2] = 0;
+ parse_kernel_version(buf);
if (CRASHDEBUG(1))
fprintf(fp, "base kernel version: %d.%d.%d\n",
@@ -10973,8 +10984,6 @@ void
get_log_from_vmcoreinfo(char *file)
{
char *string;
- char buf[BUFSIZE];
- char *p1, *p2;
struct vmcoreinfo_data *vmc = &kt->vmcoreinfo;
if (!(pc->flags2 & VMCOREINFO))
@@ -10986,22 +10995,8 @@ get_log_from_vmcoreinfo(char *file)
if ((string = pc->read_vmcoreinfo("OSRELEASE"))) {
if (CRASHDEBUG(1))
fprintf(fp, "OSRELEASE: %s\n", string);
- strcpy(buf, string);
- p1 = p2 = buf;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[0] = atoi(p1);
- p1 = ++p2;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[1] = atoi(p1);
- p1 = ++p2;
- while ((*p2 >= '0') && (*p2 <= '9'))
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[2] = atoi(p1);
+
+ parse_kernel_version(string);
if (CRASHDEBUG(1))
fprintf(fp, "base kernel version: %d.%d.%d\n",
diff --git a/riscv64.c b/riscv64.c
index 7b5dd3db7f91..fef08a440f3d 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -259,33 +259,12 @@ riscv64_processor_speed(void)
static unsigned long riscv64_get_kernel_version(void)
{
char *string;
- char buf[BUFSIZE];
- char *p1, *p2;
if (THIS_KERNEL_VERSION)
return THIS_KERNEL_VERSION;
- string = pc->read_vmcoreinfo("OSRELEASE");
- if (string) {
- strcpy(buf, string);
-
- p1 = p2 = buf;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[0] = atoi(p1);
-
- p1 = ++p2;
- while (*p2 != '.')
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[1] = atoi(p1);
-
- p1 = ++p2;
- while ((*p2 >= '0') && (*p2 <= '9'))
- p2++;
- *p2 = NULLCHAR;
- kt->kernel_version[2] = atoi(p1);
+ if ((string = pc->read_vmcoreinfo("OSRELEASE"))) {
+ parse_kernel_version(string);
free(string);
}
return THIS_KERNEL_VERSION;
--
2.37.1

View File

@ -1,138 +0,0 @@
From 1aa93cd33fa11f9d9bc9dc7e6a698d690fdd1bb3 Mon Sep 17 00:00:00 2001
From: Song Shuai <suagrfillet@gmail.com>
Date: Fri, 18 Aug 2023 17:50:28 +0800
Subject: [PATCH 29/30] RISCV64: Add KASLR support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This patch adds KASLR support for Crash to analyze KASLR-ed vmcore
since RISC-V Linux is already sufficiently prepared for KASLR [1].
With this patch, even if the Crash '--kaslr' option is not set or Linux
CONFIG_RANDOMIZE_BASE is not configured, the 'derive_kaslr_offset()'
function will always work to calculate 'kt->relocate' which serves to
update the kernel virtual address.
Testing in Qemu rv64 virt, the kernel log printed the kernel offset:
[ 121.214447] SMP: stopping secondary CPUs
[ 121.215445] Kernel Offset: 0x37c00000 from 0xffffffff80000000
[ 121.216312] Starting crashdump kernel...
[ 121.216585] Will call new kernel at 94800000 from hart id 0
[ 121.216834] FDT image at 9c7fd000
[ 121.216982] Bye...
Running crash with '-d 1' option and without '--kaslr' option,
we get the right 'kt->relocate' and kernel link addr:
$ ../crash/crash -d 1 vmlinux vmcore_kaslr_0815
...
KASLR:
_stext from vmlinux: ffffffff80002000
_stext from vmcoreinfo: ffffffffb7c02000
relocate: 37c00000 (892MB)
vmemmap : 0xff1c000000000000 - 0xff20000000000000
vmalloc : 0xff20000000000000 - 0xff60000000000000
mudules : 0xffffffff3952f000 - 0xffffffffb7c00000
lowmem : 0xff60000000000000 -
kernel link addr : 0xffffffffb7c00000
...
KERNEL: /home/song/9_linux/linux/00_rv_kaslr/vmlinux
DUMPFILE: /tmp/hello/vmcore_kaslr_0815
CPUS: 2
DATE: Tue Aug 15 16:36:15 CST 2023
UPTIME: 00:02:01
LOAD AVERAGE: 0.40, 0.23, 0.09
TASKS: 63
NODENAME: stage4.fedoraproject.org
RELEASE: 6.5.0-rc3-00008-gad18dee423ac
VERSION: #17 SMP Tue Aug 15 14:41:12 CST 2023
MACHINE: riscv64 (unknown Mhz)
MEMORY: 511.8 MB
PANIC: "Kernel panic - not syncing: sysrq triggered crash"
PID: 160
COMMAND: "bash"
TASK: ff6000000152bac0 [THREAD_INFO: ff6000000152bac0]
CPU: 1
STATE: TASK_RUNNING (PANIC)
crash>
[1]: https://lore.kernel.org/linux-riscv/20230722123850.634544-1-alexghiti@rivosinc.com/
Signed-off-by: Song Shuai <suagrfillet@gmail.com>
Reviewed-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
main.c | 2 +-
riscv64.c | 11 +++++++++++
symbols.c | 4 ++--
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/main.c b/main.c
index b278c22e2591..0c6e5958f3ad 100644
--- a/main.c
+++ b/main.c
@@ -228,7 +228,7 @@ main(int argc, char **argv)
} else if (STREQ(long_options[option_index].name, "kaslr")) {
if (!machine_type("X86_64") &&
!machine_type("ARM64") && !machine_type("X86") &&
- !machine_type("S390X"))
+ !machine_type("S390X") && !machine_type("RISCV64"))
error(INFO, "--kaslr not valid "
"with this machine type.\n");
else if (STREQ(optarg, "auto"))
diff --git a/riscv64.c b/riscv64.c
index fef08a440f3d..0aaa14b2671e 100644
--- a/riscv64.c
+++ b/riscv64.c
@@ -357,6 +357,9 @@ static void riscv64_get_va_range(struct machine_specific *ms)
} else
goto error;
+ if ((kt->flags2 & KASLR) && (kt->flags & RELOC_SET))
+ ms->kernel_link_addr += (kt->relocate * -1);
+
/*
* From Linux 5.13, the kernel mapping is moved to the last 2GB
* of the address space, modules use the 2GB memory range right
@@ -1340,6 +1343,14 @@ riscv64_init(int when)
machdep->verify_paddr = generic_verify_paddr;
machdep->ptrs_per_pgd = PTRS_PER_PGD;
+
+ /*
+ * Even if CONFIG_RANDOMIZE_BASE is not configured,
+ * derive_kaslr_offset() should work and set
+ * kt->relocate to 0
+ */
+ if (!kt->relocate && !(kt->flags2 & (RELOC_AUTO|KASLR)))
+ kt->flags2 |= (RELOC_AUTO|KASLR);
break;
case PRE_GDB:
diff --git a/symbols.c b/symbols.c
index 876be7aea90e..8e8b4c31d915 100644
--- a/symbols.c
+++ b/symbols.c
@@ -629,7 +629,7 @@ kaslr_init(void)
char *string;
if ((!machine_type("X86_64") && !machine_type("ARM64") && !machine_type("X86") &&
- !machine_type("S390X")) || (kt->flags & RELOC_SET))
+ !machine_type("S390X") && !machine_type("RISCV64")) || (kt->flags & RELOC_SET))
return;
if (!kt->vmcoreinfo._stext_SYMBOL &&
@@ -795,7 +795,7 @@ store_symbols(bfd *abfd, int dynamic, void *minisyms, long symcount,
} else if (!(kt->flags & RELOC_SET))
kt->flags |= RELOC_FORCE;
} else if (machine_type("X86_64") || machine_type("ARM64") ||
- machine_type("S390X")) {
+ machine_type("S390X") || machine_type("RISCV64")) {
if ((kt->flags2 & RELOC_AUTO) && !(kt->flags & RELOC_SET))
derive_kaslr_offset(abfd, dynamic, from,
fromend, size, store);
--
2.37.1

View File

@ -1,75 +0,0 @@
From 3253e5ac87c67dd7742e2b2bd9d912f21c1d2711 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Fri, 25 Aug 2023 14:23:27 +0800
Subject: [PATCH 30/30] Fix "ps/vm" commands to display the memory usage for
exiting tasks
When a task is exiting, the kernel usually marks its flags as 'PF_EXITING',
but even so, sometimes the mm_struct has not yet been freed and might still
be valid. For such tasks, the "ps/vm" commands won't display the memory
usage. For example:
crash> ps 47070
PID PPID CPU TASK ST %MEM VSZ RSS COMM
47070 1 0 ffff9ba7c4910000 UN 0.0 0 0 ra_ris.parse
crash> vm 47070
PID: 47070 TASK: ffff9ba7c4910000 CPU: 0 COMMAND: "ra_ris.parse"
MM PGD RSS TOTAL_VM
0 0 0k 0k
This is a corner case, but it has already occurred in actual production
environments. Given that, let's allow the "ps/vm" commands to try to
display the memory usage for this case. Note that this is not guaranteed
to work every time; it still depends on how far the mm_struct teardown
has proceeded.
With the patch:
crash> ps 47070
PID PPID CPU TASK ST %MEM VSZ RSS COMM
47070 1 0 ffff9ba7c4910000 UN 90.8 38461228 31426444 ra_ris.parse
crash> vm 47070
PID: 47070 TASK: ffff9ba7c4910000 CPU: 0 COMMAND: "ra_ris.parse"
MM PGD RSS TOTAL_VM
ffff9bad6e873840 ffff9baee0544000 31426444k 38461228k
VMA START END FLAGS FILE
ffff9bafdbe1d6c8 400000 8c5000 8000875 /data1/rishome/ra_cu_cn_412/sbin/ra_ris.parse
...
Reported-by: Buland Kumar Singh <bsingh@redhat.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
memory.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/memory.c b/memory.c
index 5d76c5d7fe6f..86ccec5e2bac 100644
--- a/memory.c
+++ b/memory.c
@@ -4792,10 +4792,11 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
{
struct task_context *tc;
long rss = 0, rss_cache = 0;
+ int mm_count = 0;
BZERO(tm, sizeof(struct task_mem_usage));
- if (IS_ZOMBIE(task) || IS_EXITING(task))
+ if (IS_ZOMBIE(task))
return;
tc = task_to_context(task);
@@ -4808,6 +4809,11 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm)
if (!task_mm(task, TRUE))
return;
+ mm_count = INT(tt->mm_struct + OFFSET(mm_struct_mm_count));
+
+ if (IS_EXITING(task) && mm_count <= 0)
+ return;
+
if (VALID_MEMBER(mm_struct_rss))
/*
* mm_struct.rss or mm_struct._rss exist.
--
2.37.1

View File

@ -4,7 +4,7 @@
Summary: Kernel analysis utility for live systems, netdump, diskdump, kdump, LKCD or mcore dumpfiles Summary: Kernel analysis utility for live systems, netdump, diskdump, kdump, LKCD or mcore dumpfiles
Name: crash Name: crash
Version: 8.0.4 Version: 8.0.4
Release: 2%{?dist} Release: 2%{?dist}.alma
License: GPLv3 License: GPLv3
Group: Development/Debuggers Group: Development/Debuggers
Source0: https://github.com/crash-utility/crash/archive/crash-%{version}.tar.gz Source0: https://github.com/crash-utility/crash/archive/crash-%{version}.tar.gz
@ -37,7 +37,7 @@ Patch15: 0014-x86_64-check-bt-bptr-before-calculate-framesize.patch
%description %description
The core analysis suite is a self-contained tool that can be used to The core analysis suite is a self-contained tool that can be used to
investigate either live systems, kernel core dumps created from the investigate either live systems, kernel core dumps created from the
netdump, diskdump and kdump packages from Red Hat Linux, the mcore kernel patch netdump, diskdump and kdump packages from AlmaLinux, the mcore kernel patch
offered by Mission Critical Linux, or the LKCD kernel patch. offered by Mission Critical Linux, or the LKCD kernel patch.
%package devel %package devel
@ -48,7 +48,7 @@ Group: Development/Debuggers
%description devel %description devel
The core analysis suite is a self-contained tool that can be used to The core analysis suite is a self-contained tool that can be used to
investigate either live systems, kernel core dumps created from the investigate either live systems, kernel core dumps created from the
netdump, diskdump and kdump packages from Red Hat Linux, the mcore kernel patch netdump, diskdump and kdump packages from AlmaLinux, the mcore kernel patch
offered by Mission Critical Linux, or the LKCD kernel patch. offered by Mission Critical Linux, or the LKCD kernel patch.
%prep %prep
@ -96,11 +96,13 @@ rm -rf %{buildroot}
%{_includedir}/* %{_includedir}/*
%changelog %changelog
* Wed Mar 27 2024 Eduard Abdullin <eabdullin@almalinux.org> - 8.0.4-2.alma
- AlmaLinux changes
* Tue Jan 02 2024 Lianbo Jiang <lijiang@redhat.com> - 8.0.4-2 * Tue Jan 02 2024 Lianbo Jiang <lijiang@redhat.com> - 8.0.4-2
- Fix the "dis -lr" not displaying the source file names - Fix the "dis -lr" not displaying the source file names
and line numbers and line numbers
- Fix incorrect symbol translation by the 'struct blk_mq_ops' - Fix incorrect symbol translation by the 'struct blk_mq_ops'
* Fri Nov 17 2023 Lianbo Jiang <lijiang@redhat.com> - 8.0.4-1 * Fri Nov 17 2023 Lianbo Jiang <lijiang@redhat.com> - 8.0.4-1
- Rebase to upstream crash 8.0.4 - Rebase to upstream crash 8.0.4
@ -111,7 +113,6 @@ rm -rf %{buildroot}
* Thu Jun 15 2023 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-8 * Thu Jun 15 2023 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-8
- arm64: Fix again segfault in arm64_is_kernel_exception_frame() - arm64: Fix again segfault in arm64_is_kernel_exception_frame()
- Fix invalid structure size error during crash startup on ppc64 - Fix invalid structure size error during crash startup on ppc64
* Wed Jun 07 2023 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-7 * Wed Jun 07 2023 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-7
- Fix segfault caused by failure of stopping CPUs - Fix segfault caused by failure of stopping CPUs
@ -124,13 +125,11 @@ rm -rf %{buildroot}
* Mon Nov 21 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-4 * Mon Nov 21 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-4
- Fix for commit 2145b2bb79c5, there are different behaviors between gdb-7.6 and gdb-10.2 - Fix for commit 2145b2bb79c5, there are different behaviors between gdb-7.6 and gdb-10.2
* Thu Nov 17 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-3 * Thu Nov 17 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-3
- Update to the latest commit a158590f475c from master branch - Update to the latest commit a158590f475c from master branch
* Thu Jun 16 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-2 * Thu Jun 16 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-2
- Enhance "dev -d|-D" options to support blk-mq sbitmap - Enhance "dev -d|-D" options to support blk-mq sbitmap
* Mon May 16 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-1 * Mon May 16 2022 Lianbo Jiang <lijiang@redhat.com> - 7.3.2-1
- Rebase to upstream crash 7.3.2 - Rebase to upstream crash 7.3.2

View File

@ -1,6 +0,0 @@
--- !Policy
product_versions:
- rhel-8
decision_context: osci_compose_gate
rules:
- !PassingTestCaseRule {test_case_name: kernel-qe.kernel-ci.general-kdump.tier0.functional}

View File

@ -1,3 +0,0 @@
---
inspections:
badfuncs: off

View File

@ -1,2 +0,0 @@
SHA512 (crash-8.0.4.tar.gz) = a08589026515990eee555af6eeba0457433fe41263512ed67dfcac1cf49a8f61dc794081f4984700d8dfed228440a1d7928fdd1f5cf4ae8a45cf39eb49d3470b
SHA512 (gdb-10.2.tar.gz) = aa89caf47c1c84366020377d47e7c51ddbc48e5b7686f244e38797c8eb88411cf57fcdc37eb669961efb41ceeac4181747f429625fd1acce7712cb9a1fea9c41