Compare commits

...

No commits in common. "c8" and "c8s" have entirely different histories.
c8 ... c8s

51 changed files with 5659 additions and 8 deletions

9
.gitignore vendored
View File

@ -1,2 +1,7 @@
SOURCES/crash-8.0.4.tar.gz
SOURCES/gdb-10.2.tar.gz
SOURCES/crash-7.3.2.tar.gz
SOURCES/gdb-7.6.tar.gz
/crash-7.3.2.tar.gz
/gdb-7.6.tar.gz
/crash-8.0.3.tar.gz
/crash-8.0.4.tar.gz
/gdb-10.2.tar.gz

View File

@ -0,0 +1,52 @@
From 040a56e9f9d0df15a2f8161ed3a0a907d70dda03 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Wed, 10 May 2023 16:09:03 +0900
Subject: [PATCH 01/30] Fix kernel version macros for revision numbers over 255
The current comparison macros for kernel version shift minor number only
8 bits. This can cause an unexpected result on kernels with revision
number over 255, e.g. Linux 4.14.314.
In fact, on Linux 4.14.314 for x86_64 without CONFIG_RANDOMIZE_BASE=y
(KASLR), the following condition became false in x86_64_init().
((THIS_KERNEL_VERSION >= LINUX(4,14,84)) &&
(THIS_KERNEL_VERSION < LINUX(4,15,0)))
As a result, crash used a wrong hard-coded value for PAGE_OFFSET and
failed to start a session with the following seek error.
crash: seek error: physical address: 200e000 type: "pud page"
Shift the major and minor number by 24 and 16 bits respectively to fix
this issue.
Reported-by: Luiz Capitulino <luizcap@amazon.com>
Tested-by: Luiz Capitulino <luizcap@amazon.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/defs.h b/defs.h
index 12ad6aaa0998..211fc9d55d33 100644
--- a/defs.h
+++ b/defs.h
@@ -807,10 +807,10 @@ struct kernel_table { /* kernel data */
} \
}
-#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 16) + \
- (kt->kernel_version[1] << 8) + \
+#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 24) + \
+ (kt->kernel_version[1] << 16) + \
(kt->kernel_version[2]))
-#define LINUX(x,y,z) (((uint)(x) << 16) + ((uint)(y) << 8) + (uint)(z))
+#define LINUX(x,y,z) (((uint)(x) << 24) + ((uint)(y) << 16) + (uint)(z))
#define THIS_GCC_VERSION ((kt->gcc_version[0] << 16) + \
(kt->gcc_version[1] << 8) + \
--
2.37.1

View File

@ -0,0 +1,179 @@
From 58c1816521c2e6bece3d69256b1866c9df8d93aa Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Tue, 16 May 2023 08:59:50 +0900
Subject: [PATCH 02/30] Fix failure of "dev -d|-D" options on Linux 6.4 and
later kernels
Kernel commit 2df418cf4b72 ("driver core: class: remove subsystem
private pointer from struct class"), which is contained in Linux 6.4 and
later kernels, removed the class.p member for struct subsys_private. As
a result, the "dev -d|-D" options fail with the following error.
dev: invalid structure member offset: class_p
FILE: dev.c LINE: 4689 FUNCTION: init_iter()
Search the class_kset list for the subsys_private of block class to fix
this.
As a preparation, introduce get_subsys_private() function, which is
abstracted from the same search procedure in init_memory_block().
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
dev.c | 20 +++++++++++++++++---
memory.c | 35 +++--------------------------------
tools.c | 43 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 64 insertions(+), 35 deletions(-)
diff --git a/defs.h b/defs.h
index 211fc9d55d33..21cc760444d1 100644
--- a/defs.h
+++ b/defs.h
@@ -5521,6 +5521,7 @@ struct rb_node *rb_left(struct rb_node *, struct rb_node *);
struct rb_node *rb_next(struct rb_node *);
struct rb_node *rb_last(struct rb_root *);
long percpu_counter_sum_positive(ulong fbc);
+ulong get_subsys_private(char *, char *);
/*
* symbols.c
diff --git a/dev.c b/dev.c
index 75d30bd022a1..9d38aef9b3db 100644
--- a/dev.c
+++ b/dev.c
@@ -4686,9 +4686,16 @@ init_iter(struct iter *i)
} else {
/* kernel version > 2.6.27, klist */
unsigned long class_private_addr;
- readmem(block_class_addr + OFFSET(class_p), KVADDR,
- &class_private_addr, sizeof(class_private_addr),
- "class.p", FAULT_ON_ERROR);
+
+ if (INVALID_MEMBER(class_p)) /* kernel version >= 6.4 */
+ class_private_addr = get_subsys_private("class_kset", "block");
+ else
+ readmem(block_class_addr + OFFSET(class_p), KVADDR,
+ &class_private_addr, sizeof(class_private_addr),
+ "class.p", FAULT_ON_ERROR);
+
+ if (!class_private_addr)
+ error(FATAL, "cannot determine subsys_private for block.\n");
if (VALID_STRUCT(class_private)) {
/* 2.6.27 < kernel version <= 2.6.37-rc2 */
@@ -4823,6 +4830,13 @@ void diskio_init(void)
if (INVALID_MEMBER(class_devices))
MEMBER_OFFSET_INIT(class_devices, "class", "devices");
MEMBER_OFFSET_INIT(class_p, "class", "p");
+ if (INVALID_MEMBER(class_p)) {
+ MEMBER_OFFSET_INIT(kset_list, "kset", "list");
+ MEMBER_OFFSET_INIT(kset_kobj, "kset", "kobj");
+ MEMBER_OFFSET_INIT(kobject_name, "kobject", "name");
+ MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry");
+ MEMBER_OFFSET_INIT(subsys_private_subsys, "subsys_private", "subsys");
+ }
MEMBER_OFFSET_INIT(class_private_devices, "class_private",
"class_devices");
MEMBER_OFFSET_INIT(device_knode_class, "device", "knode_class");
diff --git a/memory.c b/memory.c
index 0568f18eb9b7..953fc380c03c 100644
--- a/memory.c
+++ b/memory.c
@@ -17865,38 +17865,9 @@ init_memory_block(int *klistcnt, ulong **klistbuf)
* v6.3-rc1
* d2bf38c088e0 driver core: remove private pointer from struct bus_type
*/
- if (INVALID_MEMBER(bus_type_p)) {
- int i, cnt;
- char buf[32];
- ulong bus_kset, list, name;
-
- BZERO(ld, sizeof(struct list_data));
-
- get_symbol_data("bus_kset", sizeof(ulong), &bus_kset);
- readmem(bus_kset + OFFSET(kset_list), KVADDR, &list,
- sizeof(ulong), "bus_kset.list", FAULT_ON_ERROR);
-
- ld->flags |= LIST_ALLOCATE;
- ld->start = list;
- ld->end = bus_kset + OFFSET(kset_list);
- ld->list_head_offset = OFFSET(kobject_entry);
-
- cnt = do_list(ld);
- for (i = 0; i < cnt; i++) {
- readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name,
- sizeof(ulong), "kobject.name", FAULT_ON_ERROR);
- read_string(name, buf, sizeof(buf)-1);
- if (CRASHDEBUG(1))
- fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf);
- if (STREQ(buf, "memory")) {
- /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */
- private = ld->list_ptr[i] - OFFSET(kset_kobj)
- - OFFSET(subsys_private_subsys);
- break;
- }
- }
- FREEBUF(ld->list_ptr);
- } else {
+ if (INVALID_MEMBER(bus_type_p))
+ private = get_subsys_private("bus_kset", "memory");
+ else {
ulong memory_subsys = symbol_value("memory_subsys");
readmem(memory_subsys + OFFSET(bus_type_p), KVADDR, &private,
sizeof(void *), "memory_subsys.private", FAULT_ON_ERROR);
diff --git a/tools.c b/tools.c
index c2cfa7e280bc..392a79707e61 100644
--- a/tools.c
+++ b/tools.c
@@ -6963,3 +6963,46 @@ percpu_counter_sum_positive(ulong fbc)
return (ret < 0) ? 0 : ret;
}
+
+ulong
+get_subsys_private(char *kset_name, char *target_name)
+{
+ ulong kset_addr, kset_list, name_addr, private = 0;
+ struct list_data list_data, *ld;
+ char buf[32];
+ int i, cnt;
+
+ if (!symbol_exists(kset_name))
+ return 0;
+
+ ld = &list_data;
+ BZERO(ld, sizeof(struct list_data));
+
+ get_symbol_data(kset_name, sizeof(ulong), &kset_addr);
+ readmem(kset_addr + OFFSET(kset_list), KVADDR, &kset_list,
+ sizeof(ulong), "kset.list", FAULT_ON_ERROR);
+
+ ld->flags |= LIST_ALLOCATE;
+ ld->start = kset_list;
+ ld->end = kset_addr + OFFSET(kset_list);
+ ld->list_head_offset = OFFSET(kobject_entry);
+
+ cnt = do_list(ld);
+
+ for (i = 0; i < cnt; i++) {
+ readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name_addr,
+ sizeof(ulong), "kobject.name", FAULT_ON_ERROR);
+ read_string(name_addr, buf, sizeof(buf)-1);
+ if (CRASHDEBUG(1))
+ fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf);
+ if (STREQ(buf, target_name)) {
+ /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */
+ private = ld->list_ptr[i] - OFFSET(kset_kobj)
+ - OFFSET(subsys_private_subsys);
+ break;
+ }
+ }
+ FREEBUF(ld->list_ptr);
+
+ return private;
+}
--
2.37.1

View File

@ -0,0 +1,86 @@
From 342cf340ed0386880fe2a3115d6bef32eabb511b Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 18 May 2023 11:48:28 +0900
Subject: [PATCH 03/30] Fix "kmem -v" option displaying no regions on Linux 6.3
and later
Kernel commit 869176a09606 ("mm/vmalloc.c: add flags to mark vm_map_ram
area"), which is contained in Linux 6.3 and later, added "flags" member
to struct vmap_area. This was the revival of the "flags" member as
kernel commit 688fcbfc06e4 had eliminated it before.
As a result, crash started to use the old procedure using the member and
displays no vmalloc'd regions, because it does not have the same flag
value as the old one.
crash> kmem -v
VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE
crash>
To fix this, also check if vmap_area.purge_list exists, which was
introduced with the flags and removed later, to determine that the flags
member is the old one.
Related vmap_area history:
v2.6.28 db64fe02258f introduced vmap_area with flags and purge_list
v5.4 688fcbfc06e4 removed flags
v5.11 96e2db456135 removed purge_list
v6.3 869176a09606 added flags again
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
memory.c | 4 +++-
symbols.c | 1 +
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 21cc760444d1..bfa07c3f5150 100644
--- a/defs.h
+++ b/defs.h
@@ -2216,6 +2216,7 @@ struct offset_table { /* stash of commonly-used offsets */
long in6_addr_in6_u;
long kset_kobj;
long subsys_private_subsys;
+ long vmap_area_purge_list;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index 953fc380c03c..15fa8b2f08f1 100644
--- a/memory.c
+++ b/memory.c
@@ -429,6 +429,7 @@ vm_init(void)
MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "vm");
if (INVALID_MEMBER(vmap_area_vm))
MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "private");
+ MEMBER_OFFSET_INIT(vmap_area_purge_list, "vmap_area", "purge_list");
STRUCT_SIZE_INIT(vmap_area, "vmap_area");
if (VALID_MEMBER(vmap_area_va_start) &&
VALID_MEMBER(vmap_area_va_end) &&
@@ -9063,7 +9064,8 @@ dump_vmap_area(struct meminfo *vi)
readmem(ld->list_ptr[i], KVADDR, vmap_area_buf,
SIZE(vmap_area), "vmap_area struct", FAULT_ON_ERROR);
- if (VALID_MEMBER(vmap_area_flags)) {
+ if (VALID_MEMBER(vmap_area_flags) &&
+ VALID_MEMBER(vmap_area_purge_list)) {
flags = ULONG(vmap_area_buf + OFFSET(vmap_area_flags));
if (flags != VM_VM_AREA)
continue;
diff --git a/symbols.c b/symbols.c
index f0721023816d..7b1d59203b90 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9169,6 +9169,7 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(vmap_area_vm));
fprintf(fp, " vmap_area_flags: %ld\n",
OFFSET(vmap_area_flags));
+ fprintf(fp, " vmap_area_purge_list: %ld\n", OFFSET(vmap_area_purge_list));
fprintf(fp, " module_size_of_struct: %ld\n",
OFFSET(module_size_of_struct));
--
2.37.1

View File

@ -0,0 +1,225 @@
From a0eceb041dfa248d66f9f9a455106184b7823bec Mon Sep 17 00:00:00 2001
From: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Date: Mon, 29 May 2023 19:55:51 +0800
Subject: [PATCH 04/30] arm64/x86_64: Enhance "vtop" command to show zero_pfn
information
Enhance the "vtop" command to show "ZERO PAGE" information when PTE or
PMD has attached to {huge_}zero_pfn. For example:
crash> vtop -c 13674 ffff8917e000
VIRTUAL PHYSICAL
ffff8917e000 836e71000
PAGE DIRECTORY: ffff000802f8d000
PGD: ffff000802f8dff8 => 884e29003
PUD: ffff000844e29ff0 => 884e93003
PMD: ffff000844e93240 => 840413003
PTE: ffff000800413bf0 => 160000836e71fc3
PAGE: 836e71000 (ZERO PAGE)
...
Hugepage case:
crash> vtop -c 14538 ffff95800000
VIRTUAL PHYSICAL
ffff95800000 910c00000
PAGE DIRECTORY: ffff000801fa0000
PGD: ffff000801fa0ff8 => 884f53003
PUD: ffff000844f53ff0 => 8426cb003
PMD: ffff0008026cb560 => 60000910c00fc1
PAGE: 910c00000 (2MB, ZERO PAGE)
...
Note that
1. support displaying zero page only for THP (except for 1G THP)
2. do not support hugetlb cases.
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 24 ++++++++++++++++--------
defs.h | 5 +++++
memory.c | 23 +++++++++++++++++++++++
x86_64.c | 9 +++++----
4 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/arm64.c b/arm64.c
index 56fb841f43f8..efbdccbec9d3 100644
--- a/arm64.c
+++ b/arm64.c
@@ -1787,7 +1787,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pgd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pgd_val & SECTION_PAGE_MASK_512MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pgd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
@@ -1806,7 +1807,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -1859,7 +1861,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = PTE_TO_PHYS(pmd_val) & SECTION_PAGE_MASK_512MB;
if (verbose) {
- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB);
@@ -1878,7 +1881,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = PTE_TO_PHYS(pte_val) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -1940,7 +1944,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
@@ -1959,7 +1964,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
@@ -2029,7 +2035,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) {
ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK;
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase,
+ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : "");
arm64_translate_pte(pmd_val, 0, 0);
}
*paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB);
@@ -2048,7 +2055,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose)
if (pte_val & PTE_VALID) {
*paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr));
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr),
+ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : "");
arm64_translate_pte(pte_val, 0, 0);
}
} else {
diff --git a/defs.h b/defs.h
index bfa07c3f5150..7d8bb8ab3de1 100644
--- a/defs.h
+++ b/defs.h
@@ -2619,6 +2619,8 @@ struct vm_table { /* kernel VM-related data */
char *name;
} *pageflags_data;
ulong max_mem_section_nr;
+ ulong zero_paddr;
+ ulong huge_zero_paddr;
};
#define NODES (0x1)
@@ -3000,6 +3002,9 @@ struct load_module {
#define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask)
#define PHYSPAGEBASE(X) (((physaddr_t)(X)) & (physaddr_t)machdep->pagemask)
+#define IS_ZEROPAGE(paddr) ((paddr) == vt->zero_paddr || \
+ (paddr) == vt->huge_zero_paddr)
+
/*
* Sparse memory stuff
* These must follow the definitions in the kernel mmzone.h
diff --git a/memory.c b/memory.c
index 15fa8b2f08f1..ea3005a5c01f 100644
--- a/memory.c
+++ b/memory.c
@@ -1209,6 +1209,27 @@ vm_init(void)
machdep->memory_size()));
vt->paddr_prlen = strlen(buf);
+ vt->zero_paddr = ~0UL;
+ if (kernel_symbol_exists("zero_pfn")) {
+ ulong zero_pfn;
+
+ if (readmem(symbol_value("zero_pfn"), KVADDR,
+ &zero_pfn, sizeof(zero_pfn),
+ "read zero_pfn", QUIET|RETURN_ON_ERROR))
+ vt->zero_paddr = zero_pfn << PAGESHIFT();
+ }
+
+ vt->huge_zero_paddr = ~0UL;
+ if (kernel_symbol_exists("huge_zero_pfn")) {
+ ulong huge_zero_pfn;
+
+ if (readmem(symbol_value("huge_zero_pfn"), KVADDR,
+ &huge_zero_pfn, sizeof(huge_zero_pfn),
+ "read huge_zero_pfn", QUIET|RETURN_ON_ERROR) &&
+ huge_zero_pfn != ~0UL)
+ vt->huge_zero_paddr = huge_zero_pfn << PAGESHIFT();
+ }
+
if (vt->flags & PERCPU_KMALLOC_V1)
vt->dump_kmem_cache = dump_kmem_cache_percpu_v1;
else if (vt->flags & PERCPU_KMALLOC_V2)
@@ -14065,6 +14086,8 @@ dump_vm_table(int verbose)
} else {
fprintf(fp, " node_online_map: (unused)\n");
}
+ fprintf(fp, " zero_paddr: %lx\n", vt->zero_paddr);
+ fprintf(fp, " huge_zero_paddr: %lx\n", vt->huge_zero_paddr);
fprintf(fp, " nr_vm_stat_items: %d\n", vt->nr_vm_stat_items);
fprintf(fp, " vm_stat_items: %s", (vt->flags & VM_STAT) ?
"\n" : "(not used)\n");
diff --git a/x86_64.c b/x86_64.c
index 5019c69e452e..693a08bea758 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -2114,8 +2114,9 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in
goto no_upage;
if (pmd_pte & _PAGE_PSE) {
if (verbose) {
- fprintf(fp, " PAGE: %lx (2MB)\n\n",
- PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK);
+ fprintf(fp, " PAGE: %lx (2MB%s)\n\n",
+ PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK,
+ IS_ZEROPAGE(PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK) ? ", ZERO PAGE" : "");
x86_64_translate_pte(pmd_pte, 0, 0);
}
@@ -2143,8 +2144,8 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in
*paddr = (PAGEBASE(pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr);
if (verbose) {
- fprintf(fp, " PAGE: %lx\n\n",
- PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK);
+ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK,
+ IS_ZEROPAGE(PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK) ? "(ZERO PAGE)" : "");
x86_64_translate_pte(pte, 0, 0);
}
--
2.37.1

View File

@ -0,0 +1,165 @@
From db8c030857b4e318728c51c20da687906c109d0d Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Tue, 30 May 2023 19:38:34 +0900
Subject: [PATCH 05/30] diskdump/netdump: fix segmentation fault caused by
failure of stopping CPUs
There's no NMI on ARM. Hence, stopping the non-panicking CPUs from the
panicking CPU via IPI can fail easily if interrupts are being masked
in those moment. Moreover, crash_notes are not initialized for such
unstopped CPUs and the corresponding NT_PRSTATUS notes are not
attached to vmcore. However, crash utility never takes it
consideration such uninitialized crash_notes and then ends with
mapping different NT_PRSTATUS to actually unstopped CPUs. This corrupt
mapping can result crash utility into segmentation fault in the
operations where register values in NT_PRSTATUS notes are used.
For example:
crash> bt 1408
PID: 1408 TASK: ffff000003e22200 CPU: 2 COMMAND: "repro"
Segmentation fault (core dumped)
crash> help -D
diskdump_data:
filename: 127.0.0.1-2023-05-26-02:21:27/vmcore-ld1
flags: 46 (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED|LZO_SUPPORTED)
...snip...
notes_buf: 1815df0
num_vmcoredd_notes: 0
num_prstatus_notes: 5
notes[0]: 1815df0 (NT_PRSTATUS)
si.signo: 0 si.code: 0 si.errno: 0
...snip...
PSTATE: 80400005 FPVALID: 00000000
notes[4]: 1808f10 (NT_PRSTATUS)
Segmentation fault (core dumped)
To fix this issue, let's map NT_PRSTATUS to some CPU only if the
corresponding crash_notes is checked to be initialized.
[ kh: moved existence check for crash_notes out of the loop ]
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
diskdump.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
netdump.c | 7 ++++++-
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 7d8bb8ab3de1..6520d2f13f48 100644
--- a/defs.h
+++ b/defs.h
@@ -7118,6 +7118,7 @@ int dumpfile_is_split(void);
void show_split_dumpfiles(void);
void x86_process_elf_notes(void *, unsigned long);
void *diskdump_get_prstatus_percpu(int);
+int have_crash_notes(int cpu);
void map_cpus_to_prstatus_kdump_cmprs(void);
void diskdump_display_regs(int, FILE *);
void process_elf32_notes(void *, ulong);
diff --git a/diskdump.c b/diskdump.c
index 94bca4ded572..2c284ff3f97f 100644
--- a/diskdump.c
+++ b/diskdump.c
@@ -101,12 +101,54 @@ int dumpfile_is_split(void)
return KDUMP_SPLIT();
}
+int have_crash_notes(int cpu)
+{
+ ulong crash_notes, notes_ptr;
+ char *buf, *p;
+ Elf64_Nhdr *note = NULL;
+
+ if (!readmem(symbol_value("crash_notes"), KVADDR, &crash_notes,
+ sizeof(crash_notes), "crash_notes", RETURN_ON_ERROR)) {
+ error(WARNING, "cannot read \"crash_notes\"\n");
+ return FALSE;
+ }
+
+ if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF))
+ notes_ptr = crash_notes + kt->__per_cpu_offset[cpu];
+ else
+ notes_ptr = crash_notes;
+
+ buf = GETBUF(SIZE(note_buf));
+
+ if (!readmem(notes_ptr, KVADDR, buf,
+ SIZE(note_buf), "note_buf_t", RETURN_ON_ERROR)) {
+ error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", cpu);
+ return FALSE;
+ }
+
+ note = (Elf64_Nhdr *)buf;
+ p = buf + sizeof(Elf64_Nhdr);
+
+ if (note->n_type != NT_PRSTATUS) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n", cpu);
+ return FALSE;
+ }
+
+ if (!STRNEQ(p, "CORE")) {
+ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name != \"CORE\")\n", cpu);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
void
map_cpus_to_prstatus_kdump_cmprs(void)
{
void **nt_ptr;
int online, i, j, nrcpus;
size_t size;
+ int crash_notes_exists;
if (pc->flags2 & QEMU_MEM_DUMP_COMPRESSED) /* notes exist for all cpus */
goto resize_note_pointers;
@@ -129,9 +171,10 @@ map_cpus_to_prstatus_kdump_cmprs(void)
* Re-populate the array with the notes mapping to online cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
+ crash_notes_exists = kernel_symbol_exists("crash_notes");
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i)) {
+ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
dd->nt_prstatus_percpu[i] = nt_ptr[j++];
dd->num_prstatus_notes =
MAX(dd->num_prstatus_notes, i+1);
diff --git a/netdump.c b/netdump.c
index 4eba66cecb55..61ddeaa08831 100644
--- a/netdump.c
+++ b/netdump.c
@@ -75,6 +75,7 @@ map_cpus_to_prstatus(void)
void **nt_ptr;
int online, i, j, nrcpus;
size_t size;
+ int crash_notes_exists;
if (pc->flags2 & QEMU_MEM_DUMP_ELF) /* notes exist for all cpus */
return;
@@ -97,10 +98,14 @@ map_cpus_to_prstatus(void)
* Re-populate the array with the notes mapping to online cpus
*/
nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
+ crash_notes_exists = kernel_symbol_exists("crash_notes");
for (i = 0, j = 0; i < nrcpus; i++) {
- if (in_cpu_map(ONLINE_MAP, i))
+ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) {
nd->nt_prstatus_percpu[i] = nt_ptr[j++];
+ nd->num_prstatus_notes =
+ MAX(nd->num_prstatus_notes, i+1);
+ }
}
FREEBUF(nt_ptr);
--
2.37.1

View File

@ -0,0 +1,62 @@
From 9868ebc8e648e5791764a51567a23efae7170d9b Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Tue, 30 May 2023 19:38:35 +0900
Subject: [PATCH 06/30] Fix segfault in arm64_is_kernel_exception_frame() when
corrupt stack pointer address is given
Due to the corrupted mapping fixed by the previous commit,
arm64_is_kernel_exception_frame() can receive invalid stack pointer
address via the 2nd argument; different NT_PRSTATUS contains different
task's stack pointer address. However, macro STACK_OFFSET_TYPE() never
checks if a given address is within the range of the kernel stack of
the corresponding task and hence can result in referring to outside of
bt->stackbuf.
static int
arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
{
struct arm64_pt_regs *regs;
struct machine_specific *ms = machdep->machspec;
regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
=> if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) &&
!(regs->pstate & (0xffffffff00000000ULL | PSR_MODE32_BIT)) &&
is_kernel_text(regs->pc) &&
is_kernel_text(regs->regs[30] | ms->CONFIG_ARM64_KERNELPACMASK)) {
To fix this issue, check if the given stack pointer address points to
the range of the kernel stack of the corresponding task, and abort if
it turns out to be invalid.
Although the corrupted mapping has already been fixed, this fix is
still needed because corrupt stack pointer address can still be passed
here from different reasons. Consider, for example, that data on the
kernel stack can be modified abnormally due to any kernel bugs or
hardware issues.
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/defs.h b/defs.h
index 6520d2f13f48..11fdc17e60d0 100644
--- a/defs.h
+++ b/defs.h
@@ -976,7 +976,10 @@ struct bt_info {
#define STACK_OFFSET_TYPE(OFF) \
(((ulong)(OFF) > STACKSIZE()) ? \
- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF))
+ (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? \
+ error(FATAL, "invalid stack pointer is given\n") : \
+ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \
+ (ulong)(OFF))
#define GET_STACK_ULONG(OFF) \
*((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))])))
--
2.37.1

View File

@ -0,0 +1,71 @@
From 8527bbff71cbdfd90a67d5cec4a1d94156e6bf13 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Wed, 31 May 2023 14:01:36 +0800
Subject: [PATCH 07/30] Output prompt when stdin is not a TTY
When stdin is not a TTY, prompt ("crash> ") won't be displayed. If
another process interact with crash with piped stdin/stdout, it will not
get the prompt as a delimiter.
Compared to other debugger like gdb, crash seems intended to give a
prompt in this case in the beginning of process_command_line(). It
checks if pc->flags does NOT have any of
READLINE|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE, a
prompt should be printed. The check will never be true since READLINE is
set in setup_environment() unconditionally.
It makes more sense to change the READLINE flag in the check to TTY
instead. Besides this change, the prompt in process_command_line() should
only be print when it's not in the middle of processing the input file
recovering from a previous FATAL command, because the prompt will be
displayed by the exec_input_file().
Additionally, when stdin is not TTY, repeat the command line from user
after prompt, which can give more context.
The prompt and command line can be opt out by using the silent (-s) flag.
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
cmdline.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/cmdline.c b/cmdline.c
index ded6551c2597..b7f919ae2279 100644
--- a/cmdline.c
+++ b/cmdline.c
@@ -64,8 +64,8 @@ process_command_line(void)
fp = stdout;
BZERO(pc->command_line, BUFSIZE);
- if (!(pc->flags &
- (READLINE|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE)))
+ if (!pc->ifile_in_progress && !(pc->flags &
+ (TTY|SILENT|CMDLINE_IFILE|RCHOME_IFILE|RCLOCAL_IFILE)))
fprintf(fp, "%s", pc->prompt);
fflush(fp);
@@ -136,12 +136,16 @@ process_command_line(void)
add_history(pc->command_line);
check_special_handling(pc->command_line);
- } else {
- if (fgets(pc->command_line, BUFSIZE-1, stdin) == NULL)
+ } else {
+ if (fgets(pc->command_line, BUFSIZE-1, stdin) == NULL)
clean_exit(1);
+ if (!(pc->flags & SILENT)) {
+ fprintf(fp, "%s", pc->command_line);
+ fflush(fp);
+ }
clean_line(pc->command_line);
strcpy(pc->orig_line, pc->command_line);
- }
+ }
/*
* First clean out all linefeeds and leading/trailing spaces.
--
2.37.1

View File

@ -0,0 +1,345 @@
From 77d8621876c1c6a3a25b91e464ba588a542485fb Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Thu, 18 May 2023 16:53:54 +0900
Subject: [PATCH 08/30] x86_64: Fix "bt" command printing stale entries on
Linux 6.4 and later
Kernel commit fb799447ae29 ("x86,objtool: Split UNWIND_HINT_EMPTY in
two"), which is contained in Linux 6.4 and later kernels, changed
ORC_TYPE_CALL macro from 0 to 2. As a result, the "bt" command cannot
use ORC entries, and can display stale entries in a call trace.
crash> bt 1
PID: 1 TASK: ffff93cd06294180 CPU: 51 COMMAND: "systemd"
#0 [ffffb72bc00cbc98] __schedule at ffffffff86e52aae
#1 [ffffb72bc00cbd00] schedule at ffffffff86e52f6a
#2 [ffffb72bc00cbd18] schedule_hrtimeout_range_clock at ffffffff86e58ef5
#3 [ffffb72bc00cbd88] ep_poll at ffffffff8669624d
#4 [ffffb72bc00cbe28] do_epoll_wait at ffffffff86696371
#5 [ffffb72bc00cbe30] do_timerfd_settime at ffffffff8669902b <<
#6 [ffffb72bc00cbe60] __x64_sys_epoll_wait at ffffffff86696bf0
#7 [ffffb72bc00cbeb0] do_syscall_64 at ffffffff86e3feb9
#8 [ffffb72bc00cbee0] __task_pid_nr_ns at ffffffff863330d7 <<
#9 [ffffb72bc00cbf08] syscall_exit_to_user_mode at ffffffff86e466b2 << stale entries
#10 [ffffb72bc00cbf18] do_syscall_64 at ffffffff86e3fec9 <<
#11 [ffffb72bc00cbf50] entry_SYSCALL_64_after_hwframe at ffffffff870000aa
Also, kernel commit ffb1b4a41016 added a member to struct orc_entry.
Although this does not affect the crash's unwinder, its debugging
information can be displayed incorrectly.
To fix these,
(1) introduce "kernel_orc_entry_6_4" structure corresponding to 6.4 and
abstruction layer "orc_entry" structure in crash,
(2) switch ORC_TYPE_CALL to 2 or 0 with kernel's orc_entry structure.
Related orc_entry history:
v4.14 39358a033b2e introduced struct orc_entry
v4.19 d31a580266ee added orc_entry.end member
v6.3 ffb1b4a41016 added orc_entry.signal member
v6.4 fb799447ae29 removed end member and changed type member to 3 bits
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 28 ++++++++++++-
x86_64.c | 119 +++++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 119 insertions(+), 28 deletions(-)
diff --git a/defs.h b/defs.h
index 11fdc17e60d0..bfda0c48d37b 100644
--- a/defs.h
+++ b/defs.h
@@ -6363,9 +6363,29 @@ typedef struct __attribute__((__packed__)) {
unsigned int sp_reg:4;
unsigned int bp_reg:4;
unsigned int type:2;
+ unsigned int signal:1;
unsigned int end:1;
} kernel_orc_entry;
+typedef struct __attribute__((__packed__)) {
+ signed short sp_offset;
+ signed short bp_offset;
+ unsigned int sp_reg:4;
+ unsigned int bp_reg:4;
+ unsigned int type:3;
+ unsigned int signal:1;
+} kernel_orc_entry_6_4;
+
+typedef struct orc_entry {
+ signed short sp_offset;
+ signed short bp_offset;
+ unsigned int sp_reg;
+ unsigned int bp_reg;
+ unsigned int type;
+ unsigned int signal;
+ unsigned int end;
+} orc_entry;
+
struct ORC_data {
int module_ORC;
uint lookup_num_blocks;
@@ -6376,10 +6396,13 @@ struct ORC_data {
ulong orc_lookup;
ulong ip_entry;
ulong orc_entry;
- kernel_orc_entry kernel_orc_entry;
+ orc_entry orc_entry_data;
+ int has_signal;
+ int has_end;
};
-#define ORC_TYPE_CALL 0
+#define ORC_TYPE_CALL ((machdep->flags & ORC_6_4) ? 2 : 0)
+/* The below entries are not used and must be updated if we use them. */
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
#define UNWIND_HINT_TYPE_SAVE 3
@@ -6456,6 +6479,7 @@ struct machine_specific {
#define ORC (0x4000)
#define KPTI (0x8000)
#define L1TF (0x10000)
+#define ORC_6_4 (0x20000)
#define VM_FLAGS (VM_ORIG|VM_2_6_11|VM_XEN|VM_XEN_RHEL4|VM_5LEVEL)
diff --git a/x86_64.c b/x86_64.c
index 693a08bea758..87e87ae6e1e8 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -132,9 +132,9 @@ static void GART_init(void);
static void x86_64_exception_stacks_init(void);
static int in_START_KERNEL_map(ulong);
static ulong orc_ip(ulong);
-static kernel_orc_entry *__orc_find(ulong, ulong, uint, ulong);
-static kernel_orc_entry *orc_find(ulong);
-static kernel_orc_entry *orc_module_find(ulong);
+static orc_entry *__orc_find(ulong, ulong, uint, ulong);
+static orc_entry *orc_find(ulong);
+static orc_entry *orc_module_find(ulong);
static ulong ip_table_to_vaddr(ulong);
static void orc_dump(ulong);
@@ -806,6 +806,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, "%sFRAMESIZE_DEBUG", others++ ? "|" : "");
if (machdep->flags & ORC)
fprintf(fp, "%sORC", others++ ? "|" : "");
+ if (machdep->flags & ORC_6_4)
+ fprintf(fp, "%sORC_6_4", others++ ? "|" : "");
if (machdep->flags & FRAMEPOINTER)
fprintf(fp, "%sFRAMEPOINTER", others++ ? "|" : "");
if (machdep->flags & GART_REGION)
@@ -980,6 +982,8 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " ORC_data: %s", machdep->flags & ORC ? "\n" : "(unused)\n");
if (machdep->flags & ORC) {
fprintf(fp, " module_ORC: %s\n", ms->orc.module_ORC ? "TRUE" : "FALSE");
+ fprintf(fp, " has_signal: %s\n", ms->orc.has_signal ? "TRUE" : "FALSE");
+ fprintf(fp, " has_end: %s\n", ms->orc.has_end ? "TRUE" : "FALSE");
fprintf(fp, " lookup_num_blocks: %d\n", ms->orc.lookup_num_blocks);
fprintf(fp, " __start_orc_unwind_ip: %lx\n", ms->orc.__start_orc_unwind_ip);
fprintf(fp, " __stop_orc_unwind_ip: %lx\n", ms->orc.__stop_orc_unwind_ip);
@@ -988,14 +992,18 @@ x86_64_dump_machdep_table(ulong arg)
fprintf(fp, " orc_lookup: %lx\n", ms->orc.orc_lookup);
fprintf(fp, " ip_entry: %lx\n", ms->orc.ip_entry);
fprintf(fp, " orc_entry: %lx\n", ms->orc.orc_entry);
- fprintf(fp, " kernel_orc_entry:\n");
- fprintf(fp, " sp_offset: %d\n", ms->orc.kernel_orc_entry.sp_offset);
- fprintf(fp, " bp_offset: %d\n", ms->orc.kernel_orc_entry.bp_offset);
- fprintf(fp, " sp_reg: %d\n", ms->orc.kernel_orc_entry.sp_reg);
- fprintf(fp, " bp_reg: %d\n", ms->orc.kernel_orc_entry.bp_reg);
- fprintf(fp, " type: %d\n", ms->orc.kernel_orc_entry.type);
- if (MEMBER_EXISTS("orc_entry", "end"))
- fprintf(fp, " end: %d\n", ms->orc.kernel_orc_entry.end);
+ fprintf(fp, " orc_entry_data:\n");
+ fprintf(fp, " sp_offset: %d\n", ms->orc.orc_entry_data.sp_offset);
+ fprintf(fp, " bp_offset: %d\n", ms->orc.orc_entry_data.bp_offset);
+ fprintf(fp, " sp_reg: %d\n", ms->orc.orc_entry_data.sp_reg);
+ fprintf(fp, " bp_reg: %d\n", ms->orc.orc_entry_data.bp_reg);
+ fprintf(fp, " type: %d\n", ms->orc.orc_entry_data.type);
+ if (ms->orc.has_signal)
+ fprintf(fp, " signal: %d\n", ms->orc.orc_entry_data.signal);
+ else
+ fprintf(fp, " signal: (n/a)\n");
+ if (ms->orc.has_end)
+ fprintf(fp, " end: %d\n", ms->orc.orc_entry_data.end);
else
fprintf(fp, " end: (n/a)\n");
}
@@ -6440,6 +6448,12 @@ x86_64_ORC_init(void)
MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", "bp");
MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, "inactive_task_frame", "ret_addr");
+ orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added at 6.3 */
+ orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* removed at 6.4 */
+
+ if (orc->has_signal && !orc->has_end)
+ machdep->flags |= ORC_6_4;
+
machdep->flags |= ORC;
}
@@ -8522,7 +8536,7 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
int reterror;
int arg_exists;
int exception;
- kernel_orc_entry *korc;
+ orc_entry *korc;
if (!(bt->flags & BT_FRAMESIZE_DEBUG)) {
if ((bt->flags & BT_FRAMESIZE_IGNORE_MASK) ||
@@ -8608,11 +8622,14 @@ x86_64_get_framesize(struct bt_info *bt, ulong textaddr, ulong rsp, char *stack_
if ((machdep->flags & ORC) && (korc = orc_find(textaddr))) {
if (CRASHDEBUG(1)) {
+ struct ORC_data *orc = &machdep->machspec->orc;
fprintf(fp,
"rsp: %lx textaddr: %lx -> spo: %d bpo: %d spr: %d bpr: %d type: %d",
rsp, textaddr, korc->sp_offset, korc->bp_offset,
korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
@@ -9118,7 +9135,53 @@ orc_ip(ulong ip)
return (ip + ip_entry);
}
-static kernel_orc_entry *
+static orc_entry *
+orc_get_entry(struct ORC_data *orc)
+{
+ struct orc_entry *entry = &orc->orc_entry_data;
+
+ if (machdep->flags & ORC_6_4) {
+ kernel_orc_entry_6_4 korc;
+
+ if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry_6_4),
+ "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ entry->sp_offset = korc.sp_offset;
+ entry->bp_offset = korc.bp_offset;
+ entry->sp_reg = korc.sp_reg;
+ entry->bp_reg = korc.bp_reg;
+ entry->type = korc.type;
+ entry->signal = korc.signal;
+ } else {
+ kernel_orc_entry korc;
+
+ if (!readmem(orc->orc_entry, KVADDR, &korc, sizeof(kernel_orc_entry),
+ "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+ return NULL;
+
+ entry->sp_offset = korc.sp_offset;
+ entry->bp_offset = korc.bp_offset;
+ entry->sp_reg = korc.sp_reg;
+ entry->bp_reg = korc.bp_reg;
+ entry->type = korc.type;
+ if (orc->has_end) {
+ /*
+ * orc_entry.signal was inserted before orc_entry.end.
+ * see ffb1b4a41016.
+ */
+ if (orc->has_signal) {
+ entry->signal = korc.signal;
+ entry->end = korc.end;
+ } else
+ entry->end = korc.signal; /* on purpose */
+ }
+ }
+
+ return entry;
+}
+
+static orc_entry *
__orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
{
int index;
@@ -9128,7 +9191,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
int *ip_table = (int *)ip_table_ptr;
struct ORC_data *orc = &machdep->machspec->orc;
ulong vaddr;
- kernel_orc_entry *korc;
+ orc_entry *korc;
if (CRASHDEBUG(2)) {
int i, ip_entry;
@@ -9172,18 +9235,20 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
orc->ip_entry = (ulong)found;
orc->orc_entry = u_table_ptr + (index * SIZE(orc_entry));
- if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry,
- sizeof(kernel_orc_entry), "kernel orc_entry", RETURN_ON_ERROR|QUIET))
+
+ if (!orc_get_entry(orc))
return NULL;
- korc = &orc->kernel_orc_entry;
+ korc = &orc->orc_entry_data;
if (CRASHDEBUG(2)) {
fprintf(fp, " found: %lx index: %d\n", (ulong)found, index);
fprintf(fp,
" orc_entry: %lx sp_offset: %d bp_offset: %d sp_reg: %d bp_reg: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
}
@@ -9196,7 +9261,7 @@ __orc_find(ulong ip_table_ptr, ulong u_table_ptr, uint num_entries, ulong ip)
#define LOOKUP_START_IP (unsigned long)kt->stext
#define LOOKUP_STOP_IP (unsigned long)kt->etext
-static kernel_orc_entry *
+static orc_entry *
orc_find(ulong ip)
{
unsigned int idx, start, stop;
@@ -9266,7 +9331,7 @@ orc_find(ulong ip)
orc->__start_orc_unwind + (start * SIZE(orc_entry)), stop - start, ip);
}
-static kernel_orc_entry *
+static orc_entry *
orc_module_find(ulong ip)
{
struct load_module *lm;
@@ -9313,7 +9378,7 @@ static void
orc_dump(ulong ip)
{
struct ORC_data *orc = &machdep->machspec->orc;
- kernel_orc_entry *korc;
+ orc_entry *korc;
ulong vaddr, offset;
struct syment *sp, *orig;
@@ -9336,13 +9401,15 @@ next_in_func:
fprintf(fp, "%s+%ld -> ", sp->name, offset);
else
fprintf(fp, "(unresolved) -> ");
- if (!readmem(orc->orc_entry, KVADDR, &orc->kernel_orc_entry, sizeof(kernel_orc_entry),
- "kernel orc_entry", RETURN_ON_ERROR))
+
+ if (!orc_get_entry(orc))
error(FATAL, "cannot read orc_entry\n");
- korc = &orc->kernel_orc_entry;
+ korc = &orc->orc_entry_data;
fprintf(fp, "orc: %lx spo: %d bpo: %d spr: %d bpr: %d type: %d",
orc->orc_entry, korc->sp_offset, korc->bp_offset, korc->sp_reg, korc->bp_reg, korc->type);
- if (MEMBER_EXISTS("orc_entry", "end"))
+ if (orc->has_signal)
+ fprintf(fp, " signal: %d", korc->signal);
+ if (orc->has_end)
fprintf(fp, " end: %d", korc->end);
fprintf(fp, "\n");
--
2.37.1

View File

@ -0,0 +1,48 @@
From ec1e61b33a705b8be8d116a541c7b076b0429deb Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Mon, 12 Jun 2023 18:50:05 +0800
Subject: [PATCH 09/30] Fix invalid structure size error during crash startup
on ppc64
The crash utility will fail to start session on ppc64 with the following
error:
# crash vmlinux vmcore -s
crash: invalid structure size: note_buf
FILE: diskdump.c LINE: 121 FUNCTION: have_crash_notes()
[./crash] error trace: 101859ac => 10291798 => 10291450 => 10266038
10266038: SIZE_verify+156
10291450: have_crash_notes+308
10291798: map_cpus_to_prstatus_kdump_cmprs+448
101859ac: task_init+11980
The reason is that the size of note_buf is not initialized before using
SIZE(note_buf) in the have_crash_notes() on some architectures including
ppc64. Let's initialize it in task_init() to fix this issue.
Fixes: db8c030857b4 ("diskdump/netdump: fix segmentation fault caused by failure of stopping CPUs")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
task.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/task.c b/task.c
index 88941c7b0e4d..2b7467b4193d 100644
--- a/task.c
+++ b/task.c
@@ -675,6 +675,9 @@ task_init(void)
tt->this_task = pid_to_task(active_pid);
}
else {
+ if (INVALID_SIZE(note_buf))
+ STRUCT_SIZE_INIT(note_buf, "note_buf_t");
+
if (KDUMP_DUMPFILE())
map_cpus_to_prstatus();
else if (ELF_NOTES_VALID() && DISKDUMP_DUMPFILE())
--
2.37.1

View File

@ -0,0 +1,69 @@
From 91a76958e4a8a9fb67ac61166ff36e8dc961b3b9 Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Wed, 7 Jun 2023 18:37:33 +0900
Subject: [PATCH 10/30] Revert "Fix segfault in
arm64_is_kernel_exception_frame() when corrupt stack pointer address is
given"
This reverts commit 9868ebc8e648e5791764a51567a23efae7170d9b.
The commit 9868ebc8e648e5791764a51567a23efae7170d9b causes the issue
that bt command fails to show backtraces for the tasks that is running
in the user mode at the moment of the kernel panic as follows:
crash> bt 1734
PID: 1734 TASK: ffff000000392200 CPU: 4 COMMAND: "insmod"
bt: invalid stack pointer is given
The root cause is that while the commit added a sanity check into
STACK_OFFSET_TYPE() to validate if a given candidate address of any
interrupt or exception frame is contained within the range of the
corresponding kernel stack, the premise that the STACK_OFFSET_TYPE()
should not return out-of-the-buffer address, is wrong.
Reexamining the relevant surrounding part of the backtracing code, it
looks to me now that the STACK_OFFSET_TYPE() is originally expected to
return an out-of-the-buffer address, like the address of the top of
the corresponding kernel stack, e.g. at here:
static int
arm64_in_kdump_text(struct bt_info *bt, struct arm64_stackframe *frame)
{
...
if (bt->flags & BT_USER_SPACE)
start = (ulong *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(bt->stacktop))];
else {
Note that the above bt 1734 aborts here.
Hence, the current implementation policy around STACK_OFFSET_TYPE()
looks that the caller side is responsible for understanding the fact
in advance and for avoiding making buffer overrun carefully.
To fix this issue, revert the commit.
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/defs.h b/defs.h
index bfda0c48d37b..3e7d6cfbc6a8 100644
--- a/defs.h
+++ b/defs.h
@@ -976,10 +976,7 @@ struct bt_info {
#define STACK_OFFSET_TYPE(OFF) \
(((ulong)(OFF) > STACKSIZE()) ? \
- (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? \
- error(FATAL, "invalid stack pointer is given\n") : \
- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \
- (ulong)(OFF))
+ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF))
#define GET_STACK_ULONG(OFF) \
*((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))])))
--
2.37.1

View File

@ -0,0 +1,45 @@
From 6c8cd9b5dcf48221e5f75fc5850bb4719d77acce Mon Sep 17 00:00:00 2001
From: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Date: Wed, 7 Jun 2023 18:37:34 +0900
Subject: [PATCH 11/30] arm64: Fix again segfault in
arm64_is_kernel_exception_frame() when corrupt stack pointer address is given
This is the second trial from the commit
9868ebc8e648e5791764a51567a23efae7170d9b that was reverted at the
previous commit.
As described in the previous commit, result of STACK_OFFSET_TYPE() can
be an address out of bt->stackbuf and hence the address needs to be
checked prior to being referred to as an pt_regs object.
So, to fix the issue, let's check if stkptr points to within the range
of the kernel stack first.
[ kh: added a warning at Lianbo's suggestion ]
Signed-off-by: HATAYAMA Daisuke <d.hatayama@fujitsu.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
arm64.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arm64.c b/arm64.c
index efbdccbec9d3..67b1a2244810 100644
--- a/arm64.c
+++ b/arm64.c
@@ -2381,6 +2381,12 @@ arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr)
struct arm64_pt_regs *regs;
struct machine_specific *ms = machdep->machspec;
+ if (stkptr > STACKSIZE() && !INSTACK(stkptr, bt)) {
+ if (CRASHDEBUG(1))
+ error(WARNING, "stkptr: %lx is outside the kernel stack range\n", stkptr);
+ return FALSE;
+ }
+
regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))];
if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) &&
--
2.37.1

View File

@ -0,0 +1,53 @@
From 8b24b2025fb4ae9bd6102bb054bd23987c35387e Mon Sep 17 00:00:00 2001
From: Likhitha Korrapati <likhitha@linux.ibm.com>
Date: Fri, 16 Jun 2023 17:25:19 +0530
Subject: [PATCH 12/30] ppc64: Remove redundant PTE checks
Remove redundant checks for PTE (Page Table Entry) because those
conditions are already covered.
if (!(pte & _PAGE_PRESENT)) {
...
return FALSE;
}
if (!pte)
return FALSE;
The second pte check is redundant because it holds true only when pte is
0. If pte is 0 then (!(pte & _PAGE_PRESENT)) is true and it will return
false. So there is no need for one more pte check.
Signed-off-by: Likhitha Korrapati <likhitha@linux.ibm.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
ppc64.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/ppc64.c b/ppc64.c
index b95a621d8fe4..fc34006f4863 100644
--- a/ppc64.c
+++ b/ppc64.c
@@ -968,9 +968,6 @@ ppc64_vtop(ulong vaddr, ulong *pgd, physaddr_t *paddr, int verbose)
return FALSE;
}
- if (!pte)
- return FALSE;
-
*paddr = PAGEBASE(PTOB(pte >> PTE_RPN_SHIFT_DEFAULT)) + PAGEOFFSET(vaddr);
if (verbose) {
@@ -1077,9 +1074,6 @@ ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose)
return FALSE;
}
- if (!pte)
- return FALSE;
-
out:
if (hugepage_type) {
if (hugepage_type == 2) {
--
2.37.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,119 @@
From 88580068b7dd96bf679c82bdc05e146968ade10c Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>
Date: Fri, 23 Jun 2023 16:34:35 +0900
Subject: [PATCH 14/30] Fix failure of gathering task table on Linux 6.5-rc1
and later
Kernel commit b69f0aeb0689 ("pid: Replace struct pid 1-element array
with flex-array") changed pid.numbers[1] to pid.numbers[]. With this,
the size of struct pid does not contain the size of struct upid:
(gdb) ptype /o struct pid
/* offset | size */ type = struct pid {
/* 0 | 4 */ refcount_t count;
...
/* 96 | 0 */ struct upid numbers[];
^^^^ ^^^
/* total size (bytes): 96 */
} ^^^^
As a result, in refresh_xarray_task_table(), crash does not read the
data of pid.numbers[0].ns and cannot gather the task table correctly.
$ crash vmlinux vmcore
...
WARNING: active task ffff936992ad0000 on cpu 1 not found in PID hash
...
crash> ps -S
RU: 9
crash>
Increase the size of reading struct pid by SIZE(upid) in this case.
Signed-off-by: Kazuhito Hagio <k-hagio-ab@nec.com>
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 1 +
symbols.c | 3 +++
task.c | 10 ++++++++--
3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 414853660dc1..8f7d1fa0aba6 100644
--- a/defs.h
+++ b/defs.h
@@ -2430,6 +2430,7 @@ struct array_table {
int task_struct_rlim;
int signal_struct_rlim;
int vm_numa_stat;
+ int pid_numbers;
};
/*
diff --git a/symbols.c b/symbols.c
index f161ee99e90a..82529a6785c9 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9705,6 +9705,8 @@ builtin_array_length(char *s, int len, int *two_dim)
lenptr = &array_table.signal_struct_rlim;
else if (STREQ(s, "vm_numa_stat"))
lenptr = &array_table.vm_numa_stat;
+ else if (STREQ(s, "pid.numbers"))
+ lenptr = &array_table.pid_numbers;
if (!lenptr) /* not stored */
return(len);
@@ -12107,6 +12109,7 @@ dump_offset_table(char *spec, ulong makestruct)
ARRAY_LENGTH(signal_struct_rlim));
fprintf(fp, " vm_numa_stat: %d\n",
ARRAY_LENGTH(vm_numa_stat));
+ fprintf(fp, " pid_numbers: %d\n", ARRAY_LENGTH(pid_numbers));
if (spec) {
int in_size_table, in_array_table, arrays, offsets, sizes;
diff --git a/task.c b/task.c
index 2b7467b4193d..b9076da35565 100644
--- a/task.c
+++ b/task.c
@@ -352,6 +352,7 @@ task_init(void)
MEMBER_OFFSET_INIT(upid_ns, "upid", "ns");
MEMBER_OFFSET_INIT(upid_pid_chain, "upid", "pid_chain");
MEMBER_OFFSET_INIT(pid_numbers, "pid", "numbers");
+ ARRAY_LENGTH_INIT(len, pid_numbers, "pid.numbers", NULL, 0);
MEMBER_OFFSET_INIT(pid_tasks, "pid", "tasks");
tt->init_pid_ns = symbol_value("init_pid_ns");
}
@@ -2574,6 +2575,7 @@ refresh_xarray_task_table(void)
char *tp;
struct list_pair xp;
char *pidbuf;
+ long pid_size = SIZE(pid);
if (DUMPFILE() && (tt->flags & TASK_INIT_DONE)) /* impossible */
return;
@@ -2603,8 +2605,12 @@ refresh_xarray_task_table(void)
if (CRASHDEBUG(1))
console("xarray: count: %ld\n", count);
+ /* 6.5: b69f0aeb0689 changed pid.numbers[1] to numbers[] */
+ if (ARRAY_LENGTH(pid_numbers) == 0)
+ pid_size += SIZE(upid);
+
retries = 0;
- pidbuf = GETBUF(SIZE(pid));
+ pidbuf = GETBUF(pid_size);
retry_xarray:
if (retries && DUMPFILE())
@@ -2672,7 +2678,7 @@ retry_xarray:
* - get task from address of task->pids[0]
*/
if (!readmem(next, KVADDR, pidbuf,
- SIZE(pid), "pid", RETURN_ON_ERROR|QUIET)) {
+ pid_size, "pid", RETURN_ON_ERROR|QUIET)) {
error(INFO, "\ncannot read pid struct from xarray\n");
if (DUMPFILE())
continue;
--
2.37.1

View File

@ -0,0 +1,68 @@
From 4ee56105881d7bb1da1e668ac5bb47a4e0846676 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Wed, 5 Jul 2023 10:02:59 +0800
Subject: [PATCH 15/30] Fix compilation error due to new strlcpy function that
glibc added
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The crash-utility has its own strlcpy(), but recently the latest glibc
has also implemented the strlcpy function, which is derived from
OpenBSD. Eventually this caused the following compilation error:
# make -j8 lzo
...
In file included from global_data.c:18:
defs.h:5556:8: error: conflicting types for strlcpy; have size_t(char *, char *, size_t) {aka long unsigned int(char *, char *, long unsigned int)}
5556 | size_t strlcpy(char *, char *, size_t);
| ^~~~~~~
In file included from memory.c:19:
defs.h:5556:8: error: conflicting types for strlcpy; have size_t(char *, char *, size_t) {aka long unsigned int(char *, char *, long unsigned int)}
5556 | size_t strlcpy(char *, char *, size_t);
| ^~~~~~~
...
To fix the issue, let's declare the strlcpy() as a weak function and
keep the same parameter types as the glibc function has.
Related glibc commits:
454a20c8756c ("Implement strlcpy and strlcat [BZ #178]")
d2fda60e7c40 ("manual: Manual update for strlcat, strlcpy, wcslcat, wclscpy")
388ae538ddcb ("hurd: Add strlcpy, strlcat, wcslcpy, wcslcat to libc.abilist")
Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
---
defs.h | 2 +-
tools.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/defs.h b/defs.h
index 8f7d1fa0aba6..26afe232cc3e 100644
--- a/defs.h
+++ b/defs.h
@@ -5553,7 +5553,7 @@ uint32_t swap32(uint32_t, int);
uint64_t swap64(uint64_t, int);
ulong *get_cpumask_buf(void);
int make_cpumask(char *, ulong *, int, int *);
-size_t strlcpy(char *, char *, size_t);
+size_t strlcpy(char *, const char *, size_t) __attribute__ ((__weak__));
struct rb_node *rb_first(struct rb_root *);
struct rb_node *rb_parent(struct rb_node *, struct rb_node *);
struct rb_node *rb_right(struct rb_node *, struct rb_node *);
diff --git a/tools.c b/tools.c
index 392a79707e61..0f2db108838a 100644
--- a/tools.c
+++ b/tools.c
@@ -6795,7 +6795,7 @@ make_cpumask_error:
* always be NULL-terminated.
*/
size_t
-strlcpy(char *dest, char *src, size_t size)
+strlcpy(char *dest, const char *src, size_t size)
{
size_t ret = strlen(src);
--
2.37.1

View File

@ -0,0 +1,45 @@
From 6d0be1316aa3666895c0a8a0d3c98c235ec03bd4 Mon Sep 17 00:00:00 2001
From: Kazuhito Hagio <k-hagio-ab@nec.com>