diff --git a/0001-Fix-kernel-version-macros-for-revision-numbers-over-.patch b/0001-Fix-kernel-version-macros-for-revision-numbers-over-.patch new file mode 100644 index 0000000..3ca7cdc --- /dev/null +++ b/0001-Fix-kernel-version-macros-for-revision-numbers-over-.patch @@ -0,0 +1,52 @@ +From 040a56e9f9d0df15a2f8161ed3a0a907d70dda03 Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Wed, 10 May 2023 16:09:03 +0900 +Subject: [PATCH 1/6] Fix kernel version macros for revision numbers over 255 + +The current comparison macros for kernel version shift minor number only +8 bits. This can cause an unexpected result on kernels with revision +number over 255, e.g. Linux 4.14.314. + +In fact, on Linux 4.14.314 for x86_64 without CONFIG_RANDOMIZE_BASE=y +(KASLR), the following condition became false in x86_64_init(). + + ((THIS_KERNEL_VERSION >= LINUX(4,14,84)) && + (THIS_KERNEL_VERSION < LINUX(4,15,0))) + +As a result, crash used a wrong hard-coded value for PAGE_OFFSET and +failed to start a session with the following seek error. + + crash: seek error: physical address: 200e000 type: "pud page" + +Shift the major and minor number by 24 and 16 bits respectively to fix +this issue. 
+ +Reported-by: Luiz Capitulino +Tested-by: Luiz Capitulino +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + defs.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/defs.h b/defs.h +index 12ad6aaa0998..211fc9d55d33 100644 +--- a/defs.h ++++ b/defs.h +@@ -807,10 +807,10 @@ struct kernel_table { /* kernel data */ + } \ + } + +-#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 16) + \ +- (kt->kernel_version[1] << 8) + \ ++#define THIS_KERNEL_VERSION ((kt->kernel_version[0] << 24) + \ ++ (kt->kernel_version[1] << 16) + \ + (kt->kernel_version[2])) +-#define LINUX(x,y,z) (((uint)(x) << 16) + ((uint)(y) << 8) + (uint)(z)) ++#define LINUX(x,y,z) (((uint)(x) << 24) + ((uint)(y) << 16) + (uint)(z)) + + #define THIS_GCC_VERSION ((kt->gcc_version[0] << 16) + \ + (kt->gcc_version[1] << 8) + \ +-- +2.37.1 + diff --git a/0002-Fix-failure-of-dev-d-D-options-on-Linux-6.4-and-late.patch b/0002-Fix-failure-of-dev-d-D-options-on-Linux-6.4-and-late.patch new file mode 100644 index 0000000..89bc7d5 --- /dev/null +++ b/0002-Fix-failure-of-dev-d-D-options-on-Linux-6.4-and-late.patch @@ -0,0 +1,179 @@ +From 58c1816521c2e6bece3d69256b1866c9df8d93aa Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Tue, 16 May 2023 08:59:50 +0900 +Subject: [PATCH 2/6] Fix failure of "dev -d|-D" options on Linux 6.4 and later + kernels + +Kernel commit 2df418cf4b72 ("driver core: class: remove subsystem +private pointer from struct class"), which is contained in Linux 6.4 and +later kernels, removed the class.p member for struct subsys_private. As +a result, the "dev -d|-D" options fail with the following error. + + dev: invalid structure member offset: class_p + FILE: dev.c LINE: 4689 FUNCTION: init_iter() + +Search the class_kset list for the subsys_private of block class to fix +this. + +As a preparation, introduce get_subsys_private() function, which is +abstracted from the same search procedure in init_memory_block(). 
+ +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + defs.h | 1 + + dev.c | 20 +++++++++++++++++--- + memory.c | 35 +++-------------------------------- + tools.c | 43 +++++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 64 insertions(+), 35 deletions(-) + +diff --git a/defs.h b/defs.h +index 211fc9d55d33..21cc760444d1 100644 +--- a/defs.h ++++ b/defs.h +@@ -5521,6 +5521,7 @@ struct rb_node *rb_left(struct rb_node *, struct rb_node *); + struct rb_node *rb_next(struct rb_node *); + struct rb_node *rb_last(struct rb_root *); + long percpu_counter_sum_positive(ulong fbc); ++ulong get_subsys_private(char *, char *); + + /* + * symbols.c +diff --git a/dev.c b/dev.c +index 75d30bd022a1..9d38aef9b3db 100644 +--- a/dev.c ++++ b/dev.c +@@ -4686,9 +4686,16 @@ init_iter(struct iter *i) + } else { + /* kernel version > 2.6.27, klist */ + unsigned long class_private_addr; +- readmem(block_class_addr + OFFSET(class_p), KVADDR, +- &class_private_addr, sizeof(class_private_addr), +- "class.p", FAULT_ON_ERROR); ++ ++ if (INVALID_MEMBER(class_p)) /* kernel version >= 6.4 */ ++ class_private_addr = get_subsys_private("class_kset", "block"); ++ else ++ readmem(block_class_addr + OFFSET(class_p), KVADDR, ++ &class_private_addr, sizeof(class_private_addr), ++ "class.p", FAULT_ON_ERROR); ++ ++ if (!class_private_addr) ++ error(FATAL, "cannot determine subsys_private for block.\n"); + + if (VALID_STRUCT(class_private)) { + /* 2.6.27 < kernel version <= 2.6.37-rc2 */ +@@ -4823,6 +4830,13 @@ void diskio_init(void) + if (INVALID_MEMBER(class_devices)) + MEMBER_OFFSET_INIT(class_devices, "class", "devices"); + MEMBER_OFFSET_INIT(class_p, "class", "p"); ++ if (INVALID_MEMBER(class_p)) { ++ MEMBER_OFFSET_INIT(kset_list, "kset", "list"); ++ MEMBER_OFFSET_INIT(kset_kobj, "kset", "kobj"); ++ MEMBER_OFFSET_INIT(kobject_name, "kobject", "name"); ++ MEMBER_OFFSET_INIT(kobject_entry, "kobject", "entry"); ++ MEMBER_OFFSET_INIT(subsys_private_subsys, "subsys_private", 
"subsys"); ++ } + MEMBER_OFFSET_INIT(class_private_devices, "class_private", + "class_devices"); + MEMBER_OFFSET_INIT(device_knode_class, "device", "knode_class"); +diff --git a/memory.c b/memory.c +index 0568f18eb9b7..953fc380c03c 100644 +--- a/memory.c ++++ b/memory.c +@@ -17865,38 +17865,9 @@ init_memory_block(int *klistcnt, ulong **klistbuf) + * v6.3-rc1 + * d2bf38c088e0 driver core: remove private pointer from struct bus_type + */ +- if (INVALID_MEMBER(bus_type_p)) { +- int i, cnt; +- char buf[32]; +- ulong bus_kset, list, name; +- +- BZERO(ld, sizeof(struct list_data)); +- +- get_symbol_data("bus_kset", sizeof(ulong), &bus_kset); +- readmem(bus_kset + OFFSET(kset_list), KVADDR, &list, +- sizeof(ulong), "bus_kset.list", FAULT_ON_ERROR); +- +- ld->flags |= LIST_ALLOCATE; +- ld->start = list; +- ld->end = bus_kset + OFFSET(kset_list); +- ld->list_head_offset = OFFSET(kobject_entry); +- +- cnt = do_list(ld); +- for (i = 0; i < cnt; i++) { +- readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name, +- sizeof(ulong), "kobject.name", FAULT_ON_ERROR); +- read_string(name, buf, sizeof(buf)-1); +- if (CRASHDEBUG(1)) +- fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf); +- if (STREQ(buf, "memory")) { +- /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */ +- private = ld->list_ptr[i] - OFFSET(kset_kobj) +- - OFFSET(subsys_private_subsys); +- break; +- } +- } +- FREEBUF(ld->list_ptr); +- } else { ++ if (INVALID_MEMBER(bus_type_p)) ++ private = get_subsys_private("bus_kset", "memory"); ++ else { + ulong memory_subsys = symbol_value("memory_subsys"); + readmem(memory_subsys + OFFSET(bus_type_p), KVADDR, &private, + sizeof(void *), "memory_subsys.private", FAULT_ON_ERROR); +diff --git a/tools.c b/tools.c +index c2cfa7e280bc..392a79707e61 100644 +--- a/tools.c ++++ b/tools.c +@@ -6963,3 +6963,46 @@ percpu_counter_sum_positive(ulong fbc) + + return (ret < 0) ? 
0 : ret; + } ++ ++ulong ++get_subsys_private(char *kset_name, char *target_name) ++{ ++ ulong kset_addr, kset_list, name_addr, private = 0; ++ struct list_data list_data, *ld; ++ char buf[32]; ++ int i, cnt; ++ ++ if (!symbol_exists(kset_name)) ++ return 0; ++ ++ ld = &list_data; ++ BZERO(ld, sizeof(struct list_data)); ++ ++ get_symbol_data(kset_name, sizeof(ulong), &kset_addr); ++ readmem(kset_addr + OFFSET(kset_list), KVADDR, &kset_list, ++ sizeof(ulong), "kset.list", FAULT_ON_ERROR); ++ ++ ld->flags |= LIST_ALLOCATE; ++ ld->start = kset_list; ++ ld->end = kset_addr + OFFSET(kset_list); ++ ld->list_head_offset = OFFSET(kobject_entry); ++ ++ cnt = do_list(ld); ++ ++ for (i = 0; i < cnt; i++) { ++ readmem(ld->list_ptr[i] + OFFSET(kobject_name), KVADDR, &name_addr, ++ sizeof(ulong), "kobject.name", FAULT_ON_ERROR); ++ read_string(name_addr, buf, sizeof(buf)-1); ++ if (CRASHDEBUG(1)) ++ fprintf(fp, "kobject: %lx name: %s\n", ld->list_ptr[i], buf); ++ if (STREQ(buf, target_name)) { ++ /* entry is subsys_private.subsys.kobj. See bus_to_subsys(). */ ++ private = ld->list_ptr[i] - OFFSET(kset_kobj) ++ - OFFSET(subsys_private_subsys); ++ break; ++ } ++ } ++ FREEBUF(ld->list_ptr); ++ ++ return private; ++} +-- +2.37.1 + diff --git a/0003-Fix-kmem-v-option-displaying-no-regions-on-Linux-6.3.patch b/0003-Fix-kmem-v-option-displaying-no-regions-on-Linux-6.3.patch new file mode 100644 index 0000000..5a2f744 --- /dev/null +++ b/0003-Fix-kmem-v-option-displaying-no-regions-on-Linux-6.3.patch @@ -0,0 +1,86 @@ +From 342cf340ed0386880fe2a3115d6bef32eabb511b Mon Sep 17 00:00:00 2001 +From: Kazuhito Hagio +Date: Thu, 18 May 2023 11:48:28 +0900 +Subject: [PATCH 3/6] Fix "kmem -v" option displaying no regions on Linux 6.3 + and later + +Kernel commit 869176a09606 ("mm/vmalloc.c: add flags to mark vm_map_ram +area"), which is contained in Linux 6.3 and later, added "flags" member +to struct vmap_area. 
This was the revival of the "flags" member as +kernel commit 688fcbfc06e4 had eliminated it before. + +As a result, crash started to use the old procedure using the member and +displays no vmalloc'd regions, because it does not have the same flag +value as the old one. + + crash> kmem -v + VMAP_AREA VM_STRUCT ADDRESS RANGE SIZE + crash> + +To fix this, also check if vmap_area.purge_list exists, which was +introduced with the flags and removed later, to determine that the flags +member is the old one. + +Related vmap_area history: + v2.6.28 db64fe02258f introduced vmap_area with flags and purge_list + v5.4 688fcbfc06e4 removed flags + v5.11 96e2db456135 removed purge_list + v6.3 869176a09606 added flags again + +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + defs.h | 1 + + memory.c | 4 +++- + symbols.c | 1 + + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/defs.h b/defs.h +index 21cc760444d1..bfa07c3f5150 100644 +--- a/defs.h ++++ b/defs.h +@@ -2216,6 +2216,7 @@ struct offset_table { /* stash of commonly-used offsets */ + long in6_addr_in6_u; + long kset_kobj; + long subsys_private_subsys; ++ long vmap_area_purge_list; + }; + + struct size_table { /* stash of commonly-used sizes */ +diff --git a/memory.c b/memory.c +index 953fc380c03c..15fa8b2f08f1 100644 +--- a/memory.c ++++ b/memory.c +@@ -429,6 +429,7 @@ vm_init(void) + MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "vm"); + if (INVALID_MEMBER(vmap_area_vm)) + MEMBER_OFFSET_INIT(vmap_area_vm, "vmap_area", "private"); ++ MEMBER_OFFSET_INIT(vmap_area_purge_list, "vmap_area", "purge_list"); + STRUCT_SIZE_INIT(vmap_area, "vmap_area"); + if (VALID_MEMBER(vmap_area_va_start) && + VALID_MEMBER(vmap_area_va_end) && +@@ -9063,7 +9064,8 @@ dump_vmap_area(struct meminfo *vi) + readmem(ld->list_ptr[i], KVADDR, vmap_area_buf, + SIZE(vmap_area), "vmap_area struct", FAULT_ON_ERROR); + +- if (VALID_MEMBER(vmap_area_flags)) { ++ if (VALID_MEMBER(vmap_area_flags) && ++ 
VALID_MEMBER(vmap_area_purge_list)) { + flags = ULONG(vmap_area_buf + OFFSET(vmap_area_flags)); + if (flags != VM_VM_AREA) + continue; +diff --git a/symbols.c b/symbols.c +index f0721023816d..7b1d59203b90 100644 +--- a/symbols.c ++++ b/symbols.c +@@ -9169,6 +9169,7 @@ dump_offset_table(char *spec, ulong makestruct) + OFFSET(vmap_area_vm)); + fprintf(fp, " vmap_area_flags: %ld\n", + OFFSET(vmap_area_flags)); ++ fprintf(fp, " vmap_area_purge_list: %ld\n", OFFSET(vmap_area_purge_list)); + + fprintf(fp, " module_size_of_struct: %ld\n", + OFFSET(module_size_of_struct)); +-- +2.37.1 + diff --git a/0004-arm64-x86_64-Enhance-vtop-command-to-show-zero_pfn-i.patch b/0004-arm64-x86_64-Enhance-vtop-command-to-show-zero_pfn-i.patch new file mode 100644 index 0000000..e3e9cae --- /dev/null +++ b/0004-arm64-x86_64-Enhance-vtop-command-to-show-zero_pfn-i.patch @@ -0,0 +1,225 @@ +From a0eceb041dfa248d66f9f9a455106184b7823bec Mon Sep 17 00:00:00 2001 +From: Rongwei Wang +Date: Mon, 29 May 2023 19:55:51 +0800 +Subject: [PATCH 4/6] arm64/x86_64: Enhance "vtop" command to show zero_pfn + information + +Enhance the "vtop" command to show "ZERO PAGE" information when PTE or +PMD has attached to {huge_}zero_pfn. For example: + + crash> vtop -c 13674 ffff8917e000 + VIRTUAL PHYSICAL + ffff8917e000 836e71000 + + PAGE DIRECTORY: ffff000802f8d000 + PGD: ffff000802f8dff8 => 884e29003 + PUD: ffff000844e29ff0 => 884e93003 + PMD: ffff000844e93240 => 840413003 + PTE: ffff000800413bf0 => 160000836e71fc3 + PAGE: 836e71000 (ZERO PAGE) + ... + +Hugepage case: + crash> vtop -c 14538 ffff95800000 + VIRTUAL PHYSICAL + ffff95800000 910c00000 + + PAGE DIRECTORY: ffff000801fa0000 + PGD: ffff000801fa0ff8 => 884f53003 + PUD: ffff000844f53ff0 => 8426cb003 + PMD: ffff0008026cb560 => 60000910c00fc1 + PAGE: 910c00000 (2MB, ZERO PAGE) + ... + +Note that +1. support displaying zero page only for THP (except for 1G THP) +2. do not support hugetlb cases. 
+ +Signed-off-by: Rongwei Wang +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + arm64.c | 24 ++++++++++++++++-------- + defs.h | 5 +++++ + memory.c | 23 +++++++++++++++++++++++ + x86_64.c | 9 +++++---- + 4 files changed, 49 insertions(+), 12 deletions(-) + +diff --git a/arm64.c b/arm64.c +index 56fb841f43f8..efbdccbec9d3 100644 +--- a/arm64.c ++++ b/arm64.c +@@ -1787,7 +1787,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if ((pgd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) { + ulong sectionbase = (pgd_val & SECTION_PAGE_MASK_512MB) & PHYS_MASK; + if (verbose) { +- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase); ++ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase, ++ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : ""); + arm64_translate_pte(pgd_val, 0, 0); + } + *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB); +@@ -1806,7 +1807,8 @@ arm64_vtop_2level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if (pte_val & PTE_VALID) { + *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr); + if (verbose) { +- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); ++ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr), ++ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : ""); + arm64_translate_pte(pte_val, 0, 0); + } + } else { +@@ -1859,7 +1861,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) { + ulong sectionbase = PTE_TO_PHYS(pmd_val) & SECTION_PAGE_MASK_512MB; + if (verbose) { +- fprintf(fp, " PAGE: %lx (512MB)\n\n", sectionbase); ++ fprintf(fp, " PAGE: %lx (512MB%s)\n\n", sectionbase, ++ IS_ZEROPAGE(sectionbase) ? 
", ZERO PAGE" : ""); + arm64_translate_pte(pmd_val, 0, 0); + } + *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_512MB); +@@ -1878,7 +1881,8 @@ arm64_vtop_3level_64k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if (pte_val & PTE_VALID) { + *paddr = PTE_TO_PHYS(pte_val) + PAGEOFFSET(vaddr); + if (verbose) { +- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); ++ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr), ++ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : ""); + arm64_translate_pte(pte_val, 0, 0); + } + } else { +@@ -1940,7 +1944,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) { + ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK; + if (verbose) { +- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase); ++ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase, ++ IS_ZEROPAGE(sectionbase) ? ", ZERO PAGE" : ""); + arm64_translate_pte(pmd_val, 0, 0); + } + *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB); +@@ -1959,7 +1964,8 @@ arm64_vtop_3level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if (pte_val & PTE_VALID) { + *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr); + if (verbose) { +- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); ++ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr), ++ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : ""); + arm64_translate_pte(pte_val, 0, 0); + } + } else { +@@ -2029,7 +2035,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if ((pmd_val & PMD_TYPE_MASK) == PMD_TYPE_SECT) { + ulong sectionbase = (pmd_val & SECTION_PAGE_MASK_2MB) & PHYS_MASK; + if (verbose) { +- fprintf(fp, " PAGE: %lx (2MB)\n\n", sectionbase); ++ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", sectionbase, ++ IS_ZEROPAGE(sectionbase) ? 
", ZERO PAGE" : ""); + arm64_translate_pte(pmd_val, 0, 0); + } + *paddr = sectionbase + (vaddr & ~SECTION_PAGE_MASK_2MB); +@@ -2048,7 +2055,8 @@ arm64_vtop_4level_4k(ulong pgd, ulong vaddr, physaddr_t *paddr, int verbose) + if (pte_val & PTE_VALID) { + *paddr = (PAGEBASE(pte_val) & PHYS_MASK) + PAGEOFFSET(vaddr); + if (verbose) { +- fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); ++ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr), ++ IS_ZEROPAGE(PAGEBASE(*paddr)) ? "(ZERO PAGE)" : ""); + arm64_translate_pte(pte_val, 0, 0); + } + } else { +diff --git a/defs.h b/defs.h +index bfa07c3f5150..7d8bb8ab3de1 100644 +--- a/defs.h ++++ b/defs.h +@@ -2619,6 +2619,8 @@ struct vm_table { /* kernel VM-related data */ + char *name; + } *pageflags_data; + ulong max_mem_section_nr; ++ ulong zero_paddr; ++ ulong huge_zero_paddr; + }; + + #define NODES (0x1) +@@ -3000,6 +3002,9 @@ struct load_module { + #define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask) + #define PHYSPAGEBASE(X) (((physaddr_t)(X)) & (physaddr_t)machdep->pagemask) + ++#define IS_ZEROPAGE(paddr) ((paddr) == vt->zero_paddr || \ ++ (paddr) == vt->huge_zero_paddr) ++ + /* + * Sparse memory stuff + * These must follow the definitions in the kernel mmzone.h +diff --git a/memory.c b/memory.c +index 15fa8b2f08f1..ea3005a5c01f 100644 +--- a/memory.c ++++ b/memory.c +@@ -1209,6 +1209,27 @@ vm_init(void) + machdep->memory_size())); + vt->paddr_prlen = strlen(buf); + ++ vt->zero_paddr = ~0UL; ++ if (kernel_symbol_exists("zero_pfn")) { ++ ulong zero_pfn; ++ ++ if (readmem(symbol_value("zero_pfn"), KVADDR, ++ &zero_pfn, sizeof(zero_pfn), ++ "read zero_pfn", QUIET|RETURN_ON_ERROR)) ++ vt->zero_paddr = zero_pfn << PAGESHIFT(); ++ } ++ ++ vt->huge_zero_paddr = ~0UL; ++ if (kernel_symbol_exists("huge_zero_pfn")) { ++ ulong huge_zero_pfn; ++ ++ if (readmem(symbol_value("huge_zero_pfn"), KVADDR, ++ &huge_zero_pfn, sizeof(huge_zero_pfn), ++ "read huge_zero_pfn", QUIET|RETURN_ON_ERROR) && ++ huge_zero_pfn != ~0UL) ++ 
vt->huge_zero_paddr = huge_zero_pfn << PAGESHIFT(); ++ } ++ + if (vt->flags & PERCPU_KMALLOC_V1) + vt->dump_kmem_cache = dump_kmem_cache_percpu_v1; + else if (vt->flags & PERCPU_KMALLOC_V2) +@@ -14065,6 +14086,8 @@ dump_vm_table(int verbose) + } else { + fprintf(fp, " node_online_map: (unused)\n"); + } ++ fprintf(fp, " zero_paddr: %lx\n", vt->zero_paddr); ++ fprintf(fp, " huge_zero_paddr: %lx\n", vt->huge_zero_paddr); + fprintf(fp, " nr_vm_stat_items: %d\n", vt->nr_vm_stat_items); + fprintf(fp, " vm_stat_items: %s", (vt->flags & VM_STAT) ? + "\n" : "(not used)\n"); +diff --git a/x86_64.c b/x86_64.c +index 5019c69e452e..693a08bea758 100644 +--- a/x86_64.c ++++ b/x86_64.c +@@ -2114,8 +2114,9 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in + goto no_upage; + if (pmd_pte & _PAGE_PSE) { + if (verbose) { +- fprintf(fp, " PAGE: %lx (2MB)\n\n", +- PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK); ++ fprintf(fp, " PAGE: %lx (2MB%s)\n\n", ++ PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK, ++ IS_ZEROPAGE(PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK) ? ", ZERO PAGE" : ""); + x86_64_translate_pte(pmd_pte, 0, 0); + } + +@@ -2143,8 +2144,8 @@ x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, in + *paddr = (PAGEBASE(pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr); + + if (verbose) { +- fprintf(fp, " PAGE: %lx\n\n", +- PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK); ++ fprintf(fp, " PAGE: %lx %s\n\n", PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK, ++ IS_ZEROPAGE(PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK) ? 
"(ZERO PAGE)" : ""); + x86_64_translate_pte(pte, 0, 0); + } + +-- +2.37.1 + diff --git a/0005-diskdump-netdump-fix-segmentation-fault-caused-by-fa.patch b/0005-diskdump-netdump-fix-segmentation-fault-caused-by-fa.patch new file mode 100644 index 0000000..9b70742 --- /dev/null +++ b/0005-diskdump-netdump-fix-segmentation-fault-caused-by-fa.patch @@ -0,0 +1,165 @@ +From db8c030857b4e318728c51c20da687906c109d0d Mon Sep 17 00:00:00 2001 +From: HATAYAMA Daisuke +Date: Tue, 30 May 2023 19:38:34 +0900 +Subject: [PATCH 5/6] diskdump/netdump: fix segmentation fault caused by + failure of stopping CPUs + +There's no NMI on ARM. Hence, stopping the non-panicking CPUs from the +panicking CPU via IPI can fail easily if interrupts are being masked +at that moment. Moreover, crash_notes are not initialized for such +unstopped CPUs and the corresponding NT_PRSTATUS notes are not +attached to vmcore. However, crash utility never takes such +uninitialized crash_notes into consideration and then ends up +mapping different NT_PRSTATUS to actually unstopped CPUs. This corrupt +mapping can lead crash utility to a segmentation fault in the +operations where register values in NT_PRSTATUS notes are used. + +For example: + + crash> bt 1408 + PID: 1408 TASK: ffff000003e22200 CPU: 2 COMMAND: "repro" + Segmentation fault (core dumped) + + crash> help -D + diskdump_data: + filename: 127.0.0.1-2023-05-26-02:21:27/vmcore-ld1 + flags: 46 (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED|LZO_SUPPORTED) + ...snip... + notes_buf: 1815df0 + num_vmcoredd_notes: 0 + num_prstatus_notes: 5 + notes[0]: 1815df0 (NT_PRSTATUS) + si.signo: 0 si.code: 0 si.errno: 0 + ...snip... + PSTATE: 80400005 FPVALID: 00000000 + notes[4]: 1808f10 (NT_PRSTATUS) + Segmentation fault (core dumped) + +To fix this issue, let's map NT_PRSTATUS to some CPU only if the +corresponding crash_notes is checked to be initialized. 
+ +[ kh: moved existence check for crash_notes out of the loop ] + +Signed-off-by: HATAYAMA Daisuke +Signed-off-by: Kazuhito Hagio +Signed-off-by: Lianbo Jiang +--- + defs.h | 1 + + diskdump.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + netdump.c | 7 ++++++- + 3 files changed, 51 insertions(+), 2 deletions(-) + +diff --git a/defs.h b/defs.h +index 7d8bb8ab3de1..6520d2f13f48 100644 +--- a/defs.h ++++ b/defs.h +@@ -7118,6 +7118,7 @@ int dumpfile_is_split(void); + void show_split_dumpfiles(void); + void x86_process_elf_notes(void *, unsigned long); + void *diskdump_get_prstatus_percpu(int); ++int have_crash_notes(int cpu); + void map_cpus_to_prstatus_kdump_cmprs(void); + void diskdump_display_regs(int, FILE *); + void process_elf32_notes(void *, ulong); +diff --git a/diskdump.c b/diskdump.c +index 94bca4ded572..2c284ff3f97f 100644 +--- a/diskdump.c ++++ b/diskdump.c +@@ -101,12 +101,54 @@ int dumpfile_is_split(void) + return KDUMP_SPLIT(); + } + ++int have_crash_notes(int cpu) ++{ ++ ulong crash_notes, notes_ptr; ++ char *buf, *p; ++ Elf64_Nhdr *note = NULL; ++ ++ if (!readmem(symbol_value("crash_notes"), KVADDR, &crash_notes, ++ sizeof(crash_notes), "crash_notes", RETURN_ON_ERROR)) { ++ error(WARNING, "cannot read \"crash_notes\"\n"); ++ return FALSE; ++ } ++ ++ if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF)) ++ notes_ptr = crash_notes + kt->__per_cpu_offset[cpu]; ++ else ++ notes_ptr = crash_notes; ++ ++ buf = GETBUF(SIZE(note_buf)); ++ ++ if (!readmem(notes_ptr, KVADDR, buf, ++ SIZE(note_buf), "note_buf_t", RETURN_ON_ERROR)) { ++ error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", cpu); ++ return FALSE; ++ } ++ ++ note = (Elf64_Nhdr *)buf; ++ p = buf + sizeof(Elf64_Nhdr); ++ ++ if (note->n_type != NT_PRSTATUS) { ++ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != NT_PRSTATUS)\n", cpu); ++ return FALSE; ++ } ++ ++ if (!STRNEQ(p, "CORE")) { ++ error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name != \"CORE\")\n", cpu); ++ return FALSE; 
++ } ++ ++ return TRUE; ++} ++ + void + map_cpus_to_prstatus_kdump_cmprs(void) + { + void **nt_ptr; + int online, i, j, nrcpus; + size_t size; ++ int crash_notes_exists; + + if (pc->flags2 & QEMU_MEM_DUMP_COMPRESSED) /* notes exist for all cpus */ + goto resize_note_pointers; +@@ -129,9 +171,10 @@ map_cpus_to_prstatus_kdump_cmprs(void) + * Re-populate the array with the notes mapping to online cpus + */ + nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS); ++ crash_notes_exists = kernel_symbol_exists("crash_notes"); + + for (i = 0, j = 0; i < nrcpus; i++) { +- if (in_cpu_map(ONLINE_MAP, i)) { ++ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) { + dd->nt_prstatus_percpu[i] = nt_ptr[j++]; + dd->num_prstatus_notes = + MAX(dd->num_prstatus_notes, i+1); +diff --git a/netdump.c b/netdump.c +index 4eba66cecb55..61ddeaa08831 100644 +--- a/netdump.c ++++ b/netdump.c +@@ -75,6 +75,7 @@ map_cpus_to_prstatus(void) + void **nt_ptr; + int online, i, j, nrcpus; + size_t size; ++ int crash_notes_exists; + + if (pc->flags2 & QEMU_MEM_DUMP_ELF) /* notes exist for all cpus */ + return; +@@ -97,10 +98,14 @@ map_cpus_to_prstatus(void) + * Re-populate the array with the notes mapping to online cpus + */ + nrcpus = (kt->kernel_NR_CPUS ? 
kt->kernel_NR_CPUS : NR_CPUS); ++ crash_notes_exists = kernel_symbol_exists("crash_notes"); + + for (i = 0, j = 0; i < nrcpus; i++) { +- if (in_cpu_map(ONLINE_MAP, i)) ++ if (in_cpu_map(ONLINE_MAP, i) && (!crash_notes_exists || have_crash_notes(i))) { + nd->nt_prstatus_percpu[i] = nt_ptr[j++]; ++ nd->num_prstatus_notes = ++ MAX(nd->num_prstatus_notes, i+1); ++ } + } + + FREEBUF(nt_ptr); +-- +2.37.1 + diff --git a/0006-Fix-segfault-in-arm64_is_kernel_exception_frame-when.patch b/0006-Fix-segfault-in-arm64_is_kernel_exception_frame-when.patch new file mode 100644 index 0000000..230738d --- /dev/null +++ b/0006-Fix-segfault-in-arm64_is_kernel_exception_frame-when.patch @@ -0,0 +1,62 @@ +From 9868ebc8e648e5791764a51567a23efae7170d9b Mon Sep 17 00:00:00 2001 +From: HATAYAMA Daisuke +Date: Tue, 30 May 2023 19:38:35 +0900 +Subject: [PATCH 6/6] Fix segfault in arm64_is_kernel_exception_frame() when + corrupt stack pointer address is given + +Due to the corrupted mapping fixed by the previous commit, +arm64_is_kernel_exception_frame() can receive invalid stack pointer +address via the 2nd argument; different NT_PRSTATUS contains different +task's stack pointer address. However, macro STACK_OFFSET_TYPE() never +checks if a given address is within the range of the kernel stack of +the corresponding task and hence can result in referring to outside of +bt->stackbuf. 
+ + static int + arm64_is_kernel_exception_frame(struct bt_info *bt, ulong stkptr) + { + struct arm64_pt_regs *regs; + struct machine_specific *ms = machdep->machspec; + + regs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(stkptr))]; + + => if (INSTACK(regs->sp, bt) && INSTACK(regs->regs[29], bt) && + !(regs->pstate & (0xffffffff00000000ULL | PSR_MODE32_BIT)) && + is_kernel_text(regs->pc) && + is_kernel_text(regs->regs[30] | ms->CONFIG_ARM64_KERNELPACMASK)) { + +To fix this issue, check if the given stack pointer address points to +the range of the kernel stack of the corresponding task, and abort if +it turns out to be invalid. + +Although the corrupted mapping has already been fixed, this fix is +still needed because a corrupt stack pointer address can still be passed +here for different reasons. Consider, for example, that data on the +kernel stack can be modified abnormally due to any kernel bugs or +hardware issues. + +Signed-off-by: HATAYAMA Daisuke +Signed-off-by: Lianbo Jiang +--- + defs.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/defs.h b/defs.h +index 6520d2f13f48..11fdc17e60d0 100644 +--- a/defs.h ++++ b/defs.h +@@ -976,7 +976,10 @@ struct bt_info { + + #define STACK_OFFSET_TYPE(OFF) \ + (((ulong)(OFF) > STACKSIZE()) ? \ +- (ulong)((ulong)(OFF) - (ulong)(bt->stackbase)) : (ulong)(OFF)) ++ (((ulong)(OFF) < (ulong)(bt->stackbase) || (ulong)(OFF) >= (ulong)(bt->stackbase) + STACKSIZE()) ? 
\ ++ error(FATAL, "invalid stack pointer is given\n") : \ ++ (ulong)((ulong)(OFF) - (ulong)(bt->stackbase))) : \ ++ (ulong)(OFF)) + + #define GET_STACK_ULONG(OFF) \ + *((ulong *)((char *)(&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(OFF))]))) +-- +2.37.1 + diff --git a/crash.spec b/crash.spec index 14ef193..c8dd386 100644 --- a/crash.spec +++ b/crash.spec @@ -4,7 +4,7 @@ Summary: Kernel analysis utility for live systems, netdump, diskdump, kdump, LKCD or mcore dumpfiles Name: crash Version: 7.3.2 -Release: 6%{?dist} +Release: 7%{?dist} License: GPLv3 Group: Development/Debuggers Source0: https://github.com/crash-utility/crash/archive/crash-%{version}.tar.gz @@ -105,9 +105,15 @@ Patch85: 0086-Fix-fuser-command-to-properly-deal-with-an-invalid-a.patch Patch86: 0087-Replace-lseek-read-into-pread-for-kcore-and-vmcore-r.patch Patch87: 0088-Fix-net-command-on-kernel-configured-with-CONFIG_IPV.patch Patch88: 0089-gdb-7.6-fix-for-whatis-command-causes-crash-coredump.patch -Patch89: lzo_snappy_zstd.patch -Patch90: rhel8_build.patch -Patch91: rhel8-freepointer.patch +Patch89: 0001-Fix-kernel-version-macros-for-revision-numbers-over-.patch +Patch90: 0002-Fix-failure-of-dev-d-D-options-on-Linux-6.4-and-late.patch +Patch91: 0003-Fix-kmem-v-option-displaying-no-regions-on-Linux-6.3.patch +Patch92: 0004-arm64-x86_64-Enhance-vtop-command-to-show-zero_pfn-i.patch +Patch93: 0005-diskdump-netdump-fix-segmentation-fault-caused-by-fa.patch +Patch94: 0006-Fix-segfault-in-arm64_is_kernel_exception_frame-when.patch +Patch95: lzo_snappy_zstd.patch +Patch96: rhel8_build.patch +Patch97: rhel8-freepointer.patch %description The core analysis suite is a self-contained tool that can be used to @@ -220,6 +226,12 @@ offered by Mission Critical Linux, or the LKCD kernel patch. %patch89 -p1 %patch90 -p1 %patch91 -p1 +%patch92 -p1 +%patch93 -p1 +%patch94 -p1 +%patch95 -p1 +%patch96 -p1 +%patch97 -p1 %build cp %{SOURCE1} . 
@@ -250,6 +262,9 @@ rm -rf %{buildroot} %{_includedir}/* %changelog +* Wed Jun 07 2023 Lianbo Jiang - 7.3.2-7 +- Fix segfault caused by failure of stopping CPUs + * Mon May 08 2023 Lianbo Jiang - 7.3.2-6 - Fix for freelist pointer on PPC64le, ARM64 and S390x