--- crash/extensions/Makefile.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/extensions/Makefile 2005-11-08 11:38:21.000000000 -0500 @@ -0,0 +1,41 @@ +# +# Makefile for building crash shared object extensions +# +# Copyright (C) 2005 David Anderson +# Copyright (C) 2005 Red Hat, Inc. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# To build the extension shared objects in this directory, run +# "make extensions" from the top-level directory. +# +# To add a new extension object: +# +# - add the new source file to the EXTENSION_SOURCE_FILES list +# in the top-level Makefile +# - add the object file name to the EXTENSION_OBJECT_FILES list +# in the top-level Makefile +# - create a compile stanza below, typically using "echo.so" as +# a base template. +# + +all: link_defs $(OBJECTS) + +link_defs: + @if [ ! -f defs.h ]; then \ + ln -s ../defs.h; fi + +echo.so: ../defs.h echo.c + gcc -nostartfiles -shared -rdynamic -o echo.so echo.c -fPIC -D$(TARGET) + +dminfo.so: ../defs.h dminfo.c + gcc -nostartfiles -shared -rdynamic -o dminfo.so dminfo.c -fPIC -D$(TARGET) + --- crash/extensions/echo.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/extensions/echo.c 2005-11-10 16:12:50.000000000 -0500 @@ -0,0 +1,105 @@ +/* echo.c - simple example of a crash extension + * + * Copyright (C) 2001, 2002 Mission Critical Linux, Inc. + * Copyright (C) 2002, 2003, 2004, 2005 David Anderson + * Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "defs.h" /* From the crash source top-level directory */ + +void cmd_echo(); /* Declare the commands and their help data. */ +char *help_echo[]; + +static struct command_table_entry command_table[] = { + "echo", cmd_echo, help_echo, 0, /* One or more commands, */ + NULL, /* terminated by NULL, */ +}; + + +_init() /* Register the command set. */ +{ + register_extension(command_table); +} + +/* + * The _fini() function is called if the shared object is unloaded. + * If desired, perform any cleanups here. + */ +_fini() { } + + +/* + * Arguments are passed to the command functions in the global args[argcnt] + * array. See getopt(3) for info on dash arguments. Check out defs.h and + * other crash commands for usage of the myriad of utility routines available + * to accomplish what your task. + */ +void +cmd_echo() +{ + int c; + + while ((c = getopt(argcnt, args, "")) != EOF) { + switch(c) + { + default: + argerrs++; + break; + } + } + + if (argerrs) + cmd_usage(pc->curcmd, SYNOPSIS); + + while (args[optind]) + fprintf(fp, "%s ", args[optind++]); + + fprintf(fp, "\n"); +} + +/* + * The optional help data is simply an array of strings in a defined format. + * For example, the "help echo" command will use the help_echo[] string + * array below to create a help page that looks like this: + * + * NAME + * echo - echoes back its arguments + * + * SYNOPSIS + * echo arg ... + * + * DESCRIPTION + * This command simply echoes back its arguments. 
+ * + * EXAMPLE + * Echo back all command arguments: + * + * crash> echo hello, world + * hello, world + * + */ + +char *help_echo[] = { + "echo", /* command name */ + "echoes back its arguments", /* short description */ + "arg ...", /* argument synopsis, or " " if none */ + + " This command simply echoes back its arguments.", + "\nEXAMPLE", + " Echo back all command arguments:\n", + " crash> echo hello, world", + " hello, world", + NULL +}; + + --- crash/extensions/dminfo.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/extensions/dminfo.c 2005-11-10 16:12:50.000000000 -0500 @@ -0,0 +1,1531 @@ +/* dminfo.c - crash extension module for device-mapper analysis + * + * Copyright (C) 2005 NEC Corporation + * Copyright (C) 2005 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include "defs.h" /* From the crash source top-level directory */ + +/* + * Indices of size-offset array (Used by GET_xxx macros) + * + * DM__ + */ +enum { + DM_hash_cell_name_list = 0, + DM_hash_cell_name, + DM_hash_cell_md, + + DM_mapped_device_disk, + DM_mapped_device_map, + + DM_gendisk_major, + DM_gendisk_first_minor, + DM_gendisk_disk_name, + + DM_dm_table_num_targets, + DM_dm_table_targets, + DM_dm_table_devices, + + DM_dm_target_type, + DM_dm_target_begin, + DM_dm_target_len, + DM_dm_target_private, + + DM_dm_dev_count, + DM_dm_dev_bdev, + DM_dm_dev_name, + + DM_dm_io_md, + DM_dm_io_bio, + + DM_target_type_name, + + DM_target_io_io, + + DM_block_device_bd_disk, + + DM_bio_bi_private, + + DM_bio_list_head, + + DM_linear_c_dev, + DM_linear_c_start, + + DM_multipath_hw_handler, + DM_multipath_nr_priority_groups, + DM_multipath_priority_groups, + DM_multipath_nr_valid_paths, + DM_multipath_current_pg, + DM_multipath_queue_if_no_path, + DM_multipath_queue_size, + + DM_hw_handler_type, + DM_hw_handler_type_name, + + DM_priority_group_ps, + DM_priority_group_pg_num, + DM_priority_group_bypassed, + DM_priority_group_nr_pgpaths, + DM_priority_group_pgpaths, + + DM_path_selector_type, + DM_path_selector_type_name, + + DM_pgpath_fail_count, + DM_pgpath_path, + + DM_path_dev, + DM_path_is_active, + + DM_mirror_set_rh, + DM_mirror_set_reads, + DM_mirror_set_writes, + DM_mirror_set_in_sync, + DM_mirror_set_nr_mirrors, + DM_mirror_set_mirror, + + DM_region_hash_log, + DM_region_hash_quiesced_regions, + DM_region_hash_recovered_regions, + + DM_dirty_log_type, + DM_dirty_log_type_name, + + DM_mirror_error_count, + DM_mirror_dev, + DM_mirror_offset, + + DM_crypt_config_dev, + DM_crypt_config_iv_mode, + DM_crypt_config_tfm, + DM_crypt_config_key_size, + DM_crypt_config_key, + + DM_crypto_tfm_crt_u, + DM_crypto_tfm___crt_alg, + + DM_crypto_alg_cra_name, + + DM_cipher_tfm_cit_mode, + + DM_stripe_c_stripes, + DM_stripe_c_chunk_mask, + DM_stripe_c_stripe, + + 
DM_stripe_dev, + + DM_dm_snapshot_origin, + DM_dm_snapshot_cow, + DM_dm_snapshot_chunk_size, + DM_dm_snapshot_valid, + DM_dm_snapshot_type, + + NR_DMINFO_MEMBER_TABLE_ENTRY +}; + +/* Size-offset array for structure's member */ +static struct dminfo_member_entry { + unsigned long offset; + unsigned long size; +} mbr_ary[NR_DMINFO_MEMBER_TABLE_ENTRY]; + +/* + * Macros to retrieve data of given structure's member + * + * Macros except for the MSG assume 'struct s' is at 'addr' + */ +#define MSG(msg, s, m) msg ": " s "." m + +/* Initialize the size-offset array */ +#define INIT_MBR_TABLE(s, m) \ + do { \ + if (!mbr_ary[DM_##s##_##m].size) { \ + mbr_ary[DM_##s##_##m].offset = MEMBER_OFFSET("struct " #s, #m); \ + mbr_ary[DM_##s##_##m].size = MEMBER_SIZE("struct " #s, #m); \ + } \ + } while (0) + +/* + * Store the data of member m in ret. + * Initialize the size-offset array for the member m if needed. + */ +#define GET_VALUE(addr, s, m, ret) \ + do { \ + INIT_MBR_TABLE(s, m); \ + if (sizeof(ret) < mbr_ary[DM_##s##_##m].size) \ + fprintf(fp, "%s\n", \ + MSG("ERROR: GET_VALUE size_check", #s, #m)); \ + readmem(addr + mbr_ary[DM_##s##_##m].offset, KVADDR, &ret, \ + mbr_ary[DM_##s##_##m].size, MSG("GET_VALUE", #s, #m), \ + FAULT_ON_ERROR);\ + } while (0) + +/* + * Store the address of member m in ret. + * Initialize the size-offset array for the member m if needed. + */ +#define GET_ADDR(addr, s, m, ret) \ + do { \ + INIT_MBR_TABLE(s, m); \ + ret = addr + mbr_ary[DM_##s##_##m].offset; \ + } while (0) + +/* + * Store the string data of member m in ret. + * Initialize the size-offset array for the member m if needed. + */ +#define GET_STR(addr, s, m, ret, len) \ + do { \ + INIT_MBR_TABLE(s, m); \ + if (!read_string(addr + mbr_ary[DM_##s##_##m].offset, ret, len - 1)) \ + fprintf(fp, "%s\n", MSG("ERROR: GET_STR", #s, #m)); \ + } while (0) + +/* + * Store the string data pointed by member m in ret. + * Initialize the size-offset array for the member m if needed. 
+ */ +#define GET_PTR_STR(addr, s, m, ret, len) \ + do { \ + unsigned long tmp; \ + INIT_MBR_TABLE(s, m); \ + readmem(addr + mbr_ary[DM_##s##_##m].offset, KVADDR, &tmp, \ + mbr_ary[DM_##s##_##m].size, MSG("GET_PTR_STR", #s, #m),\ + FAULT_ON_ERROR);\ + if (!read_string(tmp, ret, len - 1)) \ + fprintf(fp, "%s\n", MSG("ERROR: GET_PTR_STR", #s, #m));\ + } while (0) + +/* + * Utility function/macro to walk the list + */ +static unsigned long +get_next_from_list_head(unsigned long addr) +{ + unsigned long ret; + + readmem(addr + OFFSET(list_head_next), KVADDR, &ret, sizeof(void *), + MSG("get_next_from_list_head", "list_head", "next"), + FAULT_ON_ERROR); + + return ret; +} + +#define list_for_each(next, head, last) \ + for (next = get_next_from_list_head(head), last = 0UL; \ + next && next != head && next != last; \ + last = next, next = get_next_from_list_head(next)) + +/* + * device-mapper target analyzer + * + * device-mapper has various target driver: linear, mirror, multipath, etc. + * Information specific to target is stored in its own way. + * Target-specific analyzer is provided for each target driver for this reason. 
+ */ +static struct dminfo_target_analyzer { + struct dminfo_target_analyzer *next; + char *target_name; + int (*ready) (void); /* returns true if analyzer is available */ + void (*show_table) (unsigned long); /* display table info */ + void (*show_status) (unsigned long); /* display status info */ + void (*show_queue) (unsigned long); /* display queued I/O info */ +} analyzers_head; + +static void +dminfo_register_target_analyzer(struct dminfo_target_analyzer *ta) +{ + ta->next = analyzers_head.next; + analyzers_head.next = ta; +} + +static struct +dminfo_target_analyzer *find_target_analyzer(char *target_type) +{ + struct dminfo_target_analyzer *ta; + + for (ta = analyzers_head.next; ta; ta = ta->next) + if (!strcmp(ta->target_name, target_type)) + return ta; + + return NULL; +} + +/* + * zero target + */ +static int +zero_ready(void) +{ + return 1; +} + +static void +zero_show_table(unsigned long target) +{ + unsigned long long start, len; + + /* Get target information */ + GET_VALUE(target, dm_target, begin, start); + GET_VALUE(target, dm_target, len, len); + + fprintf(fp, " begin:%llu len:%llu", start, len); +} + +static void +zero_show_status(unsigned long target) +{ + /* zero target has no status */ + fprintf(fp, " No status info"); +} + +static void +zero_show_queue(unsigned long target) +{ + /* zero target has no queue */ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer zero_analyzer = { + .target_name = "zero", + .ready = zero_ready, + .show_table = zero_show_table, + .show_status = zero_show_status, + .show_queue = zero_show_queue +}; + +/* + * error target + */ +static int +error_ready(void) +{ + return 1; +} + +static void +error_show_table(unsigned long target) +{ + unsigned long long start, len; + + /* Get target information */ + GET_VALUE(target, dm_target, begin, start); + GET_VALUE(target, dm_target, len, len); + + fprintf(fp, " begin:%llu len:%llu", start, len); +} + +static void +error_show_status(unsigned long target) 
+{ + /* error target has no status */ + fprintf(fp, " No status info"); +} + +static void +error_show_queue(unsigned long target) +{ + /* error target has no queue */ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer error_analyzer = { + .target_name = "error", + .ready = error_ready, + .show_table = error_show_table, + .show_status = error_show_status, + .show_queue = error_show_queue +}; + +/* + * linear target + */ +static int +linear_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct linear_c")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: linear_c"); + + return 0; +} + +static void +linear_show_table(unsigned long target) +{ + unsigned long lc, dm_dev; + unsigned long long start, len, offset; + char devt[BUFSIZE]; + + /* Get target information */ + GET_VALUE(target, dm_target, begin, start); + GET_VALUE(target, dm_target, len, len); + GET_VALUE(target, dm_target, private, lc); + GET_VALUE(lc, linear_c, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, devt, BUFSIZE); + GET_VALUE(lc, linear_c, start, offset); + + fprintf(fp, " begin:%llu len:%llu dev:%s offset:%llu", + start, len, devt, offset); +} + +static void +linear_show_status(unsigned long target) +{ + /* linear target has no status */ + fprintf(fp, " No status info"); +} + +static void +linear_show_queue(unsigned long target) +{ + /* linear target has no I/O queue */ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer linear_analyzer = { + .target_name = "linear", + .ready = linear_ready, + .show_table = linear_show_table, + .show_status = linear_show_status, + .show_queue = linear_show_queue +}; + +/* + * mirror target + */ +static int +mirror_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct mirror_set")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: mirror_set"); + + return 0; +} + +static 
void +mirror_show_table(unsigned long target) +{ + unsigned int i, nr_mir; + unsigned long ms, rh, log, log_type, mir_size, mir_head, mir, dm_dev; + unsigned long long offset; + char buf[BUFSIZE]; + + /* Get the address of struct mirror_set */ + GET_VALUE(target, dm_target, private, ms); + + /* Get the log-type name of the mirror_set */ + GET_ADDR(ms, mirror_set, rh, rh); + GET_VALUE(rh, region_hash, log, log); + GET_VALUE(log, dirty_log, type, log_type); + GET_PTR_STR(log_type, dirty_log_type, name, buf, BUFSIZE); + fprintf(fp, " log:%s", buf); + + /* + * Display information for each mirror disks. + * + * mir_head = mirror_set.mirror. + * This is the head of struct mirror array. + */ + fprintf(fp, " dev:"); + mir_size = STRUCT_SIZE("struct mirror"); + GET_ADDR(ms, mirror_set, mirror, mir_head); + GET_VALUE(ms, mirror_set, nr_mirrors, nr_mir); + for (i = 0; i < nr_mir; i++) { + mir = mir_head + mir_size * i; /* Get next mirror */ + + /* Get the devt of the mirror disk */ + GET_VALUE(mir, mirror, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, buf, BUFSIZE); + + /* Get the offset of the mirror disk */ + GET_VALUE(mir, mirror, offset, offset); + + fprintf(fp, "%s(%llu)%s", buf, offset, + i == nr_mir - 1 ? "" : ","); + } + if (i != nr_mir) + fprintf(fp, " ERROR: dev are less than nr_mir:%d", nr_mir); +} + +static void +mirror_show_status(unsigned long target) +{ + unsigned int i, nr_mir, synced, nr_error; + unsigned long ms, mir_size, mir_head, mir, dm_dev; + char buf[BUFSIZE]; + + /* Get the address of struct mirror_set */ + GET_VALUE(target, dm_target, private, ms); + + /* Get the status info of the mirror_set */ + GET_VALUE(ms, mirror_set, in_sync, synced); + fprintf(fp, " in_sync:%d", synced); + + /* + * Display information for each mirror disks. + * + * mir_head = mirror_set.mirror. + * This is the head of struct mirror array. 
+ */ + fprintf(fp, " dev:"); + mir_size = STRUCT_SIZE("struct mirror"); + GET_ADDR(ms, mirror_set, mirror, mir_head); + GET_VALUE(ms, mirror_set, nr_mirrors, nr_mir); + for (i = 0; i < nr_mir; i++) { + mir = mir_head + mir_size * i; /* Get next mirror */ + + /* Get the devt of the mirror disk */ + GET_VALUE(mir, mirror, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, buf, BUFSIZE); + + /* Get the offset of the mirror disk */ + GET_VALUE(mir, mirror, error_count, nr_error); + + fprintf(fp, "%s(%c,%d)%s", buf, nr_error ? 'D' : 'A', nr_error, + i == nr_mir - 1 ? "" : ","); + } + if (i != nr_mir) + fprintf(fp, " ERROR: dev are less than nr_mir:%d", nr_mir); +} + +static void +mirror_show_queue(unsigned long target) +{ + unsigned long ms, rlist, wlist, rhead, whead; + unsigned long rh, quis_head, rcov_head, quis_next, rcov_next; + + /* Get the address of struct mirror_set */ + GET_VALUE(target, dm_target, private, ms); + + /* Get the address of queued I/O lists in struct mirror_set */ + GET_ADDR(ms, mirror_set, reads, rlist); + GET_ADDR(ms, mirror_set, writes, wlist); + + /* Get the head of queued I/O lists */ + GET_VALUE(rlist, bio_list, head, rhead); + GET_VALUE(wlist, bio_list, head, whead); + fprintf(fp, " %s", rhead ? "reads" : "(reads)"); + fprintf(fp, " %s", whead ? "writes" : "(writes)"); + + /* Get the address of the struct region_hash */ + GET_ADDR(ms, mirror_set, rh, rh); + + /* Get the address of recover region lists in struct region_hash */ + GET_ADDR(rh, region_hash, quiesced_regions, quis_head); + GET_ADDR(rh, region_hash, recovered_regions, rcov_head); + + /* Get the head of recover region lists */ + quis_next = get_next_from_list_head(quis_head); + rcov_next = get_next_from_list_head(rcov_head); + + fprintf(fp, " %s", quis_next != quis_head ? "quiesced" : "(quiesced)"); + fprintf(fp, " %s", rcov_next != rcov_head ? 
"recovered" : "(recovered)"); +} + +static struct dminfo_target_analyzer mirror_analyzer = { + .target_name = "mirror", + .ready = mirror_ready, + .show_table = mirror_show_table, + .show_status = mirror_show_status, + .show_queue = mirror_show_queue +}; + +/* + * multipath target + */ +static int +multipath_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct multipath")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: multipath"); + + return 0; +} + +static void +multipath_show_table(unsigned long target) +{ + int i, j; + unsigned int queue_if_no_path, nr_pgs, pg_id, nr_paths; + unsigned long mp, hwh, hwh_type, ps, ps_type, path, dm_dev; + unsigned long pg_head, pg_next, pg_last; + unsigned long path_head, path_next, path_last; + char name[BUFSIZE]; + + /* Get the address of struct multipath */ + GET_VALUE(target, dm_target, private, mp); + + /* Get features information */ + GET_VALUE(mp, multipath, queue_if_no_path, queue_if_no_path); + + /* Get the hardware-handler information */ + GET_ADDR(mp, multipath, hw_handler, hwh); + GET_VALUE(hwh, hw_handler, type, hwh_type); + if (hwh_type) + GET_PTR_STR(hwh_type, hw_handler_type, name, name, BUFSIZE); + else + strcpy(name, "none"); + + /* Get the number of priority groups */ + GET_VALUE(mp, multipath, nr_priority_groups, nr_pgs); + + fprintf(fp, " queue_if_no_path:%d hwh:%s nr_pgs:%d\n", + queue_if_no_path, name, nr_pgs); + + /* Display information for each priority group */ + fprintf(fp, " %-2s %-13s %-8s %s", + "PG", "PATH_SELECTOR", "NR_PATHS", "PATHS"); + GET_ADDR(mp, multipath, priority_groups, pg_head); + i = 0; + list_for_each (pg_next, pg_head, pg_last) { + /* pg_next == struct priority_group */ + + /* Get the index of the priority group */ + GET_VALUE(pg_next, priority_group, pg_num, pg_id); + + /* Get the name of path selector */ + GET_ADDR(pg_next, priority_group, ps, ps); + GET_VALUE(ps, path_selector, type, ps_type); + 
GET_PTR_STR(ps_type, path_selector_type, name, name, BUFSIZE); + + /* Get the number of paths in the priority group */ + GET_VALUE(pg_next, priority_group, nr_pgpaths, nr_paths); + + fprintf(fp, "\n %-2d %-13s %-8d ", pg_id, name, nr_paths); + + /* Display information for each path */ + GET_ADDR(pg_next, priority_group, pgpaths, path_head); + j = 0; + list_for_each (path_next, path_head, path_last) { + /* path_next == struct pgpath */ + + /* Get the devt of the pgpath */ + GET_ADDR(path_next, pgpath, path, path); + GET_VALUE(path, path, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, name, BUFSIZE); + + fprintf(fp, " %s", name); + j++; + } + if (j != nr_paths) + fprintf(fp, " ERROR: paths are less than nr_paths:%d", + nr_paths); + i++; + } + if (i != nr_pgs) + fprintf(fp, " ERROR: pgs are less than nr_pgs:%d", nr_pgs); +} + +static void +multipath_show_status(unsigned long target) +{ + int i, j; + unsigned int queue_if_no_path, nr_pgs, pg_id, nr_paths; + unsigned int bypassed_pg, path_active, nr_fails; + unsigned long mp, hwh, hwh_type, cur_pg, path, dm_dev; + unsigned long pg_head, pg_next, pg_last; + unsigned long path_head, path_next, path_last; + char buf[BUFSIZE], path_status; + + /* Get the address of struct multipath */ + GET_VALUE(target, dm_target, private, mp); + + /* Get features information */ + GET_VALUE(mp, multipath, queue_if_no_path, queue_if_no_path); + + /* Get the hardware-handler information */ + GET_ADDR(mp, multipath, hw_handler, hwh); + GET_VALUE(hwh, hw_handler, type, hwh_type); + if (hwh_type) + GET_PTR_STR(hwh_type, hw_handler_type, name, buf, BUFSIZE); + else + strcpy(buf, "none"); + + /* Get the number of priority groups */ + GET_VALUE(mp, multipath, nr_priority_groups, nr_pgs); + + fprintf(fp, " queue_if_no_path:%d hwh:%s nr_pgs:%d\n", + queue_if_no_path, buf, nr_pgs); + + /* Display information for each priority group */ + fprintf(fp, " %-2s %-9s %-8s %s", + "PG", "PG_STATUS", "NR_PATHS", "PATHS"); + GET_ADDR(mp, multipath, 
priority_groups, pg_head); + i = 0; + list_for_each (pg_next, pg_head, pg_last) { + /* pg_next == struct priority_group */ + + /* Get the index of the priority group */ + GET_VALUE(pg_next, priority_group, pg_num, pg_id); + + /* Get the status of the priority group */ + GET_VALUE(pg_next, priority_group, bypassed, bypassed_pg); + if (bypassed_pg) + strcpy(buf, "disabled"); + else { + GET_VALUE(mp, multipath, current_pg, cur_pg); + if (pg_next == cur_pg) + strcpy(buf, "active"); + else + strcpy(buf, "enabled"); + } + + /* Get the number of paths in the priority group */ + GET_VALUE(pg_next, priority_group, nr_pgpaths, nr_paths); + + fprintf(fp, "\n %-2d %-9s %-8d ", pg_id, buf, nr_paths); + + /* Display information for each path */ + GET_ADDR(pg_next, priority_group, pgpaths, path_head); + j = 0; + list_for_each (path_next, path_head, path_last) { + /* path_next == struct pgpath */ + + /* Get the devt of the pgpath */ + GET_ADDR(path_next, pgpath, path, path); + GET_VALUE(path, path, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, buf, BUFSIZE); + + /* Get the status of the path */ + GET_VALUE(path, path, is_active, path_active); + GET_VALUE(path_next, pgpath, fail_count, nr_fails); + path_status = path_active ? 
'A' : 'F'; + + fprintf(fp, " %s(%c,%u)", buf, path_status, nr_fails); + j++; + } + if (j != nr_paths) + fprintf(fp, " ERROR: paths are less than nr_paths:%d", + nr_paths); + i++; + } + if (i != nr_pgs) + fprintf(fp, " ERROR: pgs are less than nr_pgs:%d", nr_pgs); +} + +static void +multipath_show_queue(unsigned long target) +{ + unsigned int queue_size; + unsigned long mp; + + /* Get the address of struct multipath */ + GET_VALUE(target, dm_target, private, mp); + + /* Get the size of queued I/Os in this 'target' */ + GET_VALUE(mp, multipath, queue_size, queue_size); + + fprintf(fp, " queue_size:%d", queue_size); +} + +static struct dminfo_target_analyzer multipath_analyzer = { + .target_name = "multipath", + .ready = multipath_ready, + .show_table = multipath_show_table, + .show_status = multipath_show_status, + .show_queue = multipath_show_queue +}; + +/* + * crypt target + */ +static int +crypt_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct crypt_config")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: crypt_config"); + + return 0; +} + +#define DMINFO_CRYPTO_TFM_MODE_ECB 0x00000001 +#define DMINFO_CRYPTO_TFM_MODE_CBC 0x00000002 + +static void +crypt_show_table(unsigned long target) +{ + int i, cit_mode, key_size; + unsigned long cc, tfm, crt_alg, cipher, iv_mode, dm_dev; + char buf[BUFSIZE], *chainmode; + + /* Get the address of struct crypt_config */ + GET_VALUE(target, dm_target, private, cc); + + /* Get the cipher name of the crypt_tfm */ + GET_VALUE(cc, crypt_config, tfm, tfm); + GET_VALUE(tfm, crypto_tfm, __crt_alg, crt_alg); + GET_STR(crt_alg, crypto_alg, cra_name, buf, BUFSIZE); + fprintf(fp, " type:%s", buf); + + /* Get the cit_mode of the crypt_tfm */ + GET_ADDR(tfm, crypto_tfm, crt_u, cipher); + GET_VALUE(cipher, cipher_tfm, cit_mode, cit_mode); + + if (MEMBER_EXISTS("struct crypt_config", "iv_mode")) { + if (cit_mode == DMINFO_CRYPTO_TFM_MODE_CBC) + chainmode = 
"cbc"; + else if (cit_mode == DMINFO_CRYPTO_TFM_MODE_ECB) + chainmode = "ecb"; + else + chainmode = "unknown"; + + /* Get the iv_mode of the crypt_config */ + GET_VALUE(cc, crypt_config, iv_mode, iv_mode); + if (iv_mode) { + GET_PTR_STR(cc, crypt_config, iv_mode, buf, BUFSIZE); + fprintf(fp, "-%s-%s", chainmode, buf); + } else + fprintf(fp, "-%s", chainmode); + + } else { + /* Compatibility mode for old dm-crypt cipher strings */ + if (cit_mode == DMINFO_CRYPTO_TFM_MODE_CBC) + chainmode = "plain"; + else if (cit_mode == DMINFO_CRYPTO_TFM_MODE_ECB) + chainmode = "ecb"; + else + chainmode = "unknown"; + + fprintf(fp, "-%s", chainmode); + } + + /* Get the devt of the crypt_config */ + GET_VALUE(cc, crypt_config, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, buf, BUFSIZE); + fprintf(fp, " dev:%s", buf); + + /* + * Get the key of the crypt_config. + */ + GET_VALUE(cc, crypt_config, key_size, key_size); + GET_STR(cc, crypt_config, key, buf, MIN(key_size + 1, BUFSIZE)); + fprintf(fp, " key:"); + for (i = 0; i < key_size; i++) + fprintf(fp, "%02x", (unsigned char)buf[i]); +} + +static void +crypt_show_status(unsigned long target) +{ + /* crypt target has no status */ + fprintf(fp, " No status info"); +} + +static void +crypt_show_queue(unsigned long target) +{ + /* crypt target has no queue */ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer crypt_analyzer = { + .target_name = "crypt", + .ready = crypt_ready, + .show_table = crypt_show_table, + .show_status = crypt_show_status, + .show_queue = crypt_show_queue +}; + +/* + * stripe target + */ +static int +stripe_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct stripe_c")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: stripe_c"); + + return 0; +} + +static void +stripe_show_table(unsigned long target) +{ + unsigned int i, n_stripe; + unsigned long sc, stripe_size, s, head, dm_dev; + unsigned long long mask; + char 
buf[BUFSIZE]; + + /* Get the address of struct stripe_c */ + GET_VALUE(target, dm_target, private, sc); + + /* Get the chunk_size of the stripe_c */ + GET_VALUE(sc, stripe_c, chunk_mask, mask); + fprintf(fp, " chunk_size:%llu", mask + 1); + + /* + * Display the information of each stripe disks. + * + * head = stripe_c.stripe. + * This is the head of struct stripe array. + */ + stripe_size = STRUCT_SIZE("struct stripe"); + GET_ADDR(sc, stripe_c, stripe, head); + GET_VALUE(sc, stripe_c, stripes, n_stripe); + fprintf(fp, " dev:"); + for (i = 0; i < n_stripe; i++) { + s = head + stripe_size * i; /* Get next stripe */ + + /* Get the devt of the stripe disk */ + GET_VALUE(s, stripe, dev, dm_dev); + GET_STR(dm_dev, dm_dev, name, buf, BUFSIZE); + + fprintf(fp, "%s%s", buf, i == n_stripe - 1 ? "" : ","); + } + if (i != n_stripe) + fprintf(fp, " ERROR: dev are less than n_stripe:%d", n_stripe); +} + +static void +stripe_show_status(unsigned long target) +{ + /* stripe target has no status */ + fprintf(fp, " No status info"); +} + +static void +stripe_show_queue(unsigned long target) +{ + /* stripe target has no queue */ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer stripe_analyzer = { + .target_name = "striped", + .ready = stripe_ready, + .show_table = stripe_show_table, + .show_status = stripe_show_status, + .show_queue = stripe_show_queue +}; + +/* + * snapshot target + */ +static int +snapshot_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct dm_snapshot")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: dm_snapshot"); + + return 0; +} + +static void +snapshot_show_table(unsigned long target) +{ + unsigned long snap, orig_dev, cow_dev; + unsigned long long chunk_size; + char orig_name[BUFSIZE], cow_name[BUFSIZE], type; + + /* Get the address of struct dm_snapshot */ + GET_VALUE(target, dm_target, private, snap); + + /* Get snapshot parameters of the dm_snapshot 
*/ + GET_VALUE(snap, dm_snapshot, origin, orig_dev); + GET_STR(orig_dev, dm_dev, name, orig_name, BUFSIZE); + GET_VALUE(snap, dm_snapshot, cow, cow_dev); + GET_STR(cow_dev, dm_dev, name, cow_name, BUFSIZE); + GET_VALUE(snap, dm_snapshot, type, type); + GET_VALUE(snap, dm_snapshot, chunk_size, chunk_size); + + fprintf(fp, " orig:%s cow:%s type:%c chunk_size:%llu", + orig_name, cow_name, type, chunk_size); +} + +static void +snapshot_show_status(unsigned long target) +{ + int valid; + unsigned long snap; + + /* Get the address of struct dm_snapshot */ + GET_VALUE(target, dm_target, private, snap); + + /* Get snapshot parameters of the dm_snapshot */ + GET_VALUE(snap, dm_snapshot, valid, valid); + + fprintf(fp, " vaild:%d", valid); +} + +static void +snapshot_show_queue(unsigned long target) +{ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer snapshot_analyzer = { + .target_name = "snapshot", + .ready = snapshot_ready, + .show_table = snapshot_show_table, + .show_status = snapshot_show_status, + .show_queue = snapshot_show_queue +}; + +/* + * snapshot-origin target + */ +static int +origin_ready(void) +{ + return 1; +} + +static void +origin_show_table(unsigned long target) +{ + unsigned long dm_dev; + char buf[BUFSIZE]; + + /* Get the name of the struct dm_dev */ + GET_VALUE(target, dm_target, private, dm_dev); + GET_STR(dm_dev, dm_dev, name, buf, BUFSIZE); + + fprintf(fp, " orig_dev:%s", buf); +} + +static void +origin_show_status(unsigned long target) +{ + /* snapshot-origin target has no status */ + fprintf(fp, " No status info"); +} + +static void +origin_show_queue(unsigned long target) +{ + /* snapshot-origin target has no queue */ + fprintf(fp, " No queue info"); +} + +static struct dminfo_target_analyzer snapshot_origin_analyzer = { + .target_name = "snapshot-origin", + .ready = origin_ready, + .show_table = origin_show_table, + .show_status = origin_show_status, + .show_queue = origin_show_queue +}; + +/* + * Core part of dminfo + 
*/ +#define DMINFO_LIST 0 +#define DMINFO_DEPS 1 +#define DMINFO_TABLE 2 +#define DMINFO_STATUS 3 +#define DMINFO_QUEUE 4 + +static int +dm_core_ready(void) +{ + static int debuginfo = 0; + + if (debuginfo) + return 1; + + if (STRUCT_EXISTS("struct hash_cell")) { + debuginfo = 1; + return 1; + } else + fprintf(fp, "No such struct info: hash_cell\n"); + + return 0; +} + +/* Display dependency information of the 'table' */ +static void +dminfo_show_deps(unsigned long table) +{ + int major, minor, count; + unsigned long head, next, last, dev, bdev; + char buf[BUFSIZE]; + + /* head = dm_table.devices */ + GET_ADDR(table, dm_table, devices, head); + + fprintf(fp, " %-3s %-3s %-16s %-5s %s\n", + "MAJ", "MIN", "GENDISK", "COUNT", "DEVNAME"); + + list_for_each (next, head, last) { + /* Get dependency information. (next == struct *dm_dev) */ + GET_VALUE(next, dm_dev, count, count); + GET_VALUE(next, dm_dev, bdev, bdev); + GET_VALUE(bdev, block_device, bd_disk, dev); + GET_VALUE(dev, gendisk, major, major); + GET_VALUE(dev, gendisk, first_minor, minor); + GET_STR(dev, gendisk, disk_name, buf, BUFSIZE); + + fprintf(fp, " %-3d %-3d %-16lx %-5d %s\n", + major, minor, dev, count, buf); + } +} + +/* + * Display target specific information in the 'table', if the target + * analyzer is registered and available. + */ +static void +dminfo_show_details(unsigned long table, unsigned int num_targets, int info_type) +{ + unsigned int i; + unsigned long head, target_size, target, target_type; + struct dminfo_target_analyzer *ta; + char buf[BUFSIZE]; + + /* + * head = dm_table.targets. + * This is the head of struct dm_target array. 
+ */ + GET_VALUE(table, dm_table, targets, head); + target_size = STRUCT_SIZE("struct dm_target"); + + fprintf(fp, " %-16s %-11s %s\n", + "TARGET", "TARGET_TYPE", "PRIVATE_DATA"); + + for (i = 0; i < num_targets; i++, fprintf(fp, "\n")) { + target = head + target_size * i; /* Get next target */ + + /* Get target information */ + GET_VALUE(target, dm_target, type, target_type); + GET_PTR_STR(target_type, target_type, name, buf, BUFSIZE); + + fprintf(fp, " %-16lx %-11s", target, buf); + + if (!(ta = find_target_analyzer(buf)) || !ta->ready + || !ta->ready()) + continue; + + switch (info_type) { + case DMINFO_TABLE: + if (ta->show_table) + ta->show_table(target); + break; + case DMINFO_STATUS: + if (ta->show_status) + ta->show_status(target); + break; + case DMINFO_QUEUE: + if (ta->show_queue) + ta->show_queue(target); + break; + default: + break; + } + } + + if (i != num_targets) + fprintf(fp, " ERROR: targets are less than num_targets:%d", + num_targets); +} + +/* + * Display lists (and detail information if specified) of existing + * dm devices. 
+ */ +static void +dminfo_show_list(int additional_info) /* additional_info: one of the DMINFO_* selectors */ +{ + int i, major, minor, array_len; + unsigned int num_targets; + unsigned long _name_buckets, head, next, last, md, dev, table; + char buf[BUFSIZE]; + + _name_buckets = symbol_value("_name_buckets"); + array_len = get_array_length("_name_buckets", NULL, 0); + + if (additional_info == DMINFO_LIST) /* plain list: one header for the whole listing */ + fprintf(fp, "%-3s %-3s %-16s %-16s %-7s %s\n", + "MAJ", "MIN", "MAP_DEV", "DM_TABLE", + "TARGETS", "MAPNAME"); + + for (i = 0; i < array_len; i++) { + /* head = _name_buckets[i] */ + head = _name_buckets + (i * SIZE(list_head)); + + list_for_each (next, head, last) { /* next == hash_cell */ + /* Get device and table information */ + GET_PTR_STR(next, hash_cell, name, buf, BUFSIZE); + GET_VALUE(next, hash_cell, md, md); + GET_VALUE(md, mapped_device, disk, dev); + GET_VALUE(dev, gendisk, major, major); + GET_VALUE(dev, gendisk, first_minor, minor); + GET_VALUE(md, mapped_device, map, table); + GET_VALUE(table, dm_table, num_targets, num_targets); + + if (additional_info != DMINFO_LIST) /* detail modes: repeat the header before every device */ + fprintf(fp, "%-3s %-3s %-16s %-16s %-7s %s\n", + "MAJ", "MIN", "MAP_DEV", "DM_TABLE", + "TARGETS", "MAPNAME"); + + fprintf(fp, "%-3d %-3d %-16lx %-16lx %-7u %s\n", + major, minor, md, table, num_targets, buf); /* num_targets is unsigned, hence %u */ + + switch(additional_info) { + case DMINFO_DEPS: + dminfo_show_deps(table); + break; + case DMINFO_TABLE: + case DMINFO_STATUS: + case DMINFO_QUEUE: + dminfo_show_details(table, num_targets, + additional_info); + break; + default: /* DMINFO_LIST: the summary row is all there is */ + break; + } + + if (additional_info != DMINFO_LIST) /* blank line separates per-device detail sections */ + fprintf(fp, "\n"); + } + } +} + +/* + * Display the original bio information for the 'bio'. + * If the 'bio' is for dm devices, the original bio information is pointed + * by bio.bi_private as struct target_io.
+ */ +static void +dminfo_show_bio(unsigned long bio) +{ + int major, minor; + unsigned long target_io, dm_io, dm_bio, md, dev; + char buf[BUFSIZE]; + + /* Get original bio and device information */ + GET_VALUE(bio, bio, bi_private, target_io); + GET_VALUE(target_io, target_io, io, dm_io); + GET_VALUE(dm_io, dm_io, bio, dm_bio); + GET_VALUE(dm_io, dm_io, md, md); + GET_VALUE(md, mapped_device, disk, dev); + GET_VALUE(dev, gendisk, major, major); + GET_VALUE(dev, gendisk, first_minor, minor); + GET_STR(dev, gendisk, disk_name, buf, BUFSIZE); + + fprintf(fp, "%-16s %-3s %-3s %-16s %s\n", + "DM_BIO_ADDRESS", "MAJ", "MIN", "MAP_DEV", "DEVNAME"); + fprintf(fp, "%-16lx %-3d %-3d %-16lx %s\n", + dm_bio, major, minor, md, buf); +} + +static void +cmd_dminfo(void) +{ + int c, additional_info = DMINFO_LIST; + unsigned long bio; + + if (!dm_core_ready()) + return; + + /* Parse command line option */ + while ((c = getopt(argcnt, args, "b:dlqst")) != EOF) { + switch(c) + { + case 'b': + bio = stol(optarg, FAULT_ON_ERROR, NULL); + dminfo_show_bio(bio); + return; + case 'd': + additional_info = DMINFO_DEPS; + break; + case 'l': + additional_info = DMINFO_LIST; + break; + case 'q': + additional_info = DMINFO_QUEUE; + break; + case 's': + additional_info = DMINFO_STATUS; + break; + case 't': + additional_info = DMINFO_TABLE; + break; + default: + argerrs++; + break; + } + } + + if (argerrs) + cmd_usage(pc->curcmd, SYNOPSIS); + + dminfo_show_list(additional_info); +} + +/* + * dminfo help + */ +static char *help_dminfo[] = { + "dminfo", /* command name */ + "device mapper (dm) information", /* short description */ + "[-b bio | -d | -l | -q | -s | -t]", /* argument synopsis */ + " This command displays information about device-mapper mapped ", + " devices (dm devices).", + " If no argument is entered, displays lists of existing dm devices.", + " It's same as -l option.", + "", + " -b bio displays the information of the dm device which the bio", + " is submitted in. 
If the bio isn't for dm devices,", + " results will be error.", + " -d displays dependency information for existing dm devices.", + " -l displays lists of existing dm devices.", + " -q displays queued I/O information for each target of", + " existing dm devices.", + " -s displays status information for each target of existing", + " dm devices.", + " -t displays table information for each target of existing", + " dm devices.", + "", + "EXAMPLE", + " Display lists of dm devices. \"MAP_DEV\" is the address of the", + " struct mapped_device. \"DM_TABLE\" is the address of the struct", + " dm_table. \"TARGETS\" is the number of targets which are in", + " the struct dm_table.", + "", + " %s> dminfo", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 8 c4866c80 c4866280 1 vg0-snap0", + " 253 6 f6a04a80 f6a04580 1 vg0-lv0-real", + " 253 0 c4840380 c4841880 1 mp0", + " 253 5 f7c50c80 c488e480 1 via_cbeheddbdd", + " 253 7 c4866a80 c4866380 1 vg0-snap0-cow", + " 253 4 d441e280 c919ed80 1 dummy1", + " 253 3 f5dc4280 cba81d80 1 dummy0", + " 253 2 f7c53180 c4866180 1 vg0-lv0", + " 253 1 f746d280 f746cd80 1 mp0p1", + "", + " Display the dm device information which the bio is submitted in.", + " The bio (ceacee80) is a clone of the bio (ceacee00) which is", + " submitted in the dm-3 (dummy0). 
And the bio (ceacee00) is a clone", + " of the bio (ceaced80) which is submitted in the dm-4 (dummy1), too.", + " The bio (ceaced80) is the original bio.", + "", + " %s> dminfo -b ceacee80", + " DM_BIO_ADDRESS MAJ MIN MAP_DEV DEVNAME", + " ceacee00 253 3 f5dc4280 dm-3", + " %s> dminfo -b ceacee00", + " DM_BIO_ADDRESS MAJ MIN MAP_DEV DEVNAME", + " ceaced80 253 4 d441e280 dm-4", + " %s> dminfo -b ceaced80", + " dminfo: invalid kernel virtual address: 64 type: \"GET_VALUE: dm_io.bio\"", + "", + " Display dependency information for each target.", + " The vg0-snap0 depends on the dm-6 (vg0-lv0-real) and the dm-7", + " (vg0-snap0-cow)", + "", + " %s> dminfo -d", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 8 c4866c80 c4866280 1 vg0-snap0", + " MAJ MIN GENDISK COUNT DEVNAME", + " 253 7 c4866980 1 dm-7", + " 253 6 f6a04280 1 dm-6", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 6 f6a04a80 f6a04580 1 vg0-lv0-real", + " MAJ MIN GENDISK COUNT DEVNAME", + " 8 0 f7f24c80 1 sda", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 7 c4866a80 c4866380 1 vg0-snap0-cow", + " MAJ MIN GENDISK COUNT DEVNAME", + " 8 0 f7f24c80 1 sda", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 2 f7c53180 c4866180 1 vg0-lv0", + " MAJ MIN GENDISK COUNT DEVNAME", + " 253 6 f6a04280 1 dm-6", + "", + " Display queued I/O information for each target.", + " The information is displayed under the \"PRIVATE_DATA\" column.", + "", + " %s> dminfo -q", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 5 f7c50c80 c488e480 1 via_cbeheddbdd", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8961080 mirror (reads) (writes) (quiesced) (recovered)", + "", + " --------------------------------------------------------------", + " \"reads/writes\" are members of the struct mirror_set, and", + " \"quiesced/recovered\" are members of the struct region_hash.", + " If the list is empty, the member is bracketed by \"()\".", + "
--------------------------------------------------------------", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 0 c4840380 c4841880 1 mp0", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8802080 multipath queue_size:0", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 1 f746d280 f746cd80 1 mp0p1", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8821080 linear No queue info", + "", + " Display status information for each target.", + " The information is displayed under the \"PRIVATE_DATA\" column.", + "", + " %s> dminfo -s", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 0 c4840380 c4841880 1 mp0", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8802080 multipath queue_if_no_path:0 hwh:none nr_pgs:1", + " PG PG_STATUS NR_PATHS PATHS", + " 1 active 2 8:16(A,0) 8:32(A,0)", + "", + " --------------------------------------------------------------", + " Format of \"PATHS\": :(,)", + " Status: A:active, F:faulty", + " Fail_count: the value of the struct pgpath.fail_count", + " --------------------------------------------------------------", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 5 f7c50c80 c488e480 1 via_cbeheddbdd", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8961080 mirror in_sync:1 dev:8:16(A,0),8:32(A,0)", + "", + " --------------------------------------------------------------", + " Format of \"dev\": :(,)", + " Status: A:active, D:degraded", + " Error_count: the value of the struct mirror.error_count", + " --------------------------------------------------------------", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 1 f746d280 f746cd80 1 mp0p1", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8821080 linear No status info", + "", + " Display table information for each target.", + " The information is displayed under the \"PRIVATE_DATA\" column.", + "", + " %s> dminfo -t", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 8 c4866c80 c4866280 1 vg0-snap0", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f89b4080 
snapshot orig:253:6 cow:253:7 type:P chunk_size:16", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 6 f6a04a80 f6a04580 1 vg0-lv0-real", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f890f080 linear begin:0 len:204800 dev:8:5 offset:384", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 0 c4840380 c4841880 1 mp0", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8802080 multipath queue_if_no_path:0 hwh:none nr_pgs:1", + " PG PATH_SELECTOR NR_PATHS PATHS", + " 1 round-robin 2 8:16 8:32", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 5 f7c50c80 c488e480 1 via_cbeheddbdd", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8961080 mirror log:core dev:8:16(0),8:32(0)", + "", + " --------------------------------------------------------------", + " Format of \"dev\": :()", + " Offset: the value of the struct mirror.offset", + " --------------------------------------------------------------", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 7 c4866a80 c4866380 1 vg0-snap0-cow", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f899d080 linear begin:0 len:8192 dev:8:5 offset:205184", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 2 f7c53180 c4866180 1 vg0-lv0", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8bbc080 snapshot-origin orig_dev:253:6", + "", + " MAJ MIN MAP_DEV DM_TABLE TARGETS MAPNAME", + " 253 1 f746d280 f746cd80 1 mp0p1", + " TARGET TARGET_TYPE PRIVATE_DATA", + " f8821080 linear begin:0 len:2040192 dev:253:0 offset:63", + NULL +}; + +/* + * Registering command extension + */ + +static struct command_table_entry command_table[] = { + {"dminfo", cmd_dminfo, help_dminfo, 0}, + {NULL, NULL, NULL, 0}, +}; + +int _init() +{ + register_extension(command_table); + + dminfo_register_target_analyzer(&zero_analyzer); + dminfo_register_target_analyzer(&error_analyzer); + dminfo_register_target_analyzer(&linear_analyzer); + dminfo_register_target_analyzer(&mirror_analyzer); + dminfo_register_target_analyzer(&multipath_analyzer); + 
dminfo_register_target_analyzer(&crypt_analyzer); + dminfo_register_target_analyzer(&stripe_analyzer); + dminfo_register_target_analyzer(&snapshot_analyzer); + dminfo_register_target_analyzer(&snapshot_origin_analyzer); + + return 0; +} + +int _fini() +{ + return 0; +} --- crash/gdb-6.1/gdb/ppc-linux-tdep.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/gdb-6.1/gdb/ppc-linux-tdep.c 2005-11-04 17:37:54.000000000 -0500 @@ -0,0 +1,1116 @@ +/* Target-dependent code for GDB, the GNU debugger. + + Copyright 1986, 1987, 1989, 1991, 1992, 1993, 1994, 1995, 1996, + 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + Copyright (c) 2004, 2005 Red Hat, Inc. All rights reserved. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#include "defs.h" +#include "frame.h" +#include "inferior.h" +#include "symtab.h" +#include "target.h" +#include "gdbcore.h" +#include "gdbcmd.h" +#include "symfile.h" +#include "objfiles.h" +#include "regcache.h" +#include "value.h" +#include "osabi.h" + +#include "solib-svr4.h" +#include "ppc-tdep.h" + +/* The following instructions are used in the signal trampoline code + on GNU/Linux PPC. The kernel used to use magic syscalls 0x6666 and + 0x7777 but now uses the sigreturn syscalls. We check for both. 
*/ +#define INSTR_LI_R0_0x6666 0x38006666 +#define INSTR_LI_R0_0x7777 0x38007777 +#define INSTR_LI_R0_NR_sigreturn 0x38000077 +#define INSTR_LI_R0_NR_rt_sigreturn 0x380000AC + +#define INSTR_SC 0x44000002 + +/* Since the *-tdep.c files are platform independent (i.e., they may be + used to build cross platform debuggers), we can't include system + headers. Therefore, details concerning the sigcontext structure + must be painstakingly rerecorded. What's worse, if these details + ever change in the header files, they'll have to be changed here + as well. */ + +/* __SIGNAL_FRAMESIZE from <asm/ptrace.h> */ +#define PPC_LINUX_SIGNAL_FRAMESIZE 64 + +/* From <asm/sigcontext.h>, offsetof(struct sigcontext_struct, regs) == 0x1c */ +#define PPC_LINUX_REGS_PTR_OFFSET (PPC_LINUX_SIGNAL_FRAMESIZE + 0x1c) + +/* From <asm/sigcontext.h>, + offsetof(struct sigcontext_struct, handler) == 0x14 */ +#define PPC_LINUX_HANDLER_PTR_OFFSET (PPC_LINUX_SIGNAL_FRAMESIZE + 0x14) + +/* From <asm/ptrace.h>, values for PT_NIP, PT_R1, and PT_LNK */ +#define PPC_LINUX_PT_R0 0 +#define PPC_LINUX_PT_R1 1 +#define PPC_LINUX_PT_R2 2 +#define PPC_LINUX_PT_R3 3 +#define PPC_LINUX_PT_R4 4 +#define PPC_LINUX_PT_R5 5 +#define PPC_LINUX_PT_R6 6 +#define PPC_LINUX_PT_R7 7 +#define PPC_LINUX_PT_R8 8 +#define PPC_LINUX_PT_R9 9 +#define PPC_LINUX_PT_R10 10 +#define PPC_LINUX_PT_R11 11 +#define PPC_LINUX_PT_R12 12 +#define PPC_LINUX_PT_R13 13 +#define PPC_LINUX_PT_R14 14 +#define PPC_LINUX_PT_R15 15 +#define PPC_LINUX_PT_R16 16 +#define PPC_LINUX_PT_R17 17 +#define PPC_LINUX_PT_R18 18 +#define PPC_LINUX_PT_R19 19 +#define PPC_LINUX_PT_R20 20 +#define PPC_LINUX_PT_R21 21 +#define PPC_LINUX_PT_R22 22 +#define PPC_LINUX_PT_R23 23 +#define PPC_LINUX_PT_R24 24 +#define PPC_LINUX_PT_R25 25 +#define PPC_LINUX_PT_R26 26 +#define PPC_LINUX_PT_R27 27 +#define PPC_LINUX_PT_R28 28 +#define PPC_LINUX_PT_R29 29 +#define PPC_LINUX_PT_R30 30 +#define PPC_LINUX_PT_R31 31 +#define PPC_LINUX_PT_NIP 32 +#define PPC_LINUX_PT_MSR 33 +#define PPC_LINUX_PT_CTR 35 +#define PPC_LINUX_PT_LNK 36 +#define
PPC_LINUX_PT_XER 37 +#define PPC_LINUX_PT_CCR 38 +#define PPC_LINUX_PT_MQ 39 +#define PPC_LINUX_PT_FPR0 48 /* each FP reg occupies 2 slots in this space */ +#define PPC_LINUX_PT_FPR31 (PPC_LINUX_PT_FPR0 + 2*31) +#define PPC_LINUX_PT_FPSCR (PPC_LINUX_PT_FPR0 + 2*32 + 1) + +static int ppc_linux_at_sigtramp_return_path (CORE_ADDR pc); + +/* Determine if pc is in a signal trampoline... + + Ha! That's not what this does at all. wait_for_inferior in + infrun.c calls PC_IN_SIGTRAMP in order to detect entry into a + signal trampoline just after delivery of a signal. But on + GNU/Linux, signal trampolines are used for the return path only. + The kernel sets things up so that the signal handler is called + directly. + + If we use in_sigtramp2() in place of in_sigtramp() (see below) + we'll (often) end up with stop_pc in the trampoline and prev_pc in + the (now exited) handler. The code there will cause a temporary + breakpoint to be set on prev_pc which is not very likely to get hit + again. + + If this is confusing, think of it this way... the code in + wait_for_inferior() needs to be able to detect entry into a signal + trampoline just after a signal is delivered, not after the handler + has been run. + + So, we define in_sigtramp() below to return 1 if the following is + true: + + 1) The previous frame is a real signal trampoline. + + - and - + + 2) pc is at the first or second instruction of the corresponding + handler. + + Why the second instruction? It seems that wait_for_inferior() + never sees the first instruction when single stepping. When a + signal is delivered while stepping, the next instruction that + would've been stepped over isn't, instead a signal is delivered and + the first instruction of the handler is stepped over instead. That + puts us on the second instruction. (I added the test for the + first instruction long after the fact, just in case the observed + behavior is ever fixed.) 
+ + PC_IN_SIGTRAMP is called from blockframe.c as well in order to set + the frame's type (if a SIGTRAMP_FRAME). Because of our strange + definition of in_sigtramp below, we can't rely on the frame's type + getting set correctly from within blockframe.c. This is why we + take pains to set it in init_extra_frame_info(). + + NOTE: cagney/2002-11-10: I suspect the real problem here is that + the get_prev_frame() only initializes the frame's type after the + call to INIT_FRAME_INFO. get_prev_frame() should be fixed, this + code shouldn't be working its way around a bug :-(. */ + +int +ppc_linux_in_sigtramp (CORE_ADDR pc, char *func_name) +{ + CORE_ADDR lr; + CORE_ADDR sp; + CORE_ADDR tramp_sp; + char buf[4]; + CORE_ADDR handler; + + lr = read_register (gdbarch_tdep (current_gdbarch)->ppc_lr_regnum); + if (!ppc_linux_at_sigtramp_return_path (lr)) + return 0; + + sp = read_register (SP_REGNUM); + + if (target_read_memory (sp, buf, sizeof (buf)) != 0) + return 0; + + tramp_sp = extract_unsigned_integer (buf, 4); + + if (target_read_memory (tramp_sp + PPC_LINUX_HANDLER_PTR_OFFSET, buf, + sizeof (buf)) != 0) + return 0; + + handler = extract_unsigned_integer (buf, 4); + + return (pc == handler || pc == handler + 4); +} + +static int +insn_is_sigreturn (unsigned long pcinsn) +{ + switch(pcinsn) + { + case INSTR_LI_R0_0x6666: + case INSTR_LI_R0_0x7777: + case INSTR_LI_R0_NR_sigreturn: + case INSTR_LI_R0_NR_rt_sigreturn: + return 1; + default: + return 0; + } +} + +/* + * The signal handler trampoline is on the stack and consists of exactly + * two instructions. The easiest and most accurate way of determining + * whether the pc is in one of these trampolines is by inspecting the + * instructions. It'd be faster though if we could find a way to do this + * via some simple address comparisons. 
+ */ +static int +ppc_linux_at_sigtramp_return_path (CORE_ADDR pc) +{ + char buf[12]; + unsigned long pcinsn; + if (target_read_memory (pc - 4, buf, sizeof (buf)) != 0) + return 0; + + /* extract the instruction at the pc */ + pcinsn = extract_unsigned_integer (buf + 4, 4); + + return ( + (insn_is_sigreturn (pcinsn) + && extract_unsigned_integer (buf + 8, 4) == INSTR_SC) + || + (pcinsn == INSTR_SC + && insn_is_sigreturn (extract_unsigned_integer (buf, 4)))); +} + +static CORE_ADDR +ppc_linux_skip_trampoline_code (CORE_ADDR pc) +{ + char buf[4]; + struct obj_section *sect; + struct objfile *objfile; + unsigned long insn; + CORE_ADDR plt_start = 0; + CORE_ADDR symtab = 0; + CORE_ADDR strtab = 0; + int num_slots = -1; + int reloc_index = -1; + CORE_ADDR plt_table; + CORE_ADDR reloc; + CORE_ADDR sym; + long symidx; + char symname[1024]; + struct minimal_symbol *msymbol; + + /* Find the section pc is in; return if not in .plt */ + sect = find_pc_section (pc); + if (!sect || strcmp (sect->the_bfd_section->name, ".plt") != 0) + return 0; + + objfile = sect->objfile; + + /* Pick up the instruction at pc. It had better be of the + form + li r11, IDX + + where IDX is an index into the plt_table. */ + + if (target_read_memory (pc, buf, 4) != 0) + return 0; + insn = extract_unsigned_integer (buf, 4); + + if ((insn & 0xffff0000) != 0x39600000 /* li r11, VAL */ ) + return 0; + + reloc_index = (insn << 16) >> 16; + + /* Find the objfile that pc is in and obtain the information + necessary for finding the symbol name. 
*/ + for (sect = objfile->sections; sect < objfile->sections_end; ++sect) + { + const char *secname = sect->the_bfd_section->name; + if (strcmp (secname, ".plt") == 0) + plt_start = sect->addr; + else if (strcmp (secname, ".rela.plt") == 0) + num_slots = ((int) sect->endaddr - (int) sect->addr) / 12; + else if (strcmp (secname, ".dynsym") == 0) + symtab = sect->addr; + else if (strcmp (secname, ".dynstr") == 0) + strtab = sect->addr; + } + + /* Make sure we have all the information we need. */ + if (plt_start == 0 || num_slots == -1 || symtab == 0 || strtab == 0) + return 0; + + /* Compute the value of the plt table */ + plt_table = plt_start + 72 + 8 * num_slots; + + /* Get address of the relocation entry (Elf32_Rela) */ + if (target_read_memory (plt_table + reloc_index, buf, 4) != 0) + return 0; + reloc = extract_unsigned_integer (buf, 4); + + sect = find_pc_section (reloc); + if (!sect) + return 0; + + if (strcmp (sect->the_bfd_section->name, ".text") == 0) + return reloc; + + /* Now get the r_info field which is the relocation type and symbol + index. */ + if (target_read_memory (reloc + 4, buf, 4) != 0) + return 0; + symidx = extract_unsigned_integer (buf, 4); + + /* Shift out the relocation type leaving just the symbol index */ + /* symidx = ELF32_R_SYM(symidx); */ + symidx = symidx >> 8; + + /* compute the address of the symbol */ + sym = symtab + symidx * 4; + + /* Fetch the string table index */ + if (target_read_memory (sym, buf, 4) != 0) + return 0; + symidx = extract_unsigned_integer (buf, 4); + + /* Fetch the string; we don't know how long it is. Is it possible + that the following will fail because we're trying to fetch too + much? */ + if (target_read_memory (strtab + symidx, symname, sizeof (symname)) != 0) + return 0; + + /* This might not work right if we have multiple symbols with the + same name; the only way to really get it right is to perform + the same sort of lookup as the dynamic linker. 
*/ + msymbol = lookup_minimal_symbol_text (symname, NULL); + if (!msymbol) + return 0; + + return SYMBOL_VALUE_ADDRESS (msymbol); +} + +/* The rs6000 version of FRAME_SAVED_PC will almost work for us. The + signal handler details are different, so we'll handle those here + and call the rs6000 version to do the rest. */ +CORE_ADDR +ppc_linux_frame_saved_pc (struct frame_info *fi) +{ + if ((get_frame_type (fi) == SIGTRAMP_FRAME)) + { + CORE_ADDR regs_addr = + read_memory_integer (get_frame_base (fi) + + PPC_LINUX_REGS_PTR_OFFSET, 4); + /* return the NIP in the regs array */ + return read_memory_integer (regs_addr + 4 * PPC_LINUX_PT_NIP, 4); + } + else if (get_next_frame (fi) + && (get_frame_type (get_next_frame (fi)) == SIGTRAMP_FRAME)) + { + CORE_ADDR regs_addr = + read_memory_integer (get_frame_base (get_next_frame (fi)) + + PPC_LINUX_REGS_PTR_OFFSET, 4); + /* return LNK in the regs array */ + return read_memory_integer (regs_addr + 4 * PPC_LINUX_PT_LNK, 4); + } + else + return rs6000_frame_saved_pc (fi); +} + +void +ppc_linux_init_extra_frame_info (int fromleaf, struct frame_info *fi) +{ + rs6000_init_extra_frame_info (fromleaf, fi); + + if (get_next_frame (fi) != 0) + { + /* We're called from get_prev_frame_info; check to see if + this is a signal frame by looking to see if the pc points + at trampoline code */ + if (ppc_linux_at_sigtramp_return_path (get_frame_pc (fi))) + deprecated_set_frame_type (fi, SIGTRAMP_FRAME); + else + /* FIXME: cagney/2002-11-10: Is this double bogus? What + happens if the frame has previously been marked as a dummy? 
*/ + deprecated_set_frame_type (fi, NORMAL_FRAME); + } +} + +int +ppc_linux_frameless_function_invocation (struct frame_info *fi) +{ + /* We'll find the wrong thing if we let + rs6000_frameless_function_invocation () search for a signal trampoline */ + if (ppc_linux_at_sigtramp_return_path (get_frame_pc (fi))) + return 0; + else + return rs6000_frameless_function_invocation (fi); +} + +void +ppc_linux_frame_init_saved_regs (struct frame_info *fi) +{ + if ((get_frame_type (fi) == SIGTRAMP_FRAME)) + { + CORE_ADDR regs_addr; + int i; + if (deprecated_get_frame_saved_regs (fi)) + return; + + frame_saved_regs_zalloc (fi); + + regs_addr = + read_memory_integer (get_frame_base (fi) + + PPC_LINUX_REGS_PTR_OFFSET, 4); + deprecated_get_frame_saved_regs (fi)[PC_REGNUM] = regs_addr + 4 * PPC_LINUX_PT_NIP; + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_ps_regnum] = + regs_addr + 4 * PPC_LINUX_PT_MSR; + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_cr_regnum] = + regs_addr + 4 * PPC_LINUX_PT_CCR; + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_lr_regnum] = + regs_addr + 4 * PPC_LINUX_PT_LNK; + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_ctr_regnum] = + regs_addr + 4 * PPC_LINUX_PT_CTR; + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_xer_regnum] = + regs_addr + 4 * PPC_LINUX_PT_XER; + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_mq_regnum] = + regs_addr + 4 * PPC_LINUX_PT_MQ; + for (i = 0; i < 32; i++) + deprecated_get_frame_saved_regs (fi)[gdbarch_tdep (current_gdbarch)->ppc_gp0_regnum + i] = + regs_addr + 4 * PPC_LINUX_PT_R0 + 4 * i; + for (i = 0; i < 32; i++) + deprecated_get_frame_saved_regs (fi)[FP0_REGNUM + i] = regs_addr + 4 * PPC_LINUX_PT_FPR0 + 8 * i; + } + else + rs6000_frame_init_saved_regs (fi); +} + +CORE_ADDR +ppc_linux_frame_chain (struct frame_info *thisframe) +{ + /* Kernel properly constructs the 
frame chain for the handler */ + if ((get_frame_type (thisframe) == SIGTRAMP_FRAME)) + return read_memory_integer (get_frame_base (thisframe), 4); + else + return rs6000_frame_chain (thisframe); +} + +/* ppc_linux_memory_remove_breakpoint attempts to remove a breakpoint + in much the same fashion as memory_remove_breakpoint in mem-break.c, + but is careful not to write back the previous contents if the code + in question has changed in between inserting the breakpoint and + removing it. + + Here is the problem that we're trying to solve... + + Once upon a time, before introducing this function to remove + breakpoints from the inferior, setting a breakpoint on a shared + library function prior to running the program would not work + properly. In order to understand the problem, it is first + necessary to understand a little bit about dynamic linking on + this platform. + + A call to a shared library function is accomplished via a bl + (branch-and-link) instruction whose branch target is an entry + in the procedure linkage table (PLT). The PLT in the object + file is uninitialized. To gdb, prior to running the program, the + entries in the PLT are all zeros. + + Once the program starts running, the shared libraries are loaded + and the procedure linkage table is initialized, but the entries in + the table are not (necessarily) resolved. Once a function is + actually called, the code in the PLT is hit and the function is + resolved. In order to better illustrate this, an example is in + order; the following example is from the gdb testsuite. + + We start the program shmain. + + [kev@arroyo testsuite]$ ../gdb gdb.base/shmain + [...] + + We place two breakpoints, one on shr1 and the other on main. + + (gdb) b shr1 + Breakpoint 1 at 0x100409d4 + (gdb) b main + Breakpoint 2 at 0x100006a0: file gdb.base/shmain.c, line 44. + + Examine the instruction (and the immediately following instruction) + upon which the breakpoint was placed.
Note that the PLT entry + for shr1 contains zeros. + + (gdb) x/2i 0x100409d4 + 0x100409d4 <shr1>: .long 0x0 + 0x100409d8 <shr1+4>: .long 0x0 + + Now run 'til main. + + (gdb) r + Starting program: gdb.base/shmain + Breakpoint 1 at 0xffaf790: file gdb.base/shr1.c, line 19. + + Breakpoint 2, main () + at gdb.base/shmain.c:44 + 44 g = 1; + + Examine the PLT again. Note that the loading of the shared + library has initialized the PLT to code which loads a constant + (which I think is an index into the GOT) into r11 and then + branches a short distance to the code which actually does the + resolving. + + (gdb) x/2i 0x100409d4 + 0x100409d4 <shr1>: li r11,4 + 0x100409d8 <shr1+4>: b 0x10040984 + (gdb) c + Continuing. + + Breakpoint 1, shr1 (x=1) + at gdb.base/shr1.c:19 + 19 l = 1; + + Now we've hit the breakpoint at shr1. (The breakpoint was + reset from the PLT entry to the actual shr1 function after the + shared library was loaded.) Note that the PLT entry has been + resolved to contain a branch that takes us directly to shr1. + (The real one, not the PLT entry.) + + (gdb) x/2i 0x100409d4 + 0x100409d4 <shr1>: b 0xffaf76c + 0x100409d8 <shr1+4>: b 0x10040984 + + The thing to note here is that the PLT entry for shr1 has been + changed twice. + + Now the problem should be obvious. GDB places a breakpoint (a + trap instruction) on the zero value of the PLT entry for shr1. + Later on, after the shared library had been loaded and the PLT + initialized, GDB gets a signal indicating this fact and attempts + (as it always does when it stops) to remove all the breakpoints. + + The breakpoint removal was causing the former contents (a zero + word) to be written back to the now initialized PLT entry thus + destroying a portion of the initialization that had occurred only a + short time ago. When execution continued, the zero word would be + executed as an instruction and an illegal instruction trap was + generated instead. (0 is not a legal instruction.) + + The fix for this problem was fairly straightforward.
The function + memory_remove_breakpoint from mem-break.c was copied to this file, + modified slightly, and renamed to ppc_linux_memory_remove_breakpoint. + In tm-linux.h, MEMORY_REMOVE_BREAKPOINT is defined to call this new + function. + + The differences between ppc_linux_memory_remove_breakpoint () and + memory_remove_breakpoint () are minor. All that the former does + that the latter does not is check to make sure that the breakpoint + location actually contains a breakpoint (trap instruction) prior + to attempting to write back the old contents. If it does contain + a trap instruction, we allow the old contents to be written back. + Otherwise, we silently do nothing. + + The big question is whether memory_remove_breakpoint () should be + changed to have the same functionality. The downside is that more + traffic is generated for remote targets since we'll have an extra + fetch of a memory word each time a breakpoint is removed. + + For the time being, we'll leave this self-modifying-code-friendly + version in ppc-linux-tdep.c, but it ought to be migrated somewhere + else in the event that some other platform has similar needs with + regard to removing breakpoints in some potentially self modifying + code. */ +int +ppc_linux_memory_remove_breakpoint (CORE_ADDR addr, char *contents_cache) +{ + const unsigned char *bp; + int val; + int bplen; + char old_contents[BREAKPOINT_MAX]; + + /* Determine appropriate breakpoint contents and size for this address. 
*/ + bp = BREAKPOINT_FROM_PC (&addr, &bplen); + if (bp == NULL) + error ("Software breakpoints not implemented for this target."); + + val = target_read_memory (addr, old_contents, bplen); + + /* If our breakpoint is no longer at the address, this means that the + program modified the code on us, so it is wrong to put back the + old value */ + if (val == 0 && memcmp (bp, old_contents, bplen) == 0) + val = target_write_memory (addr, contents_cache, bplen); + + return val; +} + +/* For historic reasons, PPC 32 GNU/Linux follows PowerOpen rather + than the 32 bit SYSV R4 ABI structure return convention - all + structures, no matter their size, are put in memory. Vectors, + which were added later, do get returned in a register though. */ + +static enum return_value_convention +ppc_linux_return_value (struct gdbarch *gdbarch, struct type *valtype, + struct regcache *regcache, void *readbuf, + const void *writebuf) +{ + if ((TYPE_CODE (valtype) == TYPE_CODE_STRUCT + || TYPE_CODE (valtype) == TYPE_CODE_UNION) + && !((TYPE_LENGTH (valtype) == 16 || TYPE_LENGTH (valtype) == 8) + && TYPE_VECTOR (valtype))) + return RETURN_VALUE_STRUCT_CONVENTION; + else + return ppc_sysv_abi_return_value (gdbarch, valtype, regcache, readbuf, + writebuf); +} + +/* Fetch (and possibly build) an appropriate link_map_offsets + structure for GNU/Linux PPC targets using the struct offsets + defined in link.h (but without actual reference to that file). + + This makes it possible to access GNU/Linux PPC shared libraries + from a GDB that was not built on an GNU/Linux PPC host (for cross + debugging). */ + +struct link_map_offsets * +ppc_linux_svr4_fetch_link_map_offsets (void) +{ + static struct link_map_offsets lmo; + static struct link_map_offsets *lmp = NULL; + + if (lmp == NULL) + { + lmp = &lmo; + + lmo.r_debug_size = 8; /* The actual size is 20 bytes, but + this is all we need. 
*/ + lmo.r_map_offset = 4; + lmo.r_map_size = 4; + + lmo.link_map_size = 20; /* The actual size is 560 bytes, but + this is all we need. */ + lmo.l_addr_offset = 0; + lmo.l_addr_size = 4; + + lmo.l_name_offset = 4; + lmo.l_name_size = 4; + + lmo.l_next_offset = 12; + lmo.l_next_size = 4; + + lmo.l_prev_offset = 16; + lmo.l_prev_size = 4; + } + + return lmp; +} + + +/* Macros for matching instructions. Note that, since all the + operands are masked off before they're or-ed into the instruction, + you can use -1 to make masks. */ + +#define insn_d(opcd, rts, ra, d) \ + ((((opcd) & 0x3f) << 26) \ + | (((rts) & 0x1f) << 21) \ + | (((ra) & 0x1f) << 16) \ + | ((d) & 0xffff)) + +#define insn_ds(opcd, rts, ra, d, xo) \ + ((((opcd) & 0x3f) << 26) \ + | (((rts) & 0x1f) << 21) \ + | (((ra) & 0x1f) << 16) \ + | ((d) & 0xfffc) \ + | ((xo) & 0x3)) + +#define insn_xfx(opcd, rts, spr, xo) \ + ((((opcd) & 0x3f) << 26) \ + | (((rts) & 0x1f) << 21) \ + | (((spr) & 0x1f) << 16) \ + | (((spr) & 0x3e0) << 6) \ + | (((xo) & 0x3ff) << 1)) + +/* Read a PPC instruction from memory. PPC instructions are always + big-endian, no matter what endianness the program is running in, so + we can't use read_memory_integer or one of its friends here. */ +static unsigned int +read_insn (CORE_ADDR pc) +{ + unsigned char buf[4]; + + read_memory (pc, buf, 4); + return (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; +} + + +/* An instruction to match. */ +struct insn_pattern +{ + unsigned int mask; /* mask the insn with this... */ + unsigned int data; /* ...and see if it matches this. */ + int optional; /* If non-zero, this insn may be absent. */ +}; + +/* Return non-zero if the instructions at PC match the series + described in PATTERN, or zero otherwise. PATTERN is an array of + 'struct insn_pattern' objects, terminated by an entry whose mask is + zero. + + When the match is successful, fill INSN[i] with what PATTERN[i] + matched. 
If PATTERN[i] is optional, and the instruction wasn't + present, set INSN[i] to 0 (which is not a valid PPC instruction). + INSN should have as many elements as PATTERN. Note that, if + PATTERN contains optional instructions which aren't present in + memory, then INSN will have holes, so INSN[i] isn't necessarily the + i'th instruction in memory. */ +static int +insns_match_pattern (CORE_ADDR pc, + struct insn_pattern *pattern, + unsigned int *insn) +{ + int i; + + for (i = 0; pattern[i].mask; i++) + { + insn[i] = read_insn (pc); + if ((insn[i] & pattern[i].mask) == pattern[i].data) + pc += 4; + else if (pattern[i].optional) + insn[i] = 0; + else + return 0; + } + + return 1; +} + + +/* Return the 'd' field of the d-form instruction INSN, properly + sign-extended. */ +static CORE_ADDR +insn_d_field (unsigned int insn) +{ + return ((((CORE_ADDR) insn & 0xffff) ^ 0x8000) - 0x8000); +} + + +/* Return the 'ds' field of the ds-form instruction INSN, with the two + zero bits concatenated at the right, and properly + sign-extended. */ +static CORE_ADDR +insn_ds_field (unsigned int insn) +{ + return ((((CORE_ADDR) insn & 0xfffc) ^ 0x8000) - 0x8000); +} + + +/* If DESC is the address of a 64-bit PowerPC GNU/Linux function + descriptor, return the descriptor's entry point. */ +static CORE_ADDR +ppc64_desc_entry_point (CORE_ADDR desc) +{ + /* The first word of the descriptor is the entry point. */ + return (CORE_ADDR) read_memory_unsigned_integer (desc, 8); +} + + +/* Pattern for the standard linkage function. These are built by + build_plt_stub in elf64-ppc.c, whose GLINK argument is always + zero. 
*/ +static struct insn_pattern ppc64_standard_linkage[] = + { + /* addis r12, r2, */ + { insn_d (-1, -1, -1, 0), insn_d (15, 12, 2, 0), 0 }, + + /* std r2, 40(r1) */ + { -1, insn_ds (62, 2, 1, 40, 0), 0 }, + + /* ld r11, (r12) */ + { insn_ds (-1, -1, -1, 0, -1), insn_ds (58, 11, 12, 0, 0), 0 }, + + /* addis r12, r12, 1 */ + { insn_d (-1, -1, -1, -1), insn_d (15, 12, 2, 1), 1 }, + + /* ld r2, (r12) */ + { insn_ds (-1, -1, -1, 0, -1), insn_ds (58, 2, 12, 0, 0), 0 }, + + /* addis r12, r12, 1 */ + { insn_d (-1, -1, -1, -1), insn_d (15, 12, 2, 1), 1 }, + + /* mtctr r11 */ + { insn_xfx (-1, -1, -1, -1), insn_xfx (31, 11, 9, 467), + 0 }, + + /* ld r11, (r12) */ + { insn_ds (-1, -1, -1, 0, -1), insn_ds (58, 11, 12, 0, 0), 0 }, + + /* bctr */ + { -1, 0x4e800420, 0 }, + + { 0, 0, 0 } + }; +#define PPC64_STANDARD_LINKAGE_LEN \ + (sizeof (ppc64_standard_linkage) / sizeof (ppc64_standard_linkage[0])) + + +/* Recognize a 64-bit PowerPC GNU/Linux linkage function --- what GDB + calls a "solib trampoline". */ +static int +ppc64_in_solib_call_trampoline (CORE_ADDR pc, char *name) +{ + /* Detecting solib call trampolines on PPC64 GNU/Linux is a pain. + + It's not specifically solib call trampolines that are the issue. + Any call from one function to another function that uses a + different TOC requires a trampoline, to save the caller's TOC + pointer and then load the callee's TOC. An executable or shared + library may have more than one TOC, so even intra-object calls + may require a trampoline. Since executable and shared libraries + will all have their own distinct TOCs, every inter-object call is + also an inter-TOC call, and requires a trampoline --- so "solib + call trampolines" are just a special case. + + The 64-bit PowerPC GNU/Linux ABI calls these call trampolines + "linkage functions". Since they need to be near the functions + that call them, they all appear in .text, not in any special + section. 
The .plt section just contains an array of function + descriptors, from which the linkage functions load the callee's + entry point, TOC value, and environment pointer. So + in_plt_section is useless. The linkage functions don't have any + special linker symbols to name them, either. + + The only way I can see to recognize them is to actually look at + their code. They're generated by ppc_build_one_stub and some + other functions in bfd/elf64-ppc.c, so that should show us all + the instruction sequences we need to recognize. */ + unsigned int insn[PPC64_STANDARD_LINKAGE_LEN]; + + return insns_match_pattern (pc, ppc64_standard_linkage, insn); +} + + +/* When the dynamic linker is doing lazy symbol resolution, the first + call to a function in another object will go like this: + + - The user's function calls the linkage function: + + 100007c4: 4b ff fc d5 bl 10000498 + 100007c8: e8 41 00 28 ld r2,40(r1) + + - The linkage function loads the entry point (and other stuff) from + the function descriptor in the PLT, and jumps to it: + + 10000498: 3d 82 00 00 addis r12,r2,0 + 1000049c: f8 41 00 28 std r2,40(r1) + 100004a0: e9 6c 80 98 ld r11,-32616(r12) + 100004a4: e8 4c 80 a0 ld r2,-32608(r12) + 100004a8: 7d 69 03 a6 mtctr r11 + 100004ac: e9 6c 80 a8 ld r11,-32600(r12) + 100004b0: 4e 80 04 20 bctr + + - But since this is the first time that PLT entry has been used, it + sends control to its glink entry. 
That loads the number of the + PLT entry and jumps to the common glink0 code: + + 10000c98: 38 00 00 00 li r0,0 + 10000c9c: 4b ff ff dc b 10000c78 + + - The common glink0 code then transfers control to the dynamic + linker's fixup code: + + 10000c78: e8 41 00 28 ld r2,40(r1) + 10000c7c: 3d 82 00 00 addis r12,r2,0 + 10000c80: e9 6c 80 80 ld r11,-32640(r12) + 10000c84: e8 4c 80 88 ld r2,-32632(r12) + 10000c88: 7d 69 03 a6 mtctr r11 + 10000c8c: e9 6c 80 90 ld r11,-32624(r12) + 10000c90: 4e 80 04 20 bctr + + Eventually, this code will figure out how to skip all of this, + including the dynamic linker. At the moment, we just get through + the linkage function. */ + +/* If the current thread is about to execute a series of instructions + at PC matching the ppc64_standard_linkage pattern, and INSN is the result + from that pattern match, return the code address to which the + standard linkage function will send them. (This doesn't deal with + dynamic linker lazy symbol resolution stubs.) */ +static CORE_ADDR +ppc64_standard_linkage_target (CORE_ADDR pc, unsigned int *insn) +{ + struct gdbarch_tdep *tdep = gdbarch_tdep (current_gdbarch); + + /* The address of the function descriptor this linkage function + references. */ + CORE_ADDR desc + = ((CORE_ADDR) read_register (tdep->ppc_gp0_regnum + 2) + + (insn_d_field (insn[0]) << 16) + + insn_ds_field (insn[2])); + + /* The first word of the descriptor is the entry point. Return that. */ + return ppc64_desc_entry_point (desc); +} + + +/* Given that we've begun executing a call trampoline at PC, return + the entry point of the function the trampoline will go to. 
*/ +static CORE_ADDR +ppc64_skip_trampoline_code (CORE_ADDR pc) +{ + unsigned int ppc64_standard_linkage_insn[PPC64_STANDARD_LINKAGE_LEN]; + + if (insns_match_pattern (pc, ppc64_standard_linkage, + ppc64_standard_linkage_insn)) + return ppc64_standard_linkage_target (pc, ppc64_standard_linkage_insn); + else + return 0; +} + + +/* Support for CONVERT_FROM_FUNC_PTR_ADDR (ARCH, ADDR, TARG) on PPC64 + GNU/Linux. + + Usually a function pointer's representation is simply the address + of the function. On GNU/Linux on the 64-bit PowerPC however, a + function pointer is represented by a pointer to a TOC entry. This + TOC entry contains three words, the first word is the address of + the function, the second word is the TOC pointer (r2), and the + third word is the static chain value. Throughout GDB it is + currently assumed that a function pointer contains the address of + the function, which is not easy to fix. In addition, the + conversion of a function address to a function pointer would + require allocation of a TOC entry in the inferior's memory space, + with all its drawbacks. To be able to call C++ virtual methods in + the inferior (which are called via function pointers), + find_function_addr uses this function to get the function address + from a function pointer. */ + +/* If ADDR points at what is clearly a function descriptor, transform + it into the address of the corresponding function. Be + conservative, otherwise GDB will do the transformation on any + random addresses such as occurs when there is no symbol table. */ + +static CORE_ADDR +ppc64_linux_convert_from_func_ptr_addr (struct gdbarch *gdbarch, + CORE_ADDR addr, + struct target_ops *targ) +{ + struct section_table *s = target_section_by_addr (targ, addr); + + /* Check if ADDR points to a function descriptor.
*/ + if (s && strcmp (s->the_bfd_section->name, ".opd") == 0) + return get_target_memory_unsigned (targ, addr, 8); + + return addr; +} + +#ifdef CRASH_MERGE +enum { + PPC_ELF_NGREG = 48, + PPC_ELF_NFPREG = 33, + PPC_ELF_NVRREG = 33 +}; + +enum { + ELF_GREGSET_SIZE = (PPC_ELF_NGREG * 4), + ELF_FPREGSET_SIZE = (PPC_ELF_NFPREG * 8) +}; +#else +enum { + ELF_NGREG = 48, + ELF_NFPREG = 33, + ELF_NVRREG = 33 +}; + +enum { + ELF_GREGSET_SIZE = (ELF_NGREG * 4), + ELF_FPREGSET_SIZE = (ELF_NFPREG * 8) +}; +#endif + +void +ppc_linux_supply_gregset (char *buf) +{ + int regi; + struct gdbarch_tdep *tdep = gdbarch_tdep (current_gdbarch); + + for (regi = 0; regi < 32; regi++) + supply_register (regi, buf + 4 * regi); + + supply_register (PC_REGNUM, buf + 4 * PPC_LINUX_PT_NIP); + supply_register (tdep->ppc_lr_regnum, buf + 4 * PPC_LINUX_PT_LNK); + supply_register (tdep->ppc_cr_regnum, buf + 4 * PPC_LINUX_PT_CCR); + supply_register (tdep->ppc_xer_regnum, buf + 4 * PPC_LINUX_PT_XER); + supply_register (tdep->ppc_ctr_regnum, buf + 4 * PPC_LINUX_PT_CTR); + if (tdep->ppc_mq_regnum != -1) + supply_register (tdep->ppc_mq_regnum, buf + 4 * PPC_LINUX_PT_MQ); + supply_register (tdep->ppc_ps_regnum, buf + 4 * PPC_LINUX_PT_MSR); +} + +void +ppc_linux_supply_fpregset (char *buf) +{ + int regi; + struct gdbarch_tdep *tdep = gdbarch_tdep (current_gdbarch); + + for (regi = 0; regi < 32; regi++) + supply_register (FP0_REGNUM + regi, buf + 8 * regi); + + /* The FPSCR is stored in the low order word of the last doubleword in the + fpregset. */ + supply_register (tdep->ppc_fpscr_regnum, buf + 8 * 32 + 4); +} + +/* + Use a local version of this function to get the correct types for regsets. 
+*/ + +static void +fetch_core_registers (char *core_reg_sect, + unsigned core_reg_size, + int which, + CORE_ADDR reg_addr) +{ + if (which == 0) + { + if (core_reg_size == ELF_GREGSET_SIZE) + ppc_linux_supply_gregset (core_reg_sect); + else + warning ("wrong size gregset struct in core file"); + } + else if (which == 2) + { + if (core_reg_size == ELF_FPREGSET_SIZE) + ppc_linux_supply_fpregset (core_reg_sect); + else + warning ("wrong size fpregset struct in core file"); + } +} + +/* Register that we are able to handle ELF file formats using standard + procfs "regset" structures. */ + +static struct core_fns ppc_linux_regset_core_fns = +{ + bfd_target_elf_flavour, /* core_flavour */ + default_check_format, /* check_format */ + default_core_sniffer, /* core_sniffer */ + fetch_core_registers, /* core_read_registers */ + NULL /* next */ +}; + +static void +ppc_linux_init_abi (struct gdbarch_info info, + struct gdbarch *gdbarch) +{ + struct gdbarch_tdep *tdep = gdbarch_tdep (gdbarch); + + if (tdep->wordsize == 4) + { + /* Until November 2001, gcc did not comply with the 32 bit SysV + R4 ABI requirement that structures less than or equal to 8 + bytes should be returned in registers. Instead GCC was using + the AIX/PowerOpen ABI - everything returned in memory + (well ignoring vectors that is). When this was corrected, it + wasn't fixed for GNU/Linux native platform. Use the + PowerOpen struct convention. */ + set_gdbarch_return_value (gdbarch, ppc_linux_return_value); + + /* Note: kevinb/2002-04-12: See note in rs6000_gdbarch_init regarding + *_push_arguments(). The same remarks hold for the methods below.
*/ + set_gdbarch_deprecated_frameless_function_invocation (gdbarch, ppc_linux_frameless_function_invocation); + set_gdbarch_deprecated_frame_chain (gdbarch, ppc_linux_frame_chain); + set_gdbarch_deprecated_frame_saved_pc (gdbarch, ppc_linux_frame_saved_pc); + + set_gdbarch_deprecated_frame_init_saved_regs (gdbarch, + ppc_linux_frame_init_saved_regs); + set_gdbarch_deprecated_init_extra_frame_info (gdbarch, + ppc_linux_init_extra_frame_info); + + set_gdbarch_memory_remove_breakpoint (gdbarch, + ppc_linux_memory_remove_breakpoint); + /* Shared library handling. */ + set_gdbarch_in_solib_call_trampoline (gdbarch, in_plt_section); + set_gdbarch_skip_trampoline_code (gdbarch, + ppc_linux_skip_trampoline_code); + set_solib_svr4_fetch_link_map_offsets + (gdbarch, ppc_linux_svr4_fetch_link_map_offsets); + } + + if (tdep->wordsize == 8) + { + /* Handle PPC64 GNU/Linux function pointers (which are really + function descriptors). */ + set_gdbarch_convert_from_func_ptr_addr + (gdbarch, ppc64_linux_convert_from_func_ptr_addr); + + set_gdbarch_in_solib_call_trampoline + (gdbarch, ppc64_in_solib_call_trampoline); + set_gdbarch_skip_trampoline_code (gdbarch, ppc64_skip_trampoline_code); + + /* PPC64 malloc's entry-point is called ".malloc". */ + set_gdbarch_name_of_malloc (gdbarch, ".malloc"); + } +} + +void +_initialize_ppc_linux_tdep (void) +{ + /* Register for all sub-families of the POWER/PowerPC: 32-bit and + 64-bit PowerPC, and the older rs6k.
*/ + gdbarch_register_osabi (bfd_arch_powerpc, bfd_mach_ppc, GDB_OSABI_LINUX, + ppc_linux_init_abi); + gdbarch_register_osabi (bfd_arch_powerpc, bfd_mach_ppc64, GDB_OSABI_LINUX, + ppc_linux_init_abi); + gdbarch_register_osabi (bfd_arch_rs6000, bfd_mach_rs6k, GDB_OSABI_LINUX, + ppc_linux_init_abi); + add_core_fns (&ppc_linux_regset_core_fns); +} --- crash/main.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/main.c 2006-04-26 15:41:04.000000000 -0400 @@ -35,11 +35,13 @@ {"no_data_debug", 0, 0, 0}, {"no_crashrc", 0, 0, 0}, {"no_kmem_cache", 0, 0, 0}, + {"kmem_cache_delay", 0, 0, 0}, {"readnow", 0, 0, 0}, {"smp", 0, 0, 0}, {"machdep", 1, 0, 0}, {"version", 0, 0, 0}, {"buildinfo", 0, 0, 0}, + {"shadow_page_tables", 0, 0, 0}, {0, 0, 0, 0} }; @@ -55,7 +57,7 @@ */ opterr = 0; optind = 0; - while((c = getopt_long(argc, argv, "LgH:h:e:i:sSvc:d:tf", + while((c = getopt_long(argc, argv, "LgH:h:e:i:sSvc:d:tfp:", long_options, &option_index)) != -1) { switch (c) { @@ -98,6 +100,10 @@ vt->flags |= KMEM_CACHE_UNAVAIL; if (STREQ(long_options[option_index].name, + "kmem_cache_delay")) + vt->flags |= KMEM_CACHE_DELAY; + + if (STREQ(long_options[option_index].name, "readnow")) pc->flags |= READNOW; @@ -123,6 +129,10 @@ clean_exit(0); } + if (STREQ(long_options[option_index].name, + "shadow_page_tables")) + kt->xen_flags |= SHADOW_PAGE_TABLES; + break; case 'f': @@ -193,6 +203,10 @@ set_vas_debug(pc->debug); break; + case 'p': + force_page_size(optarg); + break; + default: if (STREQ(argv[optind-1], "-h")) program_usage(LONG_FORM); @@ -264,6 +278,28 @@ pc->readmem = read_netdump; pc->writemem = write_netdump; + } else if (is_kdump(argv[optind], KDUMP_LOCAL)) { + if (pc->flags & MEMORY_SOURCES) { + error(INFO, + "too many dumpfile arguments\n"); + program_usage(SHORT_FORM); + } + pc->flags |= KDUMP; + pc->dumpfile = argv[optind]; + pc->readmem = read_kdump; + pc->writemem = write_kdump; + + } else if (is_xendump(argv[optind])) { + if (pc->flags & MEMORY_SOURCES) { + error(INFO, + 
"too many dumpfile arguments\n"); + program_usage(SHORT_FORM); + } + pc->flags |= XENDUMP; + pc->dumpfile = argv[optind]; + pc->readmem = read_xendump; + pc->writemem = write_xendump; + } else if (is_diskdump(argv[optind])) { if (pc->flags & MEMORY_SOURCES) { error(INFO, @@ -335,8 +371,8 @@ machdep_init(PRE_SYMTAB); symtab_init(); machdep_init(PRE_GDB); - kernel_init(PRE_GDB); - verify_version(); +// kernel_init(PRE_GDB); +// verify_version(); datatype_init(); /* @@ -361,6 +397,8 @@ { if (!(pc->flags & GDB_INIT)) { gdb_session_init(); + kernel_init(PRE_GDB); + verify_version(); kernel_init(POST_GDB); machdep_init(POST_GDB); vm_init(); @@ -591,6 +629,8 @@ int i; char *p1; char buf[BUFSIZE]; + char homerc[BUFSIZE]; + char localrc[BUFSIZE]; FILE *afp; char *program; @@ -685,11 +725,11 @@ pc->home = "(unknown)"; } else strcpy(pc->home, p1); - sprintf(buf, "%s/.%src", pc->home, pc->program_name); - if (!(pc->flags & NOCRASHRC) && file_exists(buf, NULL)) { - if ((afp = fopen(buf, "r")) == NULL) + sprintf(homerc, "%s/.%src", pc->home, pc->program_name); + if (!(pc->flags & NOCRASHRC) && file_exists(homerc, NULL)) { + if ((afp = fopen(homerc, "r")) == NULL) error(INFO, "cannot open %s: %s\n", - buf, strerror(errno)); + homerc, strerror(errno)); else { while (fgets(buf, BUFSIZE, afp)) resolve_rc_cmd(buf, ALIAS_RCHOME); @@ -698,11 +738,12 @@ } } - sprintf(buf, ".%src", pc->program_name); - if (!(pc->flags & NOCRASHRC) && file_exists(buf, NULL)) { - if ((afp = fopen(buf, "r")) == NULL) + sprintf(localrc, ".%src", pc->program_name); + if (!same_file(homerc, localrc) && + !(pc->flags & NOCRASHRC) && file_exists(localrc, NULL)) { + if ((afp = fopen(localrc, "r")) == NULL) error(INFO, "cannot open %s: %s\n", - buf, strerror(errno)); + localrc, strerror(errno)); else { while (fgets(buf, BUFSIZE, afp)) resolve_rc_cmd(buf, ALIAS_RCLOCAL); @@ -840,13 +881,22 @@ if (pc->flags & REM_S390D) sprintf(&buf[strlen(buf)], "%sREM_S390D", others++ ? 
"|" : ""); - if (pc->flags & NETDUMP) + if (pc->flags & NETDUMP) sprintf(&buf[strlen(buf)], "%sNETDUMP", others++ ? "|" : ""); + if (pc->flags & XENDUMP) + sprintf(&buf[strlen(buf)], + "%sXENDUMP", others++ ? "|" : ""); + if (pc->flags & KDUMP) + sprintf(&buf[strlen(buf)], + "%sKDUMP", others++ ? "|" : ""); + if (pc->flags & SYSRQ) + sprintf(&buf[strlen(buf)], + "%sSYSRQ", others++ ? "|" : ""); if (pc->flags & REM_NETDUMP) sprintf(&buf[strlen(buf)], "%sREM_NETDUMP", others++ ? "|" : ""); - if (pc->flags & DISKDUMP) + if (pc->flags & DISKDUMP) sprintf(&buf[strlen(buf)], "%sDISKDUMP", others++ ? "|" : ""); if (pc->flags & SYSMAP) @@ -855,21 +905,24 @@ if (pc->flags & SYSMAP_ARG) sprintf(&buf[strlen(buf)], "%sSYSMAP_ARG", others++ ? "|" : ""); - if (pc->flags & DATADEBUG) + if (pc->flags & DATADEBUG) sprintf(&buf[strlen(buf)], "%sDATADEBUG", others++ ? "|" : ""); - if (pc->flags & FINDKERNEL) + if (pc->flags & FINDKERNEL) sprintf(&buf[strlen(buf)], "%sFINDKERNEL", others++ ? "|" : ""); - if (pc->flags & VERSION_QUERY) + if (pc->flags & VERSION_QUERY) sprintf(&buf[strlen(buf)], "%sVERSION_QUERY", others++ ? "|" : ""); - if (pc->flags & READNOW) + if (pc->flags & READNOW) sprintf(&buf[strlen(buf)], "%sREADNOW", others++ ? "|" : ""); - if (pc->flags & NOCRASHRC) + if (pc->flags & NOCRASHRC) sprintf(&buf[strlen(buf)], "%sNOCRASHRC", others++ ? "|" : ""); + if (pc->flags & INIT_IFILE) + sprintf(&buf[strlen(buf)], + "%sINIT_IFILE", others++ ? 
"|" : ""); if (pc->flags) strcat(buf, ")"); @@ -1051,6 +1104,8 @@ fprintf(fp, " readmem: read_daemon()\n"); else if (pc->readmem == read_netdump) fprintf(fp, " readmem: read_netdump()\n"); + else if (pc->readmem == read_kdump) + fprintf(fp, " readmem: read_kdump()\n"); else if (pc->readmem == read_memory_device) fprintf(fp, " readmem: read_memory_device()\n"); else @@ -1065,6 +1120,8 @@ fprintf(fp, " writemem: write_daemon()\n"); else if (pc->writemem == write_netdump) fprintf(fp, " writemem: write_netdump()\n"); + else if (pc->writemem == write_kdump) + fprintf(fp, " writemem: write_kdump()\n"); else if (pc->writemem == write_memory_device) fprintf(fp, " writemem: write_memory_device()\n"); else --- crash/tools.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/tools.c 2006-04-26 15:45:34.000000000 -0400 @@ -2004,6 +2004,10 @@ pc->flags &= ~(DUMPFILE_TYPES); if (is_netdump(args[optind], NETDUMP_LOCAL)) pc->flags |= NETDUMP; + else if (is_kdump(args[optind], KDUMP_LOCAL)) + pc->flags |= KDUMP; + else if (is_xendump(args[optind])) + pc->flags |= XENDUMP; else if (is_diskdump(args[optind])) pc->flags |= DISKDUMP; else if (is_lkcd_compressed_dump(args[optind])) @@ -2896,11 +2900,25 @@ } /* - * If it's not a symbol nor a number, bail out. + * If it's not a symbol nor a number, bail out if it + * cannot be evaluated as a start address. */ - if (!IS_A_NUMBER(args[optind])) + if (!IS_A_NUMBER(args[optind])) { + if (can_eval(args[optind])) { + value = eval(args[optind], FAULT_ON_ERROR, NULL); + if (IS_KVADDR(value)) { + if (ld->flags & LIST_START_ENTERED) + error(FATAL, + "list start already entered\n"); + ld->start = value; + ld->flags |= LIST_START_ENTERED; + goto next_arg; + } + } + error(FATAL, "invalid argument: %s\n", args[optind]); + } /* * If the start is known, it's got to be an offset. 
@@ -2941,7 +2959,8 @@ ld->member_offset = value; ld->flags |= LIST_OFFSET_ENTERED; goto next_arg; - } else if (!IS_A_NUMBER(args[optind+1]) && + } else if ((!IS_A_NUMBER(args[optind+1]) && + !can_eval(args[optind+1])) && !strstr(args[optind+1], ".")) error(FATAL, "symbol not found: %s\n", args[optind+1]); @@ -4210,6 +4229,9 @@ { ulonglong total, days, hours, minutes, seconds; + if (CRASHDEBUG(2)) + error(INFO, "convert_time: %lld (%llx)\n", count, count); + total = (count)/(ulonglong)machdep->hz; days = total / SEC_DAYS; @@ -4300,12 +4322,34 @@ void command_not_supported() { - error(FATAL, "command not supported on this architecture\n"); + error(FATAL, "command not supported on this architecture or kernel\n"); } void option_not_supported(int c) { - error(FATAL, "-%c option not supported on this architecture\n", + error(FATAL, "-%c option not supported on this architecture or kernel\n", (char)c); } + +void +please_wait(char *s) +{ + if ((pc->flags & SILENT) || !(pc->flags & TTY) || + !DUMPFILE() || (pc->flags & RUNTIME)) + return; + + fprintf(fp, "\rplease wait... 
(%s)", s); + fflush(fp); +} + +void +please_wait_done(void) +{ + if ((pc->flags & SILENT) || !(pc->flags & TTY) || + !DUMPFILE() || (pc->flags & RUNTIME)) + return; + + fprintf(fp, "\r \r"); + fflush(fp); +} --- crash/memory.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/memory.c 2006-05-01 11:35:50.000000000 -0400 @@ -45,6 +45,8 @@ ulong *addrlist; int *kmem_bufctl; ulong *cpudata[NR_CPUS]; + ulong *shared_array_cache; + int current_cache_index; ulong found; ulong retval; char *ignore; @@ -57,6 +59,10 @@ ulong get_slabs; char *slab_buf; char *cache_buf; + struct vmlist { + ulong addr; + ulong size; + } *vmlist; }; static char *memtype_string(int, int); @@ -85,6 +91,7 @@ static void do_slab_chain(int, struct meminfo *); static void do_slab_chain_percpu_v1(long, struct meminfo *); static void do_slab_chain_percpu_v2(long, struct meminfo *); +static void do_slab_chain_percpu_v2_nodes(long, struct meminfo *); static void save_slab_data(struct meminfo *); static int slab_data_saved(struct meminfo *); static void dump_saved_slab_data(void); @@ -97,7 +104,9 @@ static void gather_slab_free_list_percpu(struct meminfo *); static void gather_cpudata_list_v1(struct meminfo *); static void gather_cpudata_list_v2(struct meminfo *); +static void gather_cpudata_list_v2_nodes(struct meminfo *, int); static int check_cpudata_list(struct meminfo *, ulong); +static int check_shared_list(struct meminfo *, ulong); static void gather_slab_cached_count(struct meminfo *); static void dump_slab_objects(struct meminfo *); static void dump_slab_objects_percpu(struct meminfo *); @@ -110,6 +119,7 @@ static void search(ulong, ulong, ulong, int, ulong *, int); static int next_upage(struct task_context *, ulong, ulong *); static int next_kpage(ulong, ulong *); +static ulong next_vmlist_vaddr(struct meminfo *, ulong); static int vm_area_page_dump(ulong, ulong, ulong, ulong, void *, struct reference *); static int dump_swap_info(ulong, ulong *, ulong *); @@ -124,9 +134,18 @@ static int 
compare_node_data(const void *, const void *); static void do_vm_flags(ulong); static void PG_reserved_flag_init(void); +static void PG_slab_flag_init(void); static ulong nr_blockdev_pages(void); - - +void sparse_mem_init(void); +void list_mem_sections(void); +ulong sparse_decode_mem_map(ulong, ulong); +char *read_mem_section(ulong); +ulong nr_to_section(ulong); +int valid_section(ulong); +int section_has_mem_map(ulong); +ulong section_mem_map_addr(ulong); +ulong valid_section_nr(ulong); +ulong pfn_to_map(ulong); /* * Memory display modes specific to this file. @@ -182,6 +201,13 @@ MEMBER_OFFSET_INIT(mm_struct_mmap, "mm_struct", "mmap"); MEMBER_OFFSET_INIT(mm_struct_pgd, "mm_struct", "pgd"); MEMBER_OFFSET_INIT(mm_struct_rss, "mm_struct", "rss"); + if (!VALID_MEMBER(mm_struct_rss)) + MEMBER_OFFSET_INIT(mm_struct_rss, "mm_struct", "_rss"); + if (!VALID_MEMBER(mm_struct_rss)) + MEMBER_OFFSET_INIT(mm_struct_rss, "mm_struct", "_file_rss"); + MEMBER_OFFSET_INIT(mm_struct_anon_rss, "mm_struct", "anon_rss"); + if (!VALID_MEMBER(mm_struct_anon_rss)) + MEMBER_OFFSET_INIT(mm_struct_anon_rss, "mm_struct", "_anon_rss"); MEMBER_OFFSET_INIT(mm_struct_total_vm, "mm_struct", "total_vm"); MEMBER_OFFSET_INIT(mm_struct_start_code, "mm_struct", "start_code"); MEMBER_OFFSET_INIT(vm_area_struct_vm_mm, "vm_area_struct", "vm_mm"); @@ -270,6 +296,7 @@ STRUCT_SIZE_INIT(kmem_slab_s, "kmem_slab_s"); STRUCT_SIZE_INIT(slab_s, "slab_s"); STRUCT_SIZE_INIT(slab, "slab"); + STRUCT_SIZE_INIT(kmem_cache_s, "kmem_cache_s"); STRUCT_SIZE_INIT(pgd_t, "pgd_t"); if (!VALID_STRUCT(kmem_slab_s) && VALID_STRUCT(slab_s)) { @@ -310,17 +337,49 @@ !VALID_STRUCT(slab_s) && VALID_STRUCT(slab)) { vt->flags |= PERCPU_KMALLOC_V2; - MEMBER_OFFSET_INIT(kmem_cache_s_num, "kmem_cache_s", "num"); - MEMBER_OFFSET_INIT(kmem_cache_s_next, "kmem_cache_s", "next"); - MEMBER_OFFSET_INIT(kmem_cache_s_name, "kmem_cache_s", "name"); - MEMBER_OFFSET_INIT(kmem_cache_s_colour_off, "kmem_cache_s", - "colour_off"); - 
MEMBER_OFFSET_INIT(kmem_cache_s_objsize, "kmem_cache_s", - "objsize"); - MEMBER_OFFSET_INIT(kmem_cache_s_flags, "kmem_cache_s", "flags"); - MEMBER_OFFSET_INIT(kmem_cache_s_gfporder, - "kmem_cache_s", "gfporder"); - + if (VALID_STRUCT(kmem_cache_s)) { + MEMBER_OFFSET_INIT(kmem_cache_s_num, "kmem_cache_s", "num"); + MEMBER_OFFSET_INIT(kmem_cache_s_next, "kmem_cache_s", "next"); + MEMBER_OFFSET_INIT(kmem_cache_s_name, "kmem_cache_s", "name"); + MEMBER_OFFSET_INIT(kmem_cache_s_colour_off, "kmem_cache_s", + "colour_off"); + MEMBER_OFFSET_INIT(kmem_cache_s_objsize, "kmem_cache_s", + "objsize"); + MEMBER_OFFSET_INIT(kmem_cache_s_flags, "kmem_cache_s", "flags"); + MEMBER_OFFSET_INIT(kmem_cache_s_gfporder, + "kmem_cache_s", "gfporder"); + + MEMBER_OFFSET_INIT(kmem_cache_s_lists, "kmem_cache_s", "lists"); + MEMBER_OFFSET_INIT(kmem_cache_s_array, "kmem_cache_s", "array"); + ARRAY_LENGTH_INIT(len, NULL, "kmem_cache_s.array", NULL, 0); + } else { + STRUCT_SIZE_INIT(kmem_cache_s, "kmem_cache"); + MEMBER_OFFSET_INIT(kmem_cache_s_num, "kmem_cache", "num"); + MEMBER_OFFSET_INIT(kmem_cache_s_next, "kmem_cache", "next"); + MEMBER_OFFSET_INIT(kmem_cache_s_name, "kmem_cache", "name"); + MEMBER_OFFSET_INIT(kmem_cache_s_colour_off, "kmem_cache", + "colour_off"); + if (MEMBER_EXISTS("kmem_cache", "objsize")) + MEMBER_OFFSET_INIT(kmem_cache_s_objsize, "kmem_cache", + "objsize"); + else if (MEMBER_EXISTS("kmem_cache", "buffer_size")) + MEMBER_OFFSET_INIT(kmem_cache_s_objsize, "kmem_cache", + "buffer_size"); + MEMBER_OFFSET_INIT(kmem_cache_s_flags, "kmem_cache", "flags"); + MEMBER_OFFSET_INIT(kmem_cache_s_gfporder, + "kmem_cache", "gfporder"); + + if (MEMBER_EXISTS("kmem_cache", "lists")) + MEMBER_OFFSET_INIT(kmem_cache_s_lists, "kmem_cache", "lists"); + else if (MEMBER_EXISTS("kmem_cache", "nodelists")) { + vt->flags |= PERCPU_KMALLOC_V2_NODES; + MEMBER_OFFSET_INIT(kmem_cache_s_lists, "kmem_cache", "nodelists"); + ARRAY_LENGTH_INIT(vt->kmem_cache_len_nodes, NULL, + "kmem_cache.nodelists", 
NULL, 0); + } + MEMBER_OFFSET_INIT(kmem_cache_s_array, "kmem_cache", "array"); + ARRAY_LENGTH_INIT(len, NULL, "kmem_cache.array", NULL, 0); + } MEMBER_OFFSET_INIT(slab_list, "slab", "list"); MEMBER_OFFSET_INIT(slab_s_mem, "slab", "s_mem"); MEMBER_OFFSET_INIT(slab_inuse, "slab", "inuse"); @@ -330,10 +389,6 @@ MEMBER_OFFSET_INIT(array_cache_limit, "array_cache", "limit"); STRUCT_SIZE_INIT(array_cache, "array_cache"); - MEMBER_OFFSET_INIT(kmem_cache_s_lists, "kmem_cache_s", "lists"); - MEMBER_OFFSET_INIT(kmem_cache_s_array, "kmem_cache_s", "array"); - ARRAY_LENGTH_INIT(len, NULL, "kmem_cache_s.array", NULL, 0); - MEMBER_OFFSET_INIT(kmem_list3_slabs_partial, "kmem_list3", "slabs_partial"); MEMBER_OFFSET_INIT(kmem_list3_slabs_full, @@ -415,10 +470,17 @@ error(FATAL, "no swapper_pg_dir or cpu_pgd symbols exist?\n"); get_symbol_data("high_memory", sizeof(ulong), &vt->high_memory); - if (kernel_symbol_exists("mem_map")) + + if (kernel_symbol_exists("mem_map")) { get_symbol_data("mem_map", sizeof(char *), &vt->mem_map); + vt->flags |= FLATMEM; + } else if (kernel_symbol_exists("mem_section")) + vt->flags |= SPARSEMEM; else vt->flags |= DISCONTIGMEM; + + sparse_mem_init(); + vt->vmalloc_start = machdep->vmalloc_start(); if (IS_VMALLOC_ADDR(vt->mem_map)) vt->flags |= V_MEM_MAP; @@ -478,7 +540,6 @@ STRUCT_SIZE_INIT(free_area_struct, "free_area_struct"); STRUCT_SIZE_INIT(zone, "zone"); STRUCT_SIZE_INIT(zone_struct, "zone_struct"); - STRUCT_SIZE_INIT(kmem_cache_s, "kmem_cache_s"); STRUCT_SIZE_INIT(kmem_bufctl_t, "kmem_bufctl_t"); STRUCT_SIZE_INIT(swap_info_struct, "swap_info_struct"); STRUCT_SIZE_INIT(mm_struct, "mm_struct"); @@ -524,6 +585,7 @@ ARRAY_LENGTH_INIT(vt->nr_zones, pglist_data_node_zones, "pglist_data.node_zones", NULL, SIZE_OPTION(zone_struct, zone)); + vt->ZONE_HIGHMEM = vt->nr_zones - 1; if (VALID_STRUCT(zone_struct)) { MEMBER_OFFSET_INIT(zone_struct_free_pages, @@ -539,6 +601,8 @@ if (INVALID_MEMBER(zone_struct_size)) MEMBER_OFFSET_INIT(zone_struct_memsize, 
"zone_struct", "memsize"); + MEMBER_OFFSET_INIT(zone_struct_zone_start_pfn, + "zone_struct", "zone_start_pfn"); MEMBER_OFFSET_INIT(zone_struct_zone_start_paddr, "zone_struct", "zone_start_paddr"); MEMBER_OFFSET_INIT(zone_struct_zone_start_mapnr, @@ -640,13 +704,7 @@ kmem_cache_init(); PG_reserved_flag_init(); - - if (VALID_MEMBER(page_pte)) { - if (THIS_KERNEL_VERSION < LINUX(2,6,0)) - vt->PG_slab = 10; - else if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) - vt->PG_slab = 7; - } + PG_slab_flag_init(); } /* @@ -970,7 +1028,8 @@ case DISPLAY_64: if ((flag & (HEXADECIMAL|SYMBOLIC|DISPLAY_DEFAULT)) == (HEXADECIMAL|SYMBOLIC|DISPLAY_DEFAULT)) { - if (in_ksymbol_range(mem.u64)) { + if (in_ksymbol_range(mem.u64) && + strlen(value_to_symstr(mem.u64, buf, 0))) { fprintf(fp, "%-16s ", value_to_symstr(mem.u64, buf, 0)); linelen += strlen(buf)+1; @@ -993,7 +1052,8 @@ case DISPLAY_32: if ((flag & (HEXADECIMAL|SYMBOLIC|DISPLAY_DEFAULT)) == (HEXADECIMAL|SYMBOLIC|DISPLAY_DEFAULT)) { - if (in_ksymbol_range(mem.u32)) { + if (in_ksymbol_range(mem.u32) && + strlen(value_to_symstr(mem.u32, buf, 0))) { fprintf(fp, INT_PRLEN == 16 ? "%-16s " : "%-8s ", value_to_symstr(mem.u32, @@ -2113,6 +2173,8 @@ break; } + paddr = 0; + switch (memtype) { case UVADDR: fprintf(fp, "%s %s\n", @@ -2126,9 +2188,12 @@ return; } if (!uvtop(tc, vaddr, &paddr, 0)) { - fprintf(fp, "%s (not mapped)\n\n", + fprintf(fp, "%s %s\n\n", mkstring(buf1, UVADDR_PRLEN, LJUST|LONG_HEX, - MKSTR(vaddr))); + MKSTR(vaddr)), + (XEN() && (paddr == PADDR_NOT_AVAILABLE)) ? + "(page not available)" : "(not mapped)"); + page_exists = FALSE; } else { fprintf(fp, "%s %s\n\n", @@ -2161,9 +2226,13 @@ } if (vtop_flags & USE_USER_PGD) { if (!uvtop(tc, vaddr, &paddr, 0)) { - fprintf(fp, "%s (not mapped)\n\n", + fprintf(fp, "%s %s\n\n", mkstring(buf1, UVADDR_PRLEN, - LJUST|LONG_HEX, MKSTR(vaddr))); + LJUST|LONG_HEX, MKSTR(vaddr)), + (XEN() && + (paddr == PADDR_NOT_AVAILABLE)) ? 
+ "(page not available)" : + "(not mapped)"); page_exists = FALSE; } else { fprintf(fp, "%s %s\n\n", @@ -2176,9 +2245,13 @@ uvtop(tc, vaddr, &paddr, VERBOSE); } else { if (!kvtop(tc, vaddr, &paddr, 0)) { - fprintf(fp, "%s (not mapped)\n\n", + fprintf(fp, "%s %s\n\n", mkstring(buf1, VADDR_PRLEN, - LJUST|LONG_HEX, MKSTR(vaddr))); + LJUST|LONG_HEX, MKSTR(vaddr)), + (XEN() && + (paddr == PADDR_NOT_AVAILABLE)) ? + "(page not available)" : + "(not mapped)"); page_exists = FALSE; } else { fprintf(fp, "%s %s\n\n", @@ -2980,6 +3053,8 @@ return; tm->rss = ULONG(tt->mm_struct + OFFSET(mm_struct_rss)); + if (VALID_MEMBER(mm_struct_anon_rss)) + tm->rss += ULONG(tt->mm_struct + OFFSET(mm_struct_anon_rss)); tm->total_vm = ULONG(tt->mm_struct + OFFSET(mm_struct_total_vm)); tm->pgd_addr = ULONG(tt->mm_struct + OFFSET(mm_struct_pgd)); @@ -3036,6 +3111,9 @@ #define GET_INACTIVE_DIRTY (ADDRESS_SPECIFIED << 13) /* obsolete */ #define SLAB_GET_COUNTS (ADDRESS_SPECIFIED << 14) #define SLAB_WALKTHROUGH (ADDRESS_SPECIFIED << 15) +#define GET_VMLIST_COUNT (ADDRESS_SPECIFIED << 16) +#define GET_VMLIST (ADDRESS_SPECIFIED << 17) +#define SLAB_DATA_NOSAVE (ADDRESS_SPECIFIED << 18) #define GET_ALL \ (GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES) @@ -3159,7 +3237,7 @@ cmd_usage(pc->curcmd, SYNOPSIS); } - if (sflag || Sflag) + if (sflag || Sflag || !(vt->flags & KMEM_CACHE_INIT)) kmem_cache_init(); while (args[optind]) { @@ -3373,12 +3451,13 @@ buf = (char *)GETBUF(SIZE(page)); if (!readmem(pageptr, KVADDR, buf, SIZE(page), - "reserved page", RETURN_ON_ERROR|QUIET)) + "reserved page", RETURN_ON_ERROR|QUIET)) { + FREEBUF(buf); return; + } flags = ULONG(buf + OFFSET(page_flags)); - if (count_bits_long(flags) == 1) vt->PG_reserved = flags; else @@ -3386,12 +3465,50 @@ if (CRASHDEBUG(2)) fprintf(fp, - "PG_reserved bit: vaddr: %lx page: %lx flags: %lx => %lx\n", + "PG_reserved: vaddr: %lx page: %lx flags: %lx => %lx\n", vaddr, pageptr, flags, vt->PG_reserved); FREEBUF(buf); } 
+static void +PG_slab_flag_init(void) +{ + int bit; + ulong pageptr; + ulong vaddr, flags; + char buf[BUFSIZE]; /* safe for a page struct */ + + /* + * Set the old defaults in case the search below fails. + */ + if (VALID_MEMBER(page_pte)) { + if (THIS_KERNEL_VERSION < LINUX(2,6,0)) + vt->PG_slab = 10; + else if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) + vt->PG_slab = 7; + } else if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) + vt->PG_slab = 7; + + if (try_get_symbol_data("vm_area_cachep", sizeof(void *), &vaddr) && + phys_to_page((physaddr_t)VTOP(vaddr), &pageptr) && + readmem(pageptr, KVADDR, buf, SIZE(page), + "vm_area_cachep page", RETURN_ON_ERROR|QUIET)) { + + flags = ULONG(buf + OFFSET(page_flags)); + + if ((bit = ffsl(flags))) { + vt->PG_slab = bit - 1; + + if (CRASHDEBUG(2)) + fprintf(fp, + "PG_slab bit: vaddr: %lx page: %lx flags: %lx => %ld\n", + vaddr, pageptr, flags, vt->PG_slab); + + } + } +} + /* * dump_mem_map() displays basic data about each entry in the mem_map[] * array, or if an address is specified, just the mem_map[] entry for that @@ -3440,16 +3557,16 @@ static void dump_mem_map(struct meminfo *mi) { - long i, n; + ulong i; + long node; long total_pages; - int others, page_not_mapped, phys_not_mapped; + int others, page_not_mapped, phys_not_mapped, page_mapping; ulong pp, ppend; physaddr_t phys, physend; ulong tmp, reserved, shared, slabs; ulong PG_reserved_flag; long buffers; ulong inode, offset, flags, mapping, index; - ulong node_size; uint count; int print_hdr, pg_spec, phys_spec, done; int v22; @@ -3462,6 +3579,7 @@ char buf4[BUFSIZE]; char *page_cache; char *pcache; + ulong section, section_nr, nr_mem_sections, node_or_section_size; v22 = VALID_MEMBER(page_inode); /* page.inode vs. 
page.mapping */ @@ -3549,22 +3667,67 @@ done = FALSE; total_pages = 0; - for (n = 0; n < vt->numnodes; n++) { + if (IS_SPARSEMEM()) + nr_mem_sections = NR_MEM_SECTIONS(); + else + nr_mem_sections = 1; + + /* + * Iterate over all possible sections + */ + node = 0; + for (section_nr = 0; section_nr < nr_mem_sections ; section_nr++) { + + if (CRASHDEBUG(2)) + printf("section_nr = %ld\n", section_nr); + + /* + * If we are looking up a specific address, jump directly + * to the section with that page + */ + if (IS_SPARSEMEM() && (mi->flags & ADDRESS_SPECIFIED)) { + ulong pfn = mi->spec_addr >> PAGESHIFT(); + section_nr = pfn_to_section_nr(pfn); + } + + if (IS_SPARSEMEM() && + !(section = valid_section_nr(section_nr))) { +#ifdef NOTDEF + break; /* On a real sparsemem system we need to check + * every section as gaps may exist. But this + * can be slow. If we know we don't have gaps + * just stop validating sections when we + * get to the end of the valid ones. + * In the future find a way to short circuit + * this loop. + */ +#endif + continue; + } + if (print_hdr) { - fprintf(fp, "%s%s", n ? "\n" : "", hdr); + fprintf(fp, "%s%s", node ? 
"\n" : "", hdr); print_hdr = FALSE; } - nt = &vt->node_table[n]; - total_pages += nt->size; - pp = nt->mem_map; - phys = nt->start_paddr; - if ((vt->flags & V_MEM_MAP) && (vt->numnodes == 1)) - node_size = vt->max_mapnr; - else - node_size = nt->size; +next_node: + if (IS_SPARSEMEM()) { + pp = section_mem_map_addr(section); + pp = sparse_decode_mem_map(pp, section_nr); + phys = section_nr * PAGES_PER_SECTION() * PAGESIZE(); + node_or_section_size = PAGES_PER_SECTION(); + } else { + nt = &vt->node_table[node]; + total_pages += nt->size; + pp = nt->mem_map; + phys = nt->start_paddr; + if ((vt->flags & V_MEM_MAP) && (vt->numnodes == 1)) + node_or_section_size = vt->max_mapnr; + else + node_or_section_size = nt->size; + } - for (i = 0; i < node_size; + for (i = 0; i < node_or_section_size; i++, pp += SIZE(page), phys += PAGESIZE()) { if ((i % PGMM_CACHED) == 0) { @@ -3653,11 +3816,12 @@ } continue; } + page_mapping = VALID_MEMBER(page_mapping); if (v22) { inode = ULONG(pcache + OFFSET(page_inode)); offset = ULONG(pcache + OFFSET(page_offset)); - } else { + } else if (page_mapping) { mapping = ULONG(pcache + OFFSET(page_mapping)); index = ULONG(pcache + OFFSET(page_index)); @@ -3700,6 +3864,20 @@ space(MINSPACE), mkstring(buf4, 8, CENTER|RJUST, " "), " "); + else if (!page_mapping) + fprintf(fp, "%s%s%s%s%s%s%s %2d ", + mkstring(buf0, VADDR_PRLEN, + LJUST|LONG_HEX, MKSTR(pp)), + space(MINSPACE), + mkstring(buf1, MAX(PADDR_PRLEN, + strlen("PHYSICAL")), + RJUST|LONGLONG_HEX, MKSTR(&phys)), + space(MINSPACE), + mkstring(buf3, VADDR_PRLEN, + CENTER|RJUST, "-------"), + space(MINSPACE), + mkstring(buf4, 8, CENTER|RJUST, "-----"), + count); else fprintf(fp, "%s%s%s%s%s%s%8ld %2d ", mkstring(buf0, VADDR_PRLEN, @@ -3827,6 +4005,11 @@ if (done) break; + + node++; + + if ((node < vt->numnodes) && !IS_SPARSEMEM()) + goto next_node; } switch (mi->flags) @@ -4520,13 +4703,6 @@ */ static char *zone_hdr = "ZONE NAME SIZE FREE"; -/* - * From linux/mmzone.h - */ -#define ZONE_DMA 0 
-#define ZONE_NORMAL 1 -#define ZONE_HIGHMEM 2 - static void dump_free_pages_zones_v1(struct meminfo *fi) { @@ -4610,7 +4786,7 @@ } if (fi->flags == GET_FREE_HIGHMEM_PAGES) { - if (i == ZONE_HIGHMEM) { + if (i == vt->ZONE_HIGHMEM) { readmem(node_zones+ OFFSET(zone_struct_free_pages), KVADDR, &value, sizeof(ulong), @@ -4702,7 +4878,7 @@ hq_close(); - if (fi->flags & (GET_FREE_PAGES|GET_ZONE_SIZES|GET_FREE_HIGHMEM_PAGES)){ + if (fi->flags & (GET_FREE_PAGES|GET_ZONE_SIZES|GET_FREE_HIGHMEM_PAGES)) { fi->retval = sum; return; } @@ -4899,7 +5075,7 @@ } if (fi->flags == GET_FREE_HIGHMEM_PAGES) { - if (i == ZONE_HIGHMEM) { + if (i == vt->ZONE_HIGHMEM) { readmem(node_zones+ OFFSET(zone_free_pages), KVADDR, &value, sizeof(ulong), @@ -4997,7 +5173,7 @@ hq_close(); - if (fi->flags & (GET_FREE_PAGES|GET_ZONE_SIZES|GET_FREE_HIGHMEM_PAGES)){ + if (fi->flags & (GET_FREE_PAGES|GET_ZONE_SIZES|GET_FREE_HIGHMEM_PAGES)) { fi->retval = sum; return; } @@ -5575,15 +5751,17 @@ char buf1[BUFSIZE]; char buf2[BUFSIZE]; ulong vmlist; - ulong addr, size, next, pcheck; + ulong addr, size, next, pcheck, count; physaddr_t paddr; get_symbol_data("vmlist", sizeof(void *), &vmlist); next = vmlist; + count = 0; while (next) { if ((next == vmlist) && - !(vi->flags & (GET_HIGHEST|GET_PHYS_TO_VMALLOC))) { + !(vi->flags & (GET_HIGHEST|GET_PHYS_TO_VMALLOC| + GET_VMLIST_COUNT|GET_VMLIST))) { fprintf(fp, "%s ", mkstring(buf, MAX(strlen("VM_STRUCT"), VADDR_PRLEN), CENTER|LJUST, "VM_STRUCT")); @@ -5599,6 +5777,20 @@ &size, sizeof(ulong), "vmlist size", FAULT_ON_ERROR); + if (vi->flags & (GET_VMLIST_COUNT|GET_VMLIST)) { + /* + * Preceding GET_VMLIST_COUNT set vi->retval. 
+ */ + if (vi->flags & GET_VMLIST) { + if (count < vi->retval) { + vi->vmlist[count].addr = addr; + vi->vmlist[count].size = size; + } + } + count++; + goto next_entry; + } + if (!(vi->flags & ADDRESS_SPECIFIED) || ((vi->memtype == KVADDR) && ((vi->spec_addr >= addr) && (vi->spec_addr < (addr+size))))) @@ -5639,7 +5831,7 @@ } } - +next_entry: readmem(next+OFFSET(vm_struct_next), KVADDR, &next, sizeof(void *), "vmlist next", FAULT_ON_ERROR); @@ -5647,6 +5839,9 @@ if (vi->flags & GET_HIGHEST) vi->retval = addr+size; + + if (vi->flags & GET_VMLIST_COUNT) + vi->retval = count; } /* @@ -6136,9 +6331,14 @@ if (vt->flags & KMEM_CACHE_UNAVAIL) return; + if ((vt->flags & KMEM_CACHE_DELAY) && !(pc->flags & RUNTIME)) + return; + if (DUMPFILE() && (vt->flags & KMEM_CACHE_INIT)) return; + please_wait("gathering kmem slab cache data"); + if (!strlen(slab_hdr)) sprintf(slab_hdr, "SLAB%sMEMORY%sTOTAL ALLOCATED FREE\n", @@ -6177,9 +6377,11 @@ if (!readmem(cache, KVADDR, cache_buf, SIZE(kmem_cache_s), "kmem_cache_s buffer", RETURN_ON_ERROR)) { + FREEBUF(cache_buf); vt->flags |= KMEM_CACHE_UNAVAIL; error(INFO, - "unable to initialize kmem slab cache subsystem\n\n"); + "%sunable to initialize kmem slab cache subsystem\n\n", + DUMPFILE() ? "\n" : ""); return; } @@ -6190,6 +6392,13 @@ if ((tmp = max_cpudata_limit(cache, &tmp2)) > max_limit) max_limit = tmp; + /* + * Recognize and bail out on any max_cpudata_limit() failures. 
+ */ + if (vt->flags & KMEM_CACHE_UNAVAIL) { + FREEBUF(cache_buf); + return; + } if (tmp2 > max_cpus) max_cpus = tmp2; @@ -6237,6 +6446,8 @@ NULL, 0); } + please_wait_done(); + vt->flags |= KMEM_CACHE_INIT; } @@ -6250,25 +6461,32 @@ ulong cpudata[NR_CPUS]; int limit; ulong max_limit; - + ulong shared; + ulong *start_address; + + if (vt->flags & PERCPU_KMALLOC_V2_NODES) + goto kmem_cache_s_array_nodes; + if (vt->flags & PERCPU_KMALLOC_V2) goto kmem_cache_s_array; + + if (INVALID_MEMBER(kmem_cache_s_cpudata)) { + *cpus = 0; + return 0; + } - if (INVALID_MEMBER(kmem_cache_s_cpudata)) { - *cpus = 0; - return 0; - } - - readmem(cache+OFFSET(kmem_cache_s_cpudata), - KVADDR, &cpudata[0], - sizeof(ulong) * ARRAY_LENGTH(kmem_cache_s_cpudata), - "cpudata array", FAULT_ON_ERROR); + if (!readmem(cache+OFFSET(kmem_cache_s_cpudata), + KVADDR, &cpudata[0], + sizeof(ulong) * ARRAY_LENGTH(kmem_cache_s_cpudata), + "cpudata array", RETURN_ON_ERROR)) + goto bail_out; for (i = max_limit = 0; (i < ARRAY_LENGTH(kmem_cache_s_cpudata)) && cpudata[i]; i++) { - readmem(cpudata[i]+OFFSET(cpucache_s_limit), - KVADDR, &limit, sizeof(int), - "cpucache limit", FAULT_ON_ERROR); + if (!readmem(cpudata[i]+OFFSET(cpucache_s_limit), + KVADDR, &limit, sizeof(int), + "cpucache limit", RETURN_ON_ERROR)) + goto bail_out; if (limit > max_limit) max_limit = limit; } @@ -6279,22 +6497,89 @@ kmem_cache_s_array: - readmem(cache+OFFSET(kmem_cache_s_array), - KVADDR, &cpudata[0], - sizeof(ulong) * ARRAY_LENGTH(kmem_cache_s_array), - "array cache array", FAULT_ON_ERROR); + if (!readmem(cache+OFFSET(kmem_cache_s_array), + KVADDR, &cpudata[0], + sizeof(ulong) * ARRAY_LENGTH(kmem_cache_s_array), + "array cache array", RETURN_ON_ERROR)) + goto bail_out; for (i = max_limit = 0; (i < ARRAY_LENGTH(kmem_cache_s_array)) && cpudata[i]; i++) { - readmem(cpudata[i]+OFFSET(array_cache_limit), - KVADDR, &limit, sizeof(int), - "array cache limit", FAULT_ON_ERROR); + if (!readmem(cpudata[i]+OFFSET(array_cache_limit), + KVADDR, 
&limit, sizeof(int), + "array cache limit", RETURN_ON_ERROR)) + goto bail_out; if (limit > max_limit) max_limit = limit; } + /* + * If the shared list can be accessed, check its size as well. + */ + if (VALID_MEMBER(kmem_list3_shared) && + VALID_MEMBER(kmem_cache_s_lists) && + readmem(cache+OFFSET(kmem_cache_s_lists)+OFFSET(kmem_list3_shared), + KVADDR, &shared, sizeof(void *), "kmem_list3 shared", + RETURN_ON_ERROR|QUIET) && + readmem(shared+OFFSET(array_cache_limit), + KVADDR, &limit, sizeof(int), "shared array_cache limit", + RETURN_ON_ERROR|QUIET)) { + if (limit > max_limit) + max_limit = limit; + } + *cpus = i; return max_limit; + +kmem_cache_s_array_nodes: + + if (!readmem(cache+OFFSET(kmem_cache_s_array), + KVADDR, &cpudata[0], + sizeof(ulong) * ARRAY_LENGTH(kmem_cache_s_array), + "array cache array", RETURN_ON_ERROR)) + goto bail_out; + + for (i = max_limit = 0; (i < ARRAY_LENGTH(kmem_cache_s_array)) && + cpudata[i]; i++) { + if (!readmem(cpudata[i]+OFFSET(array_cache_limit), + KVADDR, &limit, sizeof(int), + "array cache limit", RETURN_ON_ERROR)) + goto bail_out; + if (limit > max_limit) + max_limit = limit; + } + + *cpus = i; + + /* + * Check the shared list of all the nodes. 
+ */ + start_address = (ulong *)GETBUF(sizeof(ulong) * vt->kmem_cache_len_nodes); + + if (VALID_MEMBER(kmem_list3_shared) && VALID_MEMBER(kmem_cache_s_lists) && + readmem(cache+OFFSET(kmem_cache_s_lists), KVADDR, &start_address[0], + sizeof(ulong) * vt->kmem_cache_len_nodes, "array nodelist array", + RETURN_ON_ERROR)) { + for (i = 0; i < vt->kmem_cache_len_nodes && start_address[i]; i++) { + if (readmem(start_address[i] + OFFSET(kmem_list3_shared), + KVADDR, &shared, sizeof(void *), + "kmem_list3 shared", RETURN_ON_ERROR|QUIET) && + readmem(shared + OFFSET(array_cache_limit), + KVADDR, &limit, sizeof(int), "shared array_cache limit", + RETURN_ON_ERROR|QUIET)) { + if (limit > max_limit) + max_limit = limit; + } + } + } + FREEBUF(start_address); + return max_limit; + +bail_out: + vt->flags |= KMEM_CACHE_UNAVAIL; + error(INFO, "unable to initialize kmem slab cache subsystem\n\n"); + *cpus = 0; + return 0; } /* @@ -6353,6 +6638,7 @@ #define KMEM_OBJECT_ADDR_INUSE (4) #define KMEM_OBJECT_ADDR_CACHED (5) #define KMEM_ON_SLAB (6) +#define KMEM_OBJECT_ADDR_SHARED (7) #define DUMP_KMEM_CACHE_INFO_V1() \ { \ @@ -6408,7 +6694,7 @@ { \ char b1[BUFSIZE], b2[BUFSIZE]; \ ulong allocated, freeobjs; \ - if (vt->flags & PERCPU_KMALLOC_V1) { \ + if (vt->flags & (PERCPU_KMALLOC_V1|PERCPU_KMALLOC_V2)) { \ allocated = si->s_inuse - si->cpucached_slab; \ freeobjs = si->c_num - allocated - si->cpucached_slab; \ } else { \ @@ -6419,8 +6705,8 @@ mkstring(b1, VADDR_PRLEN, LJUST|LONG_HEX, MKSTR(si->slab)), \ mkstring(b2, VADDR_PRLEN, LJUST|LONG_HEX, MKSTR(si->s_mem)), \ si->c_num, allocated, \ - vt->flags & PERCPU_KMALLOC_V1 ? freeobjs + si->cpucached_slab :\ - freeobjs); \ + vt->flags & (PERCPU_KMALLOC_V1|PERCPU_KMALLOC_V2) ? 
\ + freeobjs + si->cpucached_slab : freeobjs); \ } static void @@ -6857,6 +7143,13 @@ for (i = 0; i < vt->kmem_max_cpus; i++) si->cpudata[i] = (ulong *) GETBUF(vt->kmem_max_limit * sizeof(ulong)); + if(vt->flags & PERCPU_KMALLOC_V2_NODES) + si->shared_array_cache = (ulong *) + GETBUF(vt->kmem_cache_len_nodes * + (vt->kmem_max_limit+1) * sizeof(ulong)); + else + si->shared_array_cache = (ulong *) + GETBUF((vt->kmem_max_limit+1) * sizeof(ulong)); cnt = 0; @@ -6939,7 +7232,10 @@ "kmem_cache_s num", FAULT_ON_ERROR); si->c_num = (ulong)tmp_val; - do_slab_chain_percpu_v2(SLAB_GET_COUNTS, si); + if( vt->flags & PERCPU_KMALLOC_V2_NODES ) + do_slab_chain_percpu_v2_nodes(SLAB_GET_COUNTS, si); + else + do_slab_chain_percpu_v2(SLAB_GET_COUNTS, si); if (!(si->flags & (ADDRESS_SPECIFIED|GET_SLAB_PAGES))) { DUMP_KMEM_CACHE_INFO_V2(); @@ -6953,12 +7249,16 @@ if (si->flags & (VERBOSE|ADDRESS_SPECIFIED)) { - gather_cpudata_list_v2(si); + if (!(vt->flags & PERCPU_KMALLOC_V2_NODES)) + gather_cpudata_list_v2(si); si->slab = (si->flags & ADDRESS_SPECIFIED) ? 
vaddr_to_slab(si->spec_addr) : 0; - do_slab_chain_percpu_v2(SLAB_WALKTHROUGH, si); + if (vt->flags & PERCPU_KMALLOC_V2_NODES) + do_slab_chain_percpu_v2_nodes(SLAB_WALKTHROUGH, si); + else + do_slab_chain_percpu_v2(SLAB_WALKTHROUGH, si); if (si->found) { fprintf(fp, kmem_cache_hdr); @@ -7005,7 +7305,14 @@ " %lx (cpu %d cache)\n", (ulong)si->spec_addr, si->cpu); break; - } + + case KMEM_OBJECT_ADDR_SHARED: + fprintf(fp, free_inuse_hdr); + fprintf(fp, + " %lx (shared cache)\n", + (ulong)si->spec_addr); + break; + } break; } @@ -7033,6 +7340,7 @@ FREEBUF(si->kmem_bufctl); for (i = 0; i < vt->kmem_max_cpus; i++) FREEBUF(si->cpudata[i]); + FREEBUF(si->shared_array_cache); } @@ -7638,6 +7946,231 @@ } } + +/* +* Added To Traverse the Nodelists +*/ + +static void +do_slab_chain_percpu_v2_nodes(long cmd, struct meminfo *si) +{ + int i, tmp, s; + int list_borked; + char *slab_buf; + ulong specified_slab; + ulong last; + ulong slab_chains[SLAB_CHAINS]; + ulong *start_address; + int index; + + list_borked = 0; + si->slabsize = (power(2, si->order) * PAGESIZE()); + si->cpucached_slab = 0; + start_address = (ulong *)GETBUF(sizeof(ulong) * vt->kmem_cache_len_nodes); + + if (!readmem(si->cache+OFFSET(kmem_cache_s_lists), KVADDR, + &start_address[0], sizeof(ulong) * vt->kmem_cache_len_nodes, + "array nodelist array", RETURN_ON_ERROR)) + error(INFO, "cannot read kmem_cache nodelists array"); + + switch (cmd) + { + case SLAB_GET_COUNTS: + si->flags |= SLAB_GET_COUNTS; + si->flags &= ~SLAB_WALKTHROUGH; + si->cpucached_cache = 0; + si->num_slabs = si->inuse = 0; + slab_buf = GETBUF(SIZE(slab)); + for (index=0; (index < vt->kmem_cache_len_nodes) && start_address[index]; index++) + { + slab_chains[0] = start_address[index] + OFFSET(kmem_list3_slabs_partial); + slab_chains[1] = start_address[index] + OFFSET(kmem_list3_slabs_full); + slab_chains[2] = start_address[index] + OFFSET(kmem_list3_slabs_free); + + gather_cpudata_list_v2_nodes(si, index); + + if (CRASHDEBUG(1)) { + fprintf(fp, "[ 
%s: %lx ", si->curname, si->cache); + fprintf(fp, "partial: %lx full: %lx free: %lx ]\n", + slab_chains[0], slab_chains[1], slab_chains[2]); + } + + for (s = 0; s < SLAB_CHAINS; s++) { + if (!slab_chains[s]) + continue; + + if (!readmem(slab_chains[s], + KVADDR, &si->slab, sizeof(ulong), + "first slab", QUIET|RETURN_ON_ERROR)) { + error(INFO, + "%s: %s list: bad slab pointer: %lx\n", + si->curname, + slab_chain_name_v2[s], + slab_chains[s]); + list_borked = 1; + continue; + } + + if (slab_data_saved(si)) { + FREEBUF(slab_buf); + FREEBUF(start_address); + return; + } + + if (si->slab == slab_chains[s]) + continue; + + last = slab_chains[s]; + + do { + if (received_SIGINT()) { + FREEBUF(slab_buf); + FREEBUF(start_address); + restart(0); + } + + if (!verify_slab_v2(si, last, s)) { + list_borked = 1; + continue; + } + last = si->slab - OFFSET(slab_list); + + readmem(si->slab, KVADDR, slab_buf, + SIZE(slab), "slab buffer", + FAULT_ON_ERROR); + + tmp = INT(slab_buf + OFFSET(slab_inuse)); + si->inuse += tmp; + + if (ACTIVE()) + gather_cpudata_list_v2_nodes(si, index); + + si->s_mem = ULONG(slab_buf + + OFFSET(slab_s_mem)); + gather_slab_cached_count(si); + + si->num_slabs++; + + si->slab = ULONG(slab_buf + + OFFSET(slab_list)); + si->slab -= OFFSET(slab_list); + + /* + * Check for slab transition. 
(Tony Dziedzic) + */ + for (i = 0; i < SLAB_CHAINS; i++) { + if ((i != s) && + (si->slab == slab_chains[i])) { + error(NOTE, + "%s: slab chain inconsistency: %s list\n", + si->curname, + slab_chain_name_v2[s]); + list_borked = 1; + } + } + + } while (si->slab != slab_chains[s] && !list_borked); + } + } + + if (!list_borked) + save_slab_data(si); + break; + + case SLAB_WALKTHROUGH: + specified_slab = si->slab; + si->flags |= SLAB_WALKTHROUGH; + si->flags &= ~SLAB_GET_COUNTS; + slab_buf = GETBUF(SIZE(slab)); + for (index=0; (index < vt->kmem_cache_len_nodes) && start_address[index]; index++) + { + slab_chains[0] = start_address[index] + OFFSET(kmem_list3_slabs_partial); + slab_chains[1] = start_address[index] + OFFSET(kmem_list3_slabs_full); + slab_chains[2] = start_address[index] + OFFSET(kmem_list3_slabs_free); + + gather_cpudata_list_v2_nodes(si, index); + + if (CRASHDEBUG(1)) { + fprintf(fp, "[ %s: %lx ", si->curname, si->cache); + fprintf(fp, "partial: %lx full: %lx free: %lx ]\n", + slab_chains[0], slab_chains[1], slab_chains[2]); + } + + for (s = 0; s < SLAB_CHAINS; s++) { + if (!slab_chains[s]) + continue; + + if (!specified_slab) { + if (!readmem(slab_chains[s], + KVADDR, &si->slab, sizeof(ulong), + "slabs", QUIET|RETURN_ON_ERROR)) { + error(INFO, + "%s: %s list: bad slab pointer: %lx\n", + si->curname, + slab_chain_name_v2[s], + slab_chains[s]); + list_borked = 1; + continue; + } + last = slab_chains[s]; + } else + last = 0; + + if (si->slab == slab_chains[s]) + continue; + + readmem(si->slab, KVADDR, slab_buf, + SIZE(slab), "slab buffer", + FAULT_ON_ERROR); + + si->s_mem = ULONG(slab_buf + + OFFSET(slab_s_mem)); + + if (CRASHDEBUG(1)) { + fprintf(fp, "search cache: [%s] ", si->curname); + if (si->flags & ADDRESS_SPECIFIED) + fprintf(fp, "for %llx", si->spec_addr); + fprintf(fp, "\n"); + } + + do { + if (received_SIGINT()) + { + FREEBUF(start_address); + FREEBUF(slab_buf); + restart(0); + } + + if (!verify_slab_v2(si, last, s)) { + list_borked = 1; + 
continue; + } + last = si->slab - OFFSET(slab_list); + + dump_slab_percpu_v2(si); + + if (si->found) { + FREEBUF(start_address); + FREEBUF(slab_buf); + return; + } + + readmem(si->slab+OFFSET(slab_list), + KVADDR, &si->slab, sizeof(ulong), + "slab list", FAULT_ON_ERROR); + + si->slab -= OFFSET(slab_list); + + } while (si->slab != slab_chains[s] && !list_borked); + } + } + + break; + } + FREEBUF(slab_buf); + FREEBUF(start_address); +} + /* * Try to preclude any attempt to translate a bogus slab structure. */ @@ -7750,6 +8283,11 @@ { int i; + if (si->flags & SLAB_DATA_NOSAVE) { + si->flags &= ~SLAB_DATA_NOSAVE; + return; + } + if (ACTIVE()) return; @@ -7840,7 +8378,7 @@ if (si->flags & ADDRESS_SPECIFIED) { if (INSLAB(si->slab, si) && (si->spec_addr >= si->slab) && - (si->spec_addr < (si->slab+SIZE(kmem_slab_s)))){ + (si->spec_addr < (si->slab+SIZE(kmem_slab_s)))) { si->found = KMEM_SLAB_ADDR; return; } @@ -8213,7 +8751,7 @@ */ if (si->c_flags & SLAB_CFLGS_BUFCTL) { - for (i = 0, next = si->s_index; i < si->c_num; i++, next++){ + for (i = 0, next = si->s_index; i < si->c_num; i++, next++) { obj = si->s_mem + ((next - si->s_index) * si->c_offset); DUMP_SLAB_OBJECT(); @@ -8263,7 +8801,7 @@ dump_slab_objects_percpu(struct meminfo *si) { int i, j; - int on_free_list, on_cpudata_list; + int on_free_list, on_cpudata_list, on_shared_list; ulong cnt, expected; ulong obj; @@ -8285,6 +8823,7 @@ for (i = 0, obj = si->s_mem; i < si->c_num; i++, obj += si->size) { on_free_list = FALSE; on_cpudata_list = FALSE; + on_shared_list = FALSE; for (j = 0; j < si->c_num; j++) { if (obj == si->addrlist[j]) { @@ -8294,13 +8833,26 @@ } on_cpudata_list = check_cpudata_list(si, obj); + on_shared_list = check_shared_list(si, obj); if (on_free_list && on_cpudata_list) { error(INFO, - "\"%s\" cache: object %lx on both free and cpudata lists\n", + "\"%s\" cache: object %lx on both free and cpu %d lists\n", + si->curname, si->cpu, obj); + si->errors++; + } + if (on_free_list && on_shared_list) { + 
error(INFO, + "\"%s\" cache: object %lx on both free and shared lists\n", si->curname, obj); si->errors++; } + if (on_cpudata_list && on_shared_list) { + error(INFO, + "\"%s\" cache: object %lx on both cpu %d and shared lists\n", + si->curname, obj, si->cpu); + si->errors++; + } if (on_free_list) { if (!(si->flags & ADDRESS_SPECIFIED)) @@ -8324,6 +8876,17 @@ return; } } + } else if (on_shared_list) { + if (!(si->flags & ADDRESS_SPECIFIED)) + fprintf(fp, " %lx (shared cache)\n", obj); + cnt++; + if (si->flags & ADDRESS_SPECIFIED) { + if (INOBJECT(si->spec_addr, obj)) { + si->found = + KMEM_OBJECT_ADDR_SHARED; + return; + } + } } else { if (!(si->flags & ADDRESS_SPECIFIED)) fprintf(fp, " [%lx]\n", obj); @@ -8349,7 +8912,10 @@ /* * Determine how many of the "inuse" slab objects are actually cached * in the kmem_cache_s header. Set the per-slab count and update the - * cumulative per-cache count. + * cumulative per-cache count. With the addition of the shared list + * check, the terms "cpucached_cache" and "cpucached_slab" are somewhat + * misleading. But they both are types of objects that are cached + * in the kmem_cache_s header, just not necessarily per-cpu. */ static void @@ -8357,16 +8923,35 @@ { int i; ulong obj; + int in_cpudata, in_shared; si->cpucached_slab = 0; for (i = 0, obj = si->s_mem; i < si->c_num; i++, obj += si->size) { + in_cpudata = in_shared = 0; if (check_cpudata_list(si, obj)) { + in_cpudata = TRUE; si->cpucached_slab++; if (si->flags & SLAB_GET_COUNTS) { si->cpucached_cache++; } } + if (check_shared_list(si, obj)) { + in_shared = TRUE; + if (!in_cpudata) { + si->cpucached_slab++; + if (si->flags & SLAB_GET_COUNTS) { + si->cpucached_cache++; + } + } + } + if (in_cpudata && in_shared) { + si->flags |= SLAB_DATA_NOSAVE; + if (!(si->flags & VERBOSE)) + error(INFO, + "\"%s\" cache: object %lx on both cpu %d and shared lists\n", + si->curname, obj, si->cpu); + } } } @@ -8423,7 +9008,8 @@ } /* - * Updated for 2.6 slab percpu data structure. 
+ * Updated for 2.6 slab percpu data structure, this also gathers + * the shared array_cache list as well. */ static void gather_cpudata_list_v2(struct meminfo *si) @@ -8431,6 +9017,7 @@ int i, j; int avail; ulong cpudata[NR_CPUS]; + ulong shared; readmem(si->cache+OFFSET(kmem_cache_s_array), KVADDR, &cpudata[0], @@ -8466,8 +9053,152 @@ if (CRASHDEBUG(2)) for (j = 0; j < avail; j++) - fprintf(fp, " %lx\n", si->cpudata[i][j]); + fprintf(fp, " %lx (cpu %d)\n", si->cpudata[i][j], i); } + + /* + * If the shared list contains anything, gather them as well. + */ + BZERO(si->shared_array_cache, sizeof(ulong) * vt->kmem_max_limit); + + if (!VALID_MEMBER(kmem_list3_shared) || + !VALID_MEMBER(kmem_cache_s_lists) || + !readmem(si->cache+OFFSET(kmem_cache_s_lists)+ + OFFSET(kmem_list3_shared), KVADDR, &shared, sizeof(void *), + "kmem_list3 shared", RETURN_ON_ERROR|QUIET) || + !readmem(shared+OFFSET(array_cache_avail), + KVADDR, &avail, sizeof(int), "shared array_cache avail", + RETURN_ON_ERROR|QUIET) || !avail) + return; + + if (avail > vt->kmem_max_limit) { + error(INFO, + "\"%s\" cache: shared array_cache.avail %d greater than limit %ld\n", + si->curname, avail, vt->kmem_max_limit); + si->errors++; + return; + } + + if (CRASHDEBUG(2)) + fprintf(fp, "%s: shared avail: %d\n", + si->curname, avail); + + readmem(shared+SIZE(array_cache), KVADDR, si->shared_array_cache, + sizeof(void *) * avail, "shared array_cache avail", + FAULT_ON_ERROR); + + if (CRASHDEBUG(2)) + for (j = 0; j < avail; j++) + fprintf(fp, " %lx (shared list)\n", si->shared_array_cache[j]); +} + + + +/* + * Updated gather_cpudata_list_v2 for per-node kmem_list3's in kmem_cache + */ +static void +gather_cpudata_list_v2_nodes(struct meminfo *si, int index) +{ + int i, j; + int avail; + ulong cpudata[NR_CPUS]; + ulong shared; + ulong *start_address; + + start_address = (ulong *) GETBUF(sizeof(ulong) * vt->kmem_cache_len_nodes); + readmem(si->cache+OFFSET(kmem_cache_s_array), + KVADDR, &cpudata[0], + sizeof(ulong) * 
ARRAY_LENGTH(kmem_cache_s_array), + "array_cache array", FAULT_ON_ERROR); + + for (i = 0; (i < ARRAY_LENGTH(kmem_cache_s_array)) && + (cpudata[i]) && !(index); i++) { + BZERO(si->cpudata[i], sizeof(ulong) * vt->kmem_max_limit); + + readmem(cpudata[i]+OFFSET(array_cache_avail), + KVADDR, &avail, sizeof(int), + "array cache avail", FAULT_ON_ERROR); + + if (!avail) + continue; + + if (avail > vt->kmem_max_limit) { + error(INFO, + "\"%s\" cache: array_cache.avail %d greater than limit %ld\n", + si->curname, avail, vt->kmem_max_limit); + si->errors++; + } + + if (CRASHDEBUG(2)) + fprintf(fp, "%s: cpu[%d] avail: %d\n", + si->curname, i, avail); + + readmem(cpudata[i]+SIZE(array_cache), + KVADDR, si->cpudata[i], + sizeof(void *) * avail, + "array_cache avail", FAULT_ON_ERROR); + + if (CRASHDEBUG(2)) + for (j = 0; j < avail; j++) + fprintf(fp, " %lx (cpu %d)\n", si->cpudata[i][j], i); + } + + /* + * If the shared list contains anything, gather them as well. + */ + if (!index) { + BZERO(si->shared_array_cache, sizeof(ulong) * + vt->kmem_max_limit * vt->kmem_cache_len_nodes); + si->current_cache_index = 0; + } + + if (!readmem(si->cache+OFFSET(kmem_cache_s_lists), KVADDR, &start_address[0], + sizeof(ulong) * vt->kmem_cache_len_nodes , "array nodelist array", + RETURN_ON_ERROR) || + !readmem(start_address[index] + OFFSET(kmem_list3_shared), KVADDR, &shared, + sizeof(void *), "kmem_list3 shared", RETURN_ON_ERROR|QUIET) || + !readmem(shared + OFFSET(array_cache_avail), KVADDR, &avail, sizeof(int), + "shared array_cache avail", RETURN_ON_ERROR|QUIET) || !avail) { + FREEBUF(start_address); + return; + } + + if (avail > vt->kmem_max_limit) { + error(INFO, + "\"%s\" cache: shared array_cache.avail %d greater than limit %ld\n", + si->curname, avail, vt->kmem_max_limit); + si->errors++; + FREEBUF(start_address); + return; + } + + if (CRASHDEBUG(2)) + fprintf(fp, "%s: shared avail: %d\n", + si->curname, avail); + + readmem(shared+SIZE(array_cache), KVADDR, si->shared_array_cache + 
si->current_cache_index, + sizeof(void *) * avail, "shared array_cache avail", + FAULT_ON_ERROR); + + if ((si->current_cache_index + avail) > + (vt->kmem_max_limit * vt->kmem_cache_len_nodes)) { + error(INFO, + "\"%s\" cache: total shared array_cache.avail %d greater than total limit %ld\n", + si->curname, + si->current_cache_index + avail, + vt->kmem_max_limit * vt->kmem_cache_len_nodes); + si->errors++; + FREEBUF(start_address); + return; + } + + if (CRASHDEBUG(2)) + for (j = si->current_cache_index; j < (si->current_cache_index + avail); j++) + fprintf(fp, " %lx (shared list)\n", si->shared_array_cache[j]); + + si->current_cache_index += avail; + FREEBUF(start_address); } /* @@ -8491,6 +9222,27 @@ return FALSE; } +/* + * Check whether a given address is contained in the previously-gathered + * shared object cache. + */ + +static int +check_shared_list(struct meminfo *si, ulong obj) +{ + int i; + + if (INVALID_MEMBER(kmem_list3_shared) || + !si->shared_array_cache) + return FALSE; + + for (i = 0; si->shared_array_cache[i]; i++) { + if (si->shared_array_cache[i] == obj) + return TRUE; + } + + return FALSE; +} /* * Search the various memory subsystems for instances of this address. @@ -8690,6 +9442,16 @@ physaddr_t pstart, pend; ulong node_size; + if (IS_SPARSEMEM()) { + ulong map; + map = pfn_to_map(phys >> PAGESHIFT()); + if (map) { + *pp = map; + return TRUE; + } + return FALSE; + } + for (n = 0; n < vt->numnodes; n++) { nt = &vt->node_table[n]; if ((vt->flags & V_MEM_MAP) && (vt->numnodes == 1)) @@ -8797,6 +9559,17 @@ fprintf(fp, "%sKMEM_CACHE_UNAVAIL", others++ ? "|" : ""); if (vt->flags & DISCONTIGMEM) fprintf(fp, "%sDISCONTIGMEM", others++ ? "|" : ""); + if (vt->flags & FLATMEM) + fprintf(fp, "%sFLATMEM", others++ ? "|" : ""); + if (vt->flags & SPARSEMEM) + fprintf(fp, "%sSPARSEMEM", others++ ? "|" : "");\ + if (vt->flags & SPARSEMEM_EX) + fprintf(fp, "%sSPARSEMEM_EX", others++ ? 
"|" : "");\ + if (vt->flags & KMEM_CACHE_DELAY) + fprintf(fp, "%sKMEM_CACHE_DELAY", others++ ? "|" : "");\ + if (vt->flags & PERCPU_KMALLOC_V2_NODES) + fprintf(fp, "%sPERCPU_KMALLOC_V2_NODES", others++ ? "|" : "");\ + fprintf(fp, ")\n"); if (vt->kernel_pgd[0] == vt->kernel_pgd[1]) fprintf(fp, " kernel_pgd[NR_CPUS]: %lx ...\n", @@ -8825,6 +9598,7 @@ fprintf(fp, " kmem_max_cpus: %ld\n", vt->kmem_max_cpus); fprintf(fp, " kmem_cache_count: %ld\n", vt->kmem_cache_count); fprintf(fp, " kmem_cache_namelen: %d\n", vt->kmem_cache_namelen); + fprintf(fp, "kmem_cache_nodelist_len: %ld\n", vt->kmem_cache_len_nodes); fprintf(fp, " PG_reserved: %lx\n", vt->PG_reserved); fprintf(fp, " PG_slab: %ld\n", vt->PG_slab); fprintf(fp, " paddr_prlen: %d\n", vt->paddr_prlen); @@ -8869,6 +9643,8 @@ fprintf(fp, " nr_swapfiles: %d\n", vt->nr_swapfiles); fprintf(fp, " last_swap_read: %lx\n", vt->last_swap_read); fprintf(fp, " swap_info_struct: %lx\n", (ulong)vt->swap_info_struct); + fprintf(fp, " mem_sec: %lx\n", (ulong)vt->mem_sec); + fprintf(fp, " ZONE_HIGHMEM: %d\n", vt->ZONE_HIGHMEM); dump_vma_cache(VERBOSE); } @@ -9321,6 +10097,43 @@ } /* + * Return the next mapped kernel virtual address in the vmlist + * that is equal to or comes after the passed-in address. + */ +static ulong +next_vmlist_vaddr(struct meminfo *mi, ulong vaddr) +{ + ulong i, count; + + BZERO(mi, sizeof(struct meminfo)); + + mi->flags = GET_VMLIST_COUNT; + dump_vmlist(mi); + count = mi->retval; + + if (!count) + return vaddr; + + mi->vmlist = (struct vmlist *)GETBUF(sizeof(struct vmlist)*count); + mi->flags = GET_VMLIST; + dump_vmlist(mi); + + for (i = 0; i < count; i++) { + if (vaddr <= mi->vmlist[i].addr) { + vaddr = mi->vmlist[i].addr; + break; + } + if (vaddr < (mi->vmlist[i].addr + mi->vmlist[i].size)) + break; + } + + FREEBUF(mi->vmlist); + + return vaddr; +} + + +/* * Return the next kernel virtual address page that comes after * the passed-in address. 
*/ @@ -9348,6 +10161,8 @@ if (IS_VMALLOC_ADDR(vaddr_orig)) { if (IS_VMALLOC_ADDR(vaddr) && (vaddr < vmalloc_limit)) { + if (machine_type("X86_64")) + vaddr = next_vmlist_vaddr(&meminfo, vaddr); *nextvaddr = vaddr; return TRUE; } @@ -9377,6 +10192,7 @@ /* * We're in the physical range. */ + *nextvaddr = vaddr; return TRUE; } @@ -9446,7 +10262,7 @@ totalswap = totalused = 0; for (i = 0; i < vt->nr_swapfiles; i++, - swap_info += SIZE(swap_info_struct)){ + swap_info += SIZE(swap_info_struct)) { fill_swap_info(swap_info); flags = INT(vt->swap_info_struct + @@ -9471,8 +10287,12 @@ prio = INT(vt->swap_info_struct + OFFSET(swap_info_struct_prio)); - max = ULONG(vt->swap_info_struct + - OFFSET(swap_info_struct_max)); + if (MEMBER_SIZE("swap_info_struct", "max") == sizeof(int)) + max = UINT(vt->swap_info_struct + + OFFSET(swap_info_struct_max)); + else + max = ULONG(vt->swap_info_struct + + OFFSET(swap_info_struct_max)); swap_map = ULONG(vt->swap_info_struct + OFFSET(swap_info_struct_swap_map)); @@ -9741,13 +10561,13 @@ dump_memory_nodes(int initialize) { int i, j; - int n, id, flen, slen; + int n, id, flen, slen, badaddr; ulong node_mem_map; ulong node_start_paddr; ulong node_start_pfn; ulong node_start_mapnr; ulong node_spanned_pages; - ulong free_pages, zone_size, node_size; + ulong free_pages, zone_size, node_size, cum_zone_size; ulong zone_start_paddr, zone_start_mapnr, zone_mem_map; ulong zone_start_pfn; ulong bdata; @@ -9761,23 +10581,23 @@ char buf5[BUFSIZE]; struct node_table *nt; - if (!(vt->flags & NODES)) { - if (!initialize) - error(FATAL, - "memory nodes not supported by this kernel\n\n"); - else { - nt = &vt->node_table[0]; - nt->node_id = 0; - if (symbol_exists("contig_page_data")) - nt->pgdat = symbol_value("contig_page_data"); - else - nt->pgdat = 0; - nt->size = vt->total_pages; - nt->mem_map = vt->mem_map; - nt->start_paddr = 0; - nt->start_mapnr = 0; - return; - } + if (IS_SPARSEMEM() && !initialize) { + error(INFO,"Per node memory data is not available 
for this kernel\n"); + return; + } + + if (!(vt->flags & NODES) && initialize) { + nt = &vt->node_table[0]; + nt->node_id = 0; + if (symbol_exists("contig_page_data")) + nt->pgdat = symbol_value("contig_page_data"); + else + nt->pgdat = 0; + nt->size = vt->total_pages; + nt->mem_map = vt->mem_map; + nt->start_paddr = 0; + nt->start_mapnr = 0; + return; } if (initialize) @@ -9785,7 +10605,7 @@ else pgdat = vt->node_table[0].pgdat; - for (n = 0; pgdat; n++) { + for (n = 0, badaddr = FALSE; pgdat; n++) { if (n >= vt->numnodes) error(FATAL, "numnodes out of sync with pgdat_list?\n"); @@ -9794,9 +10614,14 @@ readmem(pgdat+OFFSET(pglist_data_node_id), KVADDR, &id, sizeof(int), "pglist node_id", FAULT_ON_ERROR); - readmem(pgdat+OFFSET(pglist_data_node_mem_map), KVADDR, - &node_mem_map, sizeof(ulong), - "node_mem_map", FAULT_ON_ERROR); + if (VALID_MEMBER(pglist_data_node_mem_map)) { + readmem(pgdat+OFFSET(pglist_data_node_mem_map), KVADDR, + &node_mem_map, sizeof(ulong), + "node_mem_map", FAULT_ON_ERROR); + } else { + node_mem_map = BADADDR; + badaddr = TRUE; + } if (VALID_MEMBER(pglist_data_node_start_paddr)) readmem(pgdat+OFFSET(pglist_data_node_start_paddr), @@ -9896,6 +10721,7 @@ } node_zones = pgdat + OFFSET(pglist_data_node_zones); + cum_zone_size = 0; for (i = 0; i < vt->nr_zones; i++) { if (CRASHDEBUG(7)) fprintf(fp, "zone at %lx\n", node_zones); @@ -9926,12 +10752,24 @@ if (!read_string(value, buf1, BUFSIZE-1)) sprintf(buf1, "(unknown) "); if (VALID_STRUCT(zone_struct)) { - readmem(node_zones+ - OFFSET(zone_struct_zone_start_paddr), - KVADDR, &zone_start_paddr, - sizeof(ulong), - "node_zones zone_start_paddr", - FAULT_ON_ERROR); + if (VALID_MEMBER(zone_struct_zone_start_paddr)) + { + readmem(node_zones+OFFSET + (zone_struct_zone_start_paddr), + KVADDR, &zone_start_paddr, + sizeof(ulong), + "node_zones zone_start_paddr", + FAULT_ON_ERROR); + } else { + readmem(node_zones+ + OFFSET(zone_struct_zone_start_pfn), + KVADDR, &zone_start_pfn, + sizeof(ulong), + 
"node_zones zone_start_pfn", + FAULT_ON_ERROR); + zone_start_paddr = + PTOB(zone_start_pfn); + } readmem(node_zones+ OFFSET(zone_struct_zone_start_mapnr), KVADDR, &zone_start_mapnr, @@ -9946,7 +10784,14 @@ "node_zones zone_start_pfn", FAULT_ON_ERROR); zone_start_paddr = PTOB(zone_start_pfn); - readmem(node_zones+ + if (!(vt->flags & NODES) && + INVALID_MEMBER(zone_zone_mem_map)) { + readmem(pgdat+OFFSET(pglist_data_node_mem_map), + KVADDR, &zone_mem_map, sizeof(void *), + "contig_page_data mem_map", FAULT_ON_ERROR); + if (zone_size) + zone_mem_map += cum_zone_size * SIZE(page); + } else readmem(node_zones+ OFFSET(zone_zone_mem_map), KVADDR, &zone_mem_map, sizeof(ulong), @@ -9959,15 +10804,27 @@ else zone_start_mapnr = 0; } - readmem(node_zones+ - OFFSET_OPTION(zone_struct_zone_mem_map, - zone_zone_mem_map), KVADDR, &zone_mem_map, - sizeof(ulong), "node_zones zone_mem_map", - FAULT_ON_ERROR); + if (!(vt->flags & NODES) && + INVALID_MEMBER(zone_struct_zone_mem_map) && + INVALID_MEMBER(zone_zone_mem_map)) { + readmem(pgdat+OFFSET(pglist_data_node_mem_map), + KVADDR, &zone_mem_map, sizeof(void *), + "contig_page_data mem_map", FAULT_ON_ERROR); + if (zone_size) + zone_mem_map += cum_zone_size * SIZE(page); + else + zone_mem_map = 0; + } else + readmem(node_zones+ + OFFSET_OPTION(zone_struct_zone_mem_map, + zone_zone_mem_map), KVADDR, &zone_mem_map, + sizeof(ulong), "node_zones zone_mem_map", + FAULT_ON_ERROR); if (!initialize) { fprintf(fp, " %2d %-9s %7ld ", i, buf1, zone_size); + cum_zone_size += zone_size; fprintf(fp, "%s %s %s\n", mkstring(buf1, VADDR_PRLEN, RJUST|LONG_HEX,MKSTR(zone_mem_map)), @@ -10011,19 +10868,21 @@ * Override numnodes -- some kernels may leave it at 1 on a system * with multiple memory nodes. 
*/ - get_symbol_data("pgdat_list", sizeof(void *), &pgdat); - - for (n = 0; pgdat; n++) { - readmem(pgdat + OFFSET_OPTION(pglist_data_node_next, - pglist_data_pgdat_next), KVADDR, - &pgdat, sizeof(void *), "pglist_data node_next", - FAULT_ON_ERROR); - } - if (n != vt->numnodes) { - if (CRASHDEBUG(2)) - error(NOTE, "changing numnodes from %d to %d\n", - vt->numnodes, n); - vt->numnodes = n; + if (vt->flags & NODES) { + get_symbol_data("pgdat_list", sizeof(void *), &pgdat); + + for (n = 0; pgdat; n++) { + readmem(pgdat + OFFSET_OPTION(pglist_data_node_next, + pglist_data_pgdat_next), KVADDR, + &pgdat, sizeof(void *), "pglist_data node_next", + FAULT_ON_ERROR); + } + if (n != vt->numnodes) { + if (CRASHDEBUG(2)) + error(NOTE, "changing numnodes from %d to %d\n", + vt->numnodes, n); + vt->numnodes = n; + } } if (!(vt->node_table = (struct node_table *) @@ -10072,6 +10931,9 @@ { uint psz; + if (machdep->pagesize) + return machdep->pagesize; + if (REMOTE_MEMSRC()) return remote_page_size(); @@ -10081,6 +10943,14 @@ psz = diskdump_page_size(); break; + case XENDUMP: + psz = xendump_page_size(); + break; + + case KDUMP: + psz = kdump_page_size(); + break; + case NETDUMP: psz = netdump_page_size(); break; @@ -10115,6 +10985,48 @@ } /* + * If the page size cannot be determined by the dumpfile (like kdump), + * and the processor default cannot be used, allow the force-feeding + * of a crash command-line page size option. 
+ */ +void +force_page_size(char *s) +{ + int k, err; + ulong psize; + + k = 1; + err = FALSE; + + switch (LASTCHAR(s)) + { + case 'k': + case 'K': + LASTCHAR(s) = NULLCHAR; + if (decimal(s, 0)) + k = 1024; + else + err = TRUE; + break; + + default: + if (decimal(s, 0)) + psize = dtol(s, QUIET|RETURN_ON_ERROR, &err); + else if (hexadecimal(s, 0)) + psize = htol(s, QUIET|RETURN_ON_ERROR, &err); + else + err = TRUE; + break; + } + + if (err) + error(INFO, "invalid page size: %s\n", s); + else + machdep->pagesize = psize * k; +} + + +/* * Return the vmalloc address referenced by the first vm_struct * on the vmlist. This can normally be used by the machine-specific * xxx_vmalloc_start() routines. @@ -10186,6 +11098,10 @@ retval = remote_memory_used(); else if (pc->flags & NETDUMP) retval = netdump_memory_used(); + else if (pc->flags & KDUMP) + retval = kdump_memory_used(); + else if (pc->flags & XENDUMP) + retval = xendump_memory_used(); else if (pc->flags & DISKDUMP) retval = diskdump_memory_used(); else if (pc->flags & LKCD) @@ -10201,6 +11117,10 @@ retval = remote_free_memory(); else if (pc->flags & NETDUMP) retval = netdump_free_memory(); + else if (pc->flags & KDUMP) + retval = kdump_free_memory(); + else if (pc->flags & XENDUMP) + retval = xendump_free_memory(); else if (pc->flags & DISKDUMP) retval = diskdump_free_memory(); else if (pc->flags & LKCD) @@ -10216,6 +11136,10 @@ retval = remote_memory_dump(0); else if (pc->flags & NETDUMP) retval = netdump_memory_dump(fp); + else if (pc->flags & KDUMP) + retval = kdump_memory_dump(fp); + else if (pc->flags & XENDUMP) + retval = xendump_memory_dump(fp); else if (pc->flags & DISKDUMP) retval = diskdump_memory_dump(fp); else if (pc->flags & LKCD) @@ -10238,3 +11162,197 @@ return retval; } +/* + * Functions for sparse mem support + */ +ulong +sparse_decode_mem_map(ulong coded_mem_map, ulong section_nr) +{ + return coded_mem_map + + (section_nr_to_pfn(section_nr) * SIZE(page)); +} + +void +sparse_mem_init(void) +{ + 
ulong addr; + ulong mem_section_size; + + if (!IS_SPARSEMEM()) + return; + + MEMBER_OFFSET_INIT(mem_section_section_mem_map, "mem_section", + "section_mem_map"); + STRUCT_SIZE_INIT(mem_section, "mem_section"); + + if (!MAX_PHYSMEM_BITS()) + error(FATAL, + "CONFIG_SPARSEMEM kernels not supported for this architecture\n"); + + if (get_array_length("mem_section", NULL, 0) == + (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT_EXTREME())) + vt->flags |= SPARSEMEM_EX; + + if (IS_SPARSEMEM_EX()) { + machdep->sections_per_root = _SECTIONS_PER_ROOT_EXTREME(); + mem_section_size = sizeof(void *) * NR_SECTION_ROOTS(); + } else { + machdep->sections_per_root = _SECTIONS_PER_ROOT(); + mem_section_size = SIZE(mem_section) * NR_SECTION_ROOTS(); + } + + if (CRASHDEBUG(1)) { + fprintf(fp, "PAGESIZE=%d\n",PAGESIZE()); + fprintf(fp,"mem_section_size = %ld\n", mem_section_size); + fprintf(fp, "NR_SECTION_ROOTS = %ld\n", NR_SECTION_ROOTS()); + fprintf(fp, "NR_MEM_SECTIONS = %ld\n", NR_MEM_SECTIONS()); + fprintf(fp, "SECTIONS_PER_ROOT = %ld\n", SECTIONS_PER_ROOT() ); + fprintf(fp, "SECTION_ROOT_MASK = 0x%lx\n", SECTION_ROOT_MASK()); + fprintf(fp, "PAGES_PER_SECTION = %ld\n", PAGES_PER_SECTION()); + } + + if (!(vt->mem_sec = malloc(mem_section_size))) + error(FATAL, "cannot malloc mem_sec cache\n"); + + addr = symbol_value("mem_section"); + readmem(addr, KVADDR,vt->mem_sec ,mem_section_size, + "memory section root table", FAULT_ON_ERROR); +} + +char +*read_mem_section(ulong addr) +{ + static char *mem_section; + + if (!mem_section) { + mem_section = GETBUF(SIZE(mem_section)); + } + + if (!IS_KVADDR(addr)) + return 0; + + readmem(addr, KVADDR, mem_section, SIZE(mem_section), + "memory section", FAULT_ON_ERROR); + + return mem_section; +} + +ulong +nr_to_section(ulong nr) +{ + ulong addr; + ulong *mem_sec = vt->mem_sec; + + if (!IS_KVADDR(mem_sec[SECTION_NR_TO_ROOT(nr)])) + return 0; + + if (IS_SPARSEMEM_EX()) + addr = mem_sec[SECTION_NR_TO_ROOT(nr)] + + (nr & SECTION_ROOT_MASK()) * 
SIZE(mem_section); + else + addr = mem_sec[0] + (nr & SECTION_ROOT_MASK()) * SIZE(mem_section); + + if (!IS_KVADDR(addr)) + return 0; + + return addr; +} + +/* + * We use the lower bits of the mem_map pointer to store + * a little bit of information. There should be at least + * 3 bits here due to 32-bit alignment. + */ +#define SECTION_MARKED_PRESENT (1UL<<0) +#define SECTION_HAS_MEM_MAP (1UL<<1) +#define SECTION_MAP_LAST_BIT (1UL<<2) +#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) + + +int +valid_section(ulong addr) +{ + char *mem_section; + + if ((mem_section = read_mem_section(addr))) + return (ULONG(mem_section + + OFFSET(mem_section_section_mem_map)) && + SECTION_MARKED_PRESENT); + return 0; +} + +int +section_has_mem_map(ulong addr) +{ + char *mem_section; + + if ((mem_section = read_mem_section(addr))) + return (ULONG(mem_section + + OFFSET(mem_section_section_mem_map)) + && SECTION_HAS_MEM_MAP); + return 0; +} + +ulong +section_mem_map_addr(ulong addr) +{ + char *mem_section; + ulong map; + + if ((mem_section = read_mem_section(addr))) { + map = ULONG(mem_section + + OFFSET(mem_section_section_mem_map)); + map &= SECTION_MAP_MASK; + return map; + } + return 0; +} + + +ulong +valid_section_nr(ulong nr) +{ + ulong addr = nr_to_section(nr); + + if (valid_section(addr)) + return addr; + + return 0; +} + +ulong +pfn_to_map(ulong pfn) +{ + ulong section, page_offset; + + section = pfn_to_section_nr(pfn); + + if (section_has_mem_map(section)) { + page_offset = pfn - section_nr_to_pfn(section); + return (section_mem_map_addr(section) + + (page_offset * PAGESIZE())); + } + + return 0; +} + +void +list_mem_sections(void) +{ + ulong nr,addr; + ulong nr_mem_sections = NR_MEM_SECTIONS(); + ulong coded_mem_map; + + for (nr = 0; nr <= nr_mem_sections ; nr++) { + if ((addr = valid_section_nr(nr))) { + coded_mem_map = section_mem_map_addr(addr); + fprintf(fp, + "nr=%ld section = %lx coded_mem_map=%lx pfn=%ld mem_map=%lx\n", + nr, + addr, + coded_mem_map, + 
section_nr_to_pfn(nr), + sparse_decode_mem_map(coded_mem_map,nr)); + } + } +} --- crash/filesys.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/filesys.c 2006-04-28 11:24:32.000000000 -0400 @@ -1,8 +1,8 @@ /* filesys.c - core analysis suite * * Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. - * Copyright (C) 2002, 2003, 2004, 2005 David Anderson - * Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2002, 2003, 2004, 2005, 2006 David Anderson + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -190,7 +190,15 @@ if (!netdump_init(pc->dumpfile, fp)) error(FATAL, "%s: initialization failed\n", pc->dumpfile); - } else if (pc->flags & NETDUMP) { + } else if (pc->flags & KDUMP) { + if (!kdump_init(pc->dumpfile, fp)) + error(FATAL, "%s: initialization failed\n", + pc->dumpfile); + } else if (pc->flags & XENDUMP) { + if (!xendump_init(pc->dumpfile, fp)) + error(FATAL, "%s: initialization failed\n", + pc->dumpfile); + } else if (pc->flags & DISKDUMP) { if (!diskdump_init(pc->dumpfile, fp)) error(FATAL, "%s: initialization failed\n", pc->dumpfile); @@ -253,7 +261,11 @@ return; } - if (find_booted_system_map()) + error(WARNING, "%s%sand /proc/version do not match!\n\n", + pc->namelist, + strlen(pc->namelist) > 39 ? 
"\n " : " "); + + if (find_booted_system_map()) pc->flags |= SYSMAP; } @@ -1706,12 +1718,20 @@ MEMBER_OFFSET_INIT(fs_struct_pwd, "fs_struct", "pwd"); MEMBER_OFFSET_INIT(fs_struct_rootmnt, "fs_struct", "rootmnt"); MEMBER_OFFSET_INIT(fs_struct_pwdmnt, "fs_struct", "pwdmnt"); - MEMBER_OFFSET_INIT(files_struct_max_fds, "files_struct", "max_fds"); - MEMBER_OFFSET_INIT(files_struct_max_fdset, "files_struct", "max_fdset"); - MEMBER_OFFSET_INIT(files_struct_open_fds, "files_struct", "open_fds"); MEMBER_OFFSET_INIT(files_struct_open_fds_init, "files_struct", "open_fds_init"); - MEMBER_OFFSET_INIT(files_struct_fd, "files_struct", "fd"); + MEMBER_OFFSET_INIT(files_struct_fdt, "files_struct", "fdt"); + if (VALID_MEMBER(files_struct_fdt)) { + MEMBER_OFFSET_INIT(fdtable_max_fds, "fdtable", "max_fds"); + MEMBER_OFFSET_INIT(fdtable_max_fdset, "fdtable", "max_fdset"); + MEMBER_OFFSET_INIT(fdtable_open_fds, "fdtable", "open_fds"); + MEMBER_OFFSET_INIT(fdtable_fd, "fdtable", "fd"); + } else { + MEMBER_OFFSET_INIT(files_struct_max_fds, "files_struct", "max_fds"); + MEMBER_OFFSET_INIT(files_struct_max_fdset, "files_struct", "max_fdset"); + MEMBER_OFFSET_INIT(files_struct_open_fds, "files_struct", "open_fds"); + MEMBER_OFFSET_INIT(files_struct_fd, "files_struct", "fd"); + } MEMBER_OFFSET_INIT(file_f_dentry, "file", "f_dentry"); MEMBER_OFFSET_INIT(file_f_vfsmnt, "file", "f_vfsmnt"); MEMBER_OFFSET_INIT(file_f_count, "file", "f_count"); @@ -1762,6 +1782,8 @@ STRUCT_SIZE_INIT(umode_t, "umode_t"); STRUCT_SIZE_INIT(dentry, "dentry"); STRUCT_SIZE_INIT(files_struct, "files_struct"); + if (VALID_MEMBER(files_struct_fdt)) + STRUCT_SIZE_INIT(fdtable, "fdtable"); STRUCT_SIZE_INIT(file, "file"); STRUCT_SIZE_INIT(inode, "inode"); STRUCT_SIZE_INIT(vfsmount, "vfsmount"); @@ -1998,8 +2020,9 @@ open_files_dump(ulong task, int flags, struct reference *ref) { struct task_context *tc; - ulong files_struct_addr; - char *files_struct_buf; + ulong files_struct_addr; + ulong fdtable_addr = 0; + char 
*files_struct_buf, *fdtable_buf = NULL; ulong fs_struct_addr; char *dentry_buf, *fs_struct_buf; ulong root_dentry, pwd_dentry; @@ -2027,6 +2050,8 @@ BZERO(root_pathname, BUFSIZE); BZERO(pwd_pathname, BUFSIZE); files_struct_buf = GETBUF(SIZE(files_struct)); + if (VALID_STRUCT(fdtable)) + fdtable_buf = GETBUF(SIZE(fdtable)); fill_task_struct(task); sprintf(files_header, " FD%s%s%s%s%s%s%sTYPE%sPATH\n", @@ -2107,24 +2132,42 @@ files_struct_addr = ULONG(tt->task_struct + OFFSET(task_struct_files)); - if (files_struct_addr) { - readmem(files_struct_addr, KVADDR, files_struct_buf, - SIZE(files_struct), "files_struct buffer", - FAULT_ON_ERROR); - - max_fdset = INT(files_struct_buf + + if (files_struct_addr) { + readmem(files_struct_addr, KVADDR, files_struct_buf, + SIZE(files_struct), "files_struct buffer", + FAULT_ON_ERROR); + + if (VALID_MEMBER(files_struct_max_fdset)) { + max_fdset = INT(files_struct_buf + OFFSET(files_struct_max_fdset)); - max_fds = INT(files_struct_buf + - OFFSET(files_struct_max_fds)); - } + max_fds = INT(files_struct_buf + + OFFSET(files_struct_max_fds)); + } + } - if (!files_struct_addr || max_fdset == 0 || max_fds == 0) { + if (VALID_MEMBER(files_struct_fdt)) { + fdtable_addr = ULONG(files_struct_buf + OFFSET(files_struct_fdt)); + + if (fdtable_addr) { + readmem(fdtable_addr, KVADDR, fdtable_buf, + SIZE(fdtable), "fdtable buffer", FAULT_ON_ERROR); + max_fdset = INT(fdtable_buf + + OFFSET(fdtable_max_fdset)); + max_fds = INT(fdtable_buf + + OFFSET(fdtable_max_fds)); + } + } + + if ((VALID_MEMBER(files_struct_fdt) && !fdtable_addr) || + !files_struct_addr || max_fdset == 0 || max_fds == 0) { if (ref) { if (ref->cmdflags & FILES_REF_FOUND) fprintf(fp, "\n"); } else fprintf(fp, "No open files\n"); + if (fdtable_buf) + FREEBUF(fdtable_buf); FREEBUF(files_struct_buf); return; } @@ -2146,8 +2189,12 @@ } } - open_fds_addr = ULONG(files_struct_buf + - OFFSET(files_struct_open_fds)); + if (VALID_MEMBER(fdtable_open_fds)) + open_fds_addr = ULONG(fdtable_buf 
+ + OFFSET(fdtable_open_fds)); + else + open_fds_addr = ULONG(files_struct_buf + + OFFSET(files_struct_open_fds)); if (open_fds_addr) { if (VALID_MEMBER(files_struct_open_fds_init) && @@ -2157,16 +2204,21 @@ OFFSET(files_struct_open_fds_init), &open_fds, sizeof(fd_set)); else - readmem(open_fds_addr, KVADDR, &open_fds, - sizeof(fd_set), "files_struct open_fds", + readmem(open_fds_addr, KVADDR, &open_fds, + sizeof(fd_set), "fdtable open_fds", FAULT_ON_ERROR); } - fd = ULONG(files_struct_buf + OFFSET(files_struct_fd)); + if (VALID_MEMBER(fdtable_fd)) + fd = ULONG(fdtable_buf + OFFSET(fdtable_fd)); + else + fd = ULONG(files_struct_buf + OFFSET(files_struct_fd)); if (!open_fds_addr || !fd) { if (ref && (ref->cmdflags & FILES_REF_FOUND)) fprintf(fp, "\n"); + if (fdtable_buf) + FREEBUF(fdtable_buf); FREEBUF(files_struct_buf); return; } @@ -2220,6 +2272,8 @@ if (ref && (ref->cmdflags & FILES_REF_FOUND)) fprintf(fp, "\n"); + if (fdtable_buf) + FREEBUF(fdtable_buf); FREEBUF(files_struct_buf); } --- crash/help.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/help.c 2006-03-23 14:25:56.000000000 -0500 @@ -1,8 +1,8 @@ /* help.c - core analysis suite * * Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. - * Copyright (C) 2002, 2003, 2004, 2005 David Anderson - * Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2002, 2003, 2004, 2005, 2006 David Anderson + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -239,7 +239,7 @@ oflag = 0; while ((c = getopt(argcnt, args, - "efNDdmM:ngcaBbHhksvVoptTzLxO")) != EOF) { + "efNDdmM:ngcaBbHhkKsvVoptTzLxO")) != EOF) { switch(c) { case 'e': @@ -303,7 +303,11 @@ return; case 'k': - dump_kernel_table(); + dump_kernel_table(!VERBOSE); + return; + + case 'K': + dump_kernel_table(VERBOSE); return; case 's': @@ -349,6 +353,7 @@ fprintf(fp, " -D - dumpfile memory usage\n"); fprintf(fp, " -f - filesys table\n"); fprintf(fp, " -k - kernel_table\n"); + fprintf(fp, " -K - kernel_table (verbose)\n"); fprintf(fp, " -M machine specific\n"); fprintf(fp, " -m - machdep_table\n"); fprintf(fp, " -s - symbol table data\n"); @@ -508,16 +513,16 @@ " active perform the command(s) on the active thread on each CPU.\n", " If none of the task-identifying arguments above are entered, the command", " will be performed on all tasks.\n", -" command select one or more of the following commands on the tasks", +" command select one or more of the following commands to be run on the tasks", " selected, or on all tasks:\n", -" bt same as the \"bt\" command (optional flags: -r -t -l -e -R -f)", -" vm same as the \"vm\" command (optional flags: -p -v -m -R)", -" task same as the \"task\" command (optional flag: -R)", -" files same as the \"files\" command (optional flag: -R)", -" net same as the \"net\" command (optional flags: -s -S -R)", -" set same as the \"set\" command", -" sig same as the \"sig\" command", -" vtop same as the \"vtop\" command (optional flags: -c -u -k)\n", +" bt run the \"bt\" command (optional flags: -r -t -l -e -R -f -o)", +" vm run the \"vm\" command (optional flags: -p -v -m -R)", +" task run the \"task\" command (optional flag: -R)", +" files run the \"files\" command (optional flag: -R)", +" net run the \"net\" command (optional flags: -s -S -R)", +" set run the \"set\" command", +" sig run the 
\"sig\" command", +" vtop run the \"vtop\" command (optional flags: -c -u -k)\n", " flag Pass this optional flag to the command selected.", " argument Pass this argument to the command selected.", " ", @@ -1155,7 +1160,7 @@ "bt", "backtrace", #if defined(GDB_6_0) || defined(GDB_6_1) -"[-a|-r|-t|-l|-e|-E|-f] [-R ref] [ -I ip ] [-S sp] [pid | taskp]", +"[-a|-r|-t|-T|-l|-e|-E|-f|-o|-O] [-R ref] [ -I ip ] [-S sp] [pid | taskp]", #else "[-a|-r|-t|-l|-e|-f|-g] [-R ref] [ -I ip ] [-S sp] [pid | taskp]", #endif @@ -1167,6 +1172,9 @@ " pages of memory containing the task_union structure.", " -t display all text symbols found from the last known stack location", " to the top of the stack. (helpful if the back trace fails)", +" -T display all text symbols found from just above the task_struct or", +" thread_info to the top of the stack. (helpful if the back trace", +" fails or the -t option starts too high in the process stack).", " -l show file and line number of each stack trace text location.", " -e search the stack for possible kernel and user mode exception frames.", " -E search the IRQ stacks (x86, x86_64 and PPC64), and the exception", @@ -1175,6 +1183,11 @@ " -f display all stack data contained in a frame; this option can be", " used to determine the arguments passed to each function (x86 only);", " on IA64, the argument register contents are dumped.", +" -o use old backtrace method, permissable only on kernels that were", +" compiled without the -fomit-frame_pointer (x86 only).", +" -O use old backtrace method by default, permissable only on kernels", +" that were compiled without the -fomit-frame_pointer; subsequent", +" usage of this option toggles the backtrace method (x86 only).", #if !defined(GDB_6_0) && !defined(GDB_6_1) " -g use gdb stack trace code. (alpha only)", #endif @@ -1439,12 +1452,13 @@ " called \"echo\", which simply echoes back all arguments passed to it.", " Note the comments contained within it for further details. 
To build it,", " cut and paste the following output into a file, and call it, for example,", -" \"extlib.c\". Then compile like so:", +" \"echo.c\". Then compile like so:", " ", -" gcc -nostartfiles -shared -rdynamic -o extlib.so extlib.c", +" gcc -nostartfiles -shared -rdynamic -o echo.so echo.c -fPIC -D", " ", -" The extlib.so file may be dynamically linked into %s during runtime, or", -" during initialization by putting \"extend extlib.so\" into a .%src file", +" where must be one of the MACHINE_TYPE #define's in defs.h.", +" The echo.so file may be dynamically linked into %s during runtime, or", +" during initialization by putting \"extend echo.so\" into a .%src file", " located in the current directory, or in the user's $HOME directory.", " ", "---------------------------------- cut here ----------------------------------", @@ -3324,7 +3338,8 @@ "linked list", "[[-o] offset] [-e end] [-s struct[.member]] [-H] start", " This command dumps the contents of a linked list. The entries in a linked", -" are typically data structures that are tied together in one of two formats:", +" list are typically data structures that are tied together in one of two", +" formats:", " ", " 1. A starting address points to a data structure; that structure contains", " a member that is a pointer to the next structure, and so on. The list", @@ -3335,7 +3350,7 @@ " c. a pointer to the first item pointed to by the start address.", " d. a pointer to its containing structure.", " ", -" 2. Many Linux lists are linked via embedded list_head structures contained ", +" 2. Most Linux lists are linked via embedded list_head structures contained ", " within the data structures in the list. 
The linked list is headed by an", " external LIST_HEAD, which is simply a list_head structure initialized to", " point to itself, signifying that the list is empty:", @@ -3377,8 +3392,9 @@ " is pre-pended or not:", " ", " start The address of the first structure in the list.", -" -H start The address of the LIST_HEAD structure, typically expressed", -" symbolically.", +" -H start The address of the list_head structure, typically expressed", +" symbolically, but also can be an expression evaluating to the", +" address of the starting list_head structure.", "\nEXAMPLES", " Note that each task_struct is linked to its parent's task_struct via the", " p_pptr member:", @@ -4419,10 +4435,11 @@ " Display various network related data:\n", " -a display the ARP cache.", " -s display open network socket/sock addresses, their family and type,", -" and their source and destination addresses and ports.", +" and for INET and INET6 families, their source and destination", +" addresses and ports.", " -S displays open network socket/sock addresses followed by a dump", " of both structures.", -" -n addr translates an IP address expressed as a decimal or hexadecimal ", +" -n addr translates an IPv4 address expressed as a decimal or hexadecimal", " value into a standard numbers-and-dots notation.", " -R ref socket or sock address, or file descriptor.", " pid a process PID.", @@ -4450,8 +4467,8 @@ " Display the sockets for PID 2517, using both -s and -S output formats:\n", " %s> net -s 2517", " PID: 2517 TASK: c1598000 CPU: 1 COMMAND: \"rlogin\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 3 c57375dc c1ff1850 INET:STREAM 10.1.8.20:1023 10.1.16.62:513", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 3 c57375dc c1ff1850 INET:STREAM 10.1.8.20-1023 10.1.16.62-513", " ", " %s> net -S 2517", " PID: 2517 TASK: c1598000 CPU: 1 COMMAND: \"rlogin\"", @@ -4497,52 +4514,52 @@ " From \"foreach\", find all tasks with references to socket c08ea3cc:\n", " %s> 
foreach net -s -R c08ea3cc", " PID: 2184 TASK: c7026000 CPU: 1 COMMAND: \"klines.kss\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2200 TASK: c670a000 CPU: 1 COMMAND: \"kpanel\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2201 TASK: c648a000 CPU: 1 COMMAND: \"kbgndwm\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 19294 TASK: c250a000 CPU: 0 COMMAND: \"prefdm\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2194 TASK: c62dc000 CPU: 1 COMMAND: \"kaudioserver\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2195 TASK: c6684000 CPU: 1 COMMAND: \"maudio\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2196 TASK: c6b58000 CPU: 1 COMMAND: \"kwmsound\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc 
c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2197 TASK: c6696000 CPU: 0 COMMAND: \"kfm\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2199 TASK: c65ec000 CPU: 0 COMMAND: \"krootwm\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 694 TASK: c1942000 CPU: 0 COMMAND: \"prefdm\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 698 TASK: c6a2c000 CPU: 1 COMMAND: \"X\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", " PID: 2159 TASK: c4a5a000 CPU: 1 COMMAND: \"kwm\"", -" FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT", -" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0:1026 0.0.0.0:0", +" FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT", +" 5 c08ea3cc c50d3c80 INET:STREAM 0.0.0.0-1026 0.0.0.0-0", " ", NULL }; @@ -4854,9 +4871,11 @@ static char *version_info[] = { -"Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc.", -"Copyright (C) 2004, 2005 IBM Corporation", -"Copyright (C) 1999-2005 Hewlett-Packard Co", +"Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc.", +"Copyright (C) 2004, 2005, 2006 IBM Corporation", 
+"Copyright (C) 1999-2006 Hewlett-Packard Co", +"Copyright (C) 2005 Fujitsu Limited", +"Copyright (C) 2005 NEC Corporation", "Copyright (C) 1999, 2002 Silicon Graphics, Inc.", "Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc.", "This program is free software, covered by the GNU General Public License,", --- crash/task.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/task.c 2006-04-28 11:25:53.000000000 -0400 @@ -27,6 +27,7 @@ static void refresh_pidhash_task_table(void); static void refresh_pid_hash_task_table(void); static void refresh_hlist_task_table(void); +static void refresh_hlist_task_table_v2(void); static struct task_context *store_context(struct task_context *, ulong, char *); static void refresh_context(ulong, ulong); static void parent_list(ulong); @@ -193,6 +194,8 @@ MEMBER_OFFSET_INIT(pid_link_pid, "pid_link", "pid"); MEMBER_OFFSET_INIT(pid_hash_chain, "pid", "hash_chain"); + STRUCT_SIZE_INIT(pid_link, "pid_link"); + MEMBER_OFFSET_INIT(pid_pid_chain, "pid", "pid_chain"); STRUCT_SIZE_INIT(task_struct, "task_struct"); @@ -219,15 +222,7 @@ MEMBER_OFFSET_INIT(sigpending_signal, "sigpending", "signal"); STRUCT_SIZE_INIT(sigqueue, "sigqueue"); - if (VALID_STRUCT(sigqueue)) { - MEMBER_OFFSET_INIT(sigqueue_next, "sigqueue", "next"); - MEMBER_OFFSET_INIT(sigqueue_list, "sigqueue", "list"); - MEMBER_OFFSET_INIT(sigqueue_info, "sigqueue", "info"); - } else { - STRUCT_SIZE_INIT(signal_queue, "signal_queue"); - MEMBER_OFFSET_INIT(signal_queue_next, "signal_queue", "next"); - MEMBER_OFFSET_INIT(signal_queue_info, "signal_queue", "info"); - } + STRUCT_SIZE_INIT(signal_queue, "signal_queue"); STRUCT_SIZE_INIT(sighand_struct, "sighand_struct"); if (VALID_STRUCT(sighand_struct)) @@ -249,6 +244,19 @@ STRUCT_SIZE_INIT(cputime_t, "cputime_t"); + if (symbol_exists("cfq_slice_async")) { + uint cfq_slice_async; + + get_symbol_data("cfq_slice_async", sizeof(int), + &cfq_slice_async); + machdep->hz = cfq_slice_async * 25; + + if (CRASHDEBUG(2)) + 
fprintf(fp, + "cfq_slice_async exitsts: setting hz to %d\n", + machdep->hz); + } + if (VALID_MEMBER(runqueue_arrays)) MEMBER_OFFSET_INIT(task_struct_run_list, "task_struct", "run_list"); @@ -302,7 +310,11 @@ tt->refresh_task_table = refresh_pid_hash_task_table; } else { tt->pidhash_addr = symbol_value("pid_hash"); - tt->refresh_task_table = refresh_hlist_task_table; + if (!get_array_length("pid_hash", NULL, sizeof(void *)) && + VALID_STRUCT(pid_link)) + tt->refresh_task_table = refresh_hlist_task_table_v2; + else + tt->refresh_task_table = refresh_hlist_task_table; } tt->flags |= PID_HASH; @@ -987,9 +999,7 @@ return; if (DUMPFILE()) { /* impossible */ - fprintf(fp, (pc->flags & SILENT) || !(pc->flags & TTY) ? - "" : "\rplease wait... (gathering task table data)"); - fflush(fp); + please_wait("gathering task table data"); if (!symbol_exists("panic_threads")) tt->flags |= POPULATE_PANIC; } @@ -1152,11 +1162,7 @@ FREEBUF(pid_hash); - if (DUMPFILE()) { - fprintf(fp, (pc->flags & SILENT) || !(pc->flags & TTY) ? "" : - "\r \r"); - fflush(fp); - } + please_wait_done(); if (ACTIVE() && (tt->flags & TASK_INIT_DONE)) refresh_context(curtask, curpid); @@ -1192,9 +1198,7 @@ return; if (DUMPFILE()) { /* impossible */ - fprintf(fp, (pc->flags & SILENT) || !(pc->flags & TTY) ? - "" : "\rplease wait... (gathering task table data)"); - fflush(fp); + please_wait("gathering task table data"); if (!symbol_exists("panic_threads")) tt->flags |= POPULATE_PANIC; } @@ -1394,12 +1398,229 @@ FREEBUF(pid_hash); FREEBUF(nodebuf); - if (DUMPFILE()) { - fprintf(fp, (pc->flags & SILENT) || !(pc->flags & TTY) ? 
"" : - "\r \r"); - fflush(fp); + please_wait_done(); + + if (ACTIVE() && (tt->flags & TASK_INIT_DONE)) + refresh_context(curtask, curpid); + + tt->retries = MAX(tt->retries, retries); +} + +/* + * 2.6.17 replaced: + * static struct hlist_head *pid_hash[PIDTYPE_MAX]; + * with + * static struct hlist_head *pid_hash; + */ +static void +refresh_hlist_task_table_v2(void) +{ + int i; + ulong *pid_hash; + ulong pidhash_array; + ulong kpp; + char *tp; + ulong next, pnext, pprev; + char *nodebuf; + int len, cnt; + struct task_context *tc; + ulong curtask; + ulong curpid; + ulong retries; + ulong *tlp; + + if (DUMPFILE() && (tt->flags & TASK_INIT_DONE)) /* impossible */ + return; + + if (DUMPFILE()) { /* impossible */ + please_wait("gathering task table data"); + if (!symbol_exists("panic_threads")) + tt->flags |= POPULATE_PANIC; + } + + if (ACTIVE() && !(tt->flags & TASK_REFRESH)) + return; + + /* + * The current task's task_context entry may change, + * or the task may not even exist anymore. + */ + if (ACTIVE() && (tt->flags & TASK_INIT_DONE)) { + curtask = CURRENT_TASK(); + curpid = CURRENT_PID(); + } + + get_symbol_data("pid_hash", sizeof(void *), &pidhash_array); + + len = tt->pidhash_len; + pid_hash = (ulong *)GETBUF(len * SIZE(hlist_head)); + nodebuf = GETBUF(SIZE(pid_link)); + retries = 0; + +retry_pid_hash: + if (retries && DUMPFILE()) + error(FATAL, + "\ncannot gather a stable task list via pid_hash\n"); + + if ((retries == MAX_UNLIMITED_TASK_RETRIES) && + !(tt->flags & TASK_INIT_DONE)) + error(FATAL, + "\ncannot gather a stable task list via pid_hash (%lu retries)\n", + retries); /* retries is a ulong, hence %lu */ + + if (!readmem(pidhash_array, KVADDR, pid_hash, + len * SIZE(hlist_head), "pid_hash contents", RETURN_ON_ERROR)) + error(FATAL, "\ncannot read pid_hash array\n"); + + if (!hq_open()) { + error(INFO, "cannot hash task_struct entries\n"); + if (!(tt->flags & TASK_INIT_DONE)) + clean_exit(1); + error(INFO, "using stale task_structs\n"); + FREEBUF(pid_hash); + return; + } + + /* + * Get the
idle threads first. + */ + cnt = 0; + for (i = 0; i < kt->cpus; i++) { + if (hq_enter(tt->idle_threads[i])) + cnt++; + else + error(WARNING, "%sduplicate idle tasks?\n", + DUMPFILE() ? "\n" : ""); } + for (i = 0; i < len; i++) { + if (!pid_hash[i]) + continue; + + if (!readmem(pid_hash[i], KVADDR, nodebuf, + SIZE(pid_link), "pid_hash node pid_link", RETURN_ON_ERROR|QUIET)) { + error(INFO, "\ncannot read pid_hash node pid_link\n"); + if (DUMPFILE()) + continue; + hq_close(); + retries++; + goto retry_pid_hash; + } + + kpp = pid_hash[i]; + next = ULONG(nodebuf + OFFSET(pid_link_pid)); + if (next) + next -= OFFSET(task_struct_pids); + pnext = ULONG(nodebuf + OFFSET(hlist_node_next)); + pprev = ULONG(nodebuf + OFFSET(hlist_node_pprev)); + + if (CRASHDEBUG(1)) + console("pid_hash[%d]: %lx task: %lx (node: %lx) next: %lx pprev: %lx\n", + i, pid_hash[i], next, kpp, pnext, pprev); + + while (next) { + if (!IS_TASK_ADDR(next)) { + error(INFO, + "%sinvalid task address in pid_hash: %lx\n", + DUMPFILE() ? "\n" : "", next); + if (DUMPFILE()) + break; + hq_close(); + retries++; + goto retry_pid_hash; + + } + + if (!is_idle_thread(next) && !hq_enter(next)) { + error(INFO, + "%sduplicate task in pid_hash: %lx\n", + DUMPFILE() ?
"\n" : "", next); + if (DUMPFILE()) + break; + hq_close(); + retries++; + goto retry_pid_hash; + } + + cnt++; + + if (!pnext) + break; + + if (!readmem((ulonglong)pnext, KVADDR, nodebuf, + SIZE(pid_link), "task hlist_node pid_link", RETURN_ON_ERROR|QUIET)) { + error(INFO, "\ncannot read hlist_node pid_link from node next\n"); + if (DUMPFILE()) + break; + hq_close(); + retries++; + goto retry_pid_hash; + } + + kpp = (ulong)pnext; + next = ULONG(nodebuf + OFFSET(pid_link_pid)); + if (next) + next -= OFFSET(task_struct_pids); + pnext = ULONG(nodebuf + OFFSET(hlist_node_next)); + pprev = ULONG(nodebuf + OFFSET(hlist_node_pprev)); + + if (CRASHDEBUG(1)) + console(" chained task: %lx (node: %lx) next: %lx pprev: %lx\n", + next, kpp, pnext, pprev); + } + } + + BZERO(tt->task_local, tt->max_tasks * sizeof(void *)); + cnt = retrieve_list((ulong *)tt->task_local, cnt); + + hq_close(); + + clear_task_cache(); + + for (i = 0, tlp = (ulong *)tt->task_local, + tt->running_tasks = 0, tc = tt->context_array; + i < tt->max_tasks; i++, tlp++) { + if (!(*tlp)) + continue; + + if (!IS_TASK_ADDR(*tlp)) { + error(WARNING, + "%sinvalid task address found in task list: %lx\n", + DUMPFILE() ? "\n" : "", *tlp); + if (DUMPFILE()) + continue; + retries++; + goto retry_pid_hash; + } + + if (task_exists(*tlp)) { + error(WARNING, + "%sduplicate task address found in task list: %lx\n", + DUMPFILE() ?
"\n" : "", *tlp); + if (DUMPFILE()) + continue; + retries++; + goto retry_pid_hash; + } + + if (!(tp = fill_task_struct(*tlp))) { + if (DUMPFILE()) + continue; + retries++; + goto retry_pid_hash; + } + + if (store_context(tc, *tlp, tp)) { + tc++; + tt->running_tasks++; + } + } + + FREEBUF(pid_hash); + FREEBUF(nodebuf); + + please_wait_done(); + if (ACTIVE() && (tt->flags & TASK_INIT_DONE)) refresh_context(curtask, curpid); @@ -2229,11 +2450,8 @@ use_kernel_timeval = STRUCT_EXISTS("kernel_timeval"); get_symbol_data("jiffies", sizeof(long), &jiffies); - if (symbol_exists("jiffies_64")) { - get_symbol_data("jiffies_64", sizeof(long long), &jiffies_64); - if ((jiffies_64 & 0xffffffff00000000ULL) == 0x100000000ULL) - jiffies_64 &= 0xffffffffULL; - } + if (symbol_exists("jiffies_64")) + get_uptime(NULL, &jiffies_64); tsp = task_start_times; tc = tcp ? tcp : FIRST_CONTEXT(); @@ -2330,8 +2548,7 @@ for (i = 0, tsp = task_start_times; i < tasks; i++, tsp++) { print_task_header(fp, tsp->tc, 0); fprintf(fp, " RUN TIME: %s\n", symbol_exists("jiffies_64") ?
- convert_time(jiffies_64 - - convert_start_time(tsp->start_time, jiffies_64), buf1) : + convert_time(convert_start_time(tsp->start_time, jiffies_64), buf1) : convert_time(jiffies - tsp->start_time, buf1)); fprintf(fp, " START TIME: %llu\n", tsp->start_time); if (VALID_MEMBER(task_struct_times)) { @@ -2397,15 +2614,33 @@ static ulonglong convert_start_time(ulonglong start_time, ulonglong current) { + ulong tmp1, tmp2; + ulonglong wrapped; + switch(tt->flags & (TIMESPEC | NO_TIMESPEC)) { case TIMESPEC: - if ((start_time * (ulonglong)machdep->hz) > current) - return current; + if ((start_time * (ulonglong)machdep->hz) > current) + return 0; else - return start_time * (ulonglong)machdep->hz; + return current - (start_time * (ulonglong)machdep->hz); case NO_TIMESPEC: + if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) { + wrapped = (start_time & 0xffffffff00000000ULL); + if (wrapped) { + wrapped -= 0x100000000; + start_time &= 0x00000000ffffffffULL; + start_time |= wrapped; + start_time += (ulonglong)(300*machdep->hz); + } else { + tmp1 = (ulong)(uint)(-300*machdep->hz); + tmp2 = (ulong)start_time; + start_time = (ulonglong)(tmp2 - tmp1); + } + } + break; + default: break; } @@ -2938,12 +3173,17 @@ if (is_task_active(tc->task)) { if (machdep->flags & HWRESET) fprintf(fp, "(HARDWARE RESET)"); - else if (machdep->flags & SYSRQ) + else if ((pc->flags & SYSRQ) && (tc->task == tt->panic_task)) fprintf(fp, "(SYSRQ)"); else if (machdep->flags & INIT) fprintf(fp, "(INIT)"); - else if (kt->cpu_flags[tc->processor] & NMI) + else if ((tc->processor >= 0) && + (tc->processor < NR_CPUS) && + (kt->cpu_flags[tc->processor] & NMI)) fprintf(fp, "(NMI)"); + else if ((tc->task == tt->panic_task) && + XENDUMP_DUMPFILE() && (kt->xen_flags & XEN_SUSPEND)) + fprintf(fp, "(SUSPEND)"); else if (tc->task == tt->panic_task) fprintf(fp, "(PANIC)"); else @@ -3411,6 +3651,9 @@ use_task_0: + if (CRASHDEBUG(1)) + error(INFO, "get_panic_context: panic task not found\n"); + tt->flags |= PANIC_TASK_NOT_FOUND; tc 
= FIRST_CONTEXT(); return(tc->task); @@ -3448,49 +3691,68 @@ int msg_found; BZERO(buf, BUFSIZE); + msg_found = FALSE; - if (tt->panicmsg) + if (tt->panicmsg) { read_string(tt->panicmsg, buf, BUFSIZE-1); - else if (LKCD_DUMPFILE()) + msg_found = TRUE; + } else if (LKCD_DUMPFILE()) { get_lkcd_panicmsg(buf); - else { - msg_found = FALSE; + msg_found = TRUE; + } - open_tmpfile(); - dump_log(FALSE); + if (msg_found == TRUE) + return(buf); - rewind(pc->tmpfile); - while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { - if (strstr(buf, "Kernel panic: ")) - msg_found = TRUE; - } - rewind(pc->tmpfile); - while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { - if (strstr(buf, "Oops: ") || - strstr(buf, "kernel BUG at")) - msg_found = TRUE; - } - rewind(pc->tmpfile); - while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { - if (strstr(buf, "SysRq : Netdump") || - strstr(buf, "SysRq : Crash")) { - machdep->flags |= SYSRQ; - msg_found = TRUE; - } - } - rewind(pc->tmpfile); - while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { - if (strstr(buf, "sysrq") && - symbol_exists("sysrq_pressed")) - get_symbol_data("sysrq_pressed", sizeof(int), - &msg_found); - } + open_tmpfile(); + dump_log(FALSE); - close_tmpfile(); + /* + * First check for a SYSRQ-generated crash, and set the + * active-task flag appropriately. The message may or + * may not be used as the panic message. 
+ */ + rewind(pc->tmpfile); + while (fgets(buf, BUFSIZE, pc->tmpfile)) { + if (strstr(buf, "SysRq : Crash") || + strstr(buf, "SysRq : Trigger a crashdump")) { + pc->flags |= SYSRQ; + break; + } + } - if (!msg_found) - BZERO(buf, BUFSIZE); + rewind(pc->tmpfile); + while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { + if (strstr(buf, "Kernel panic: ")) + msg_found = TRUE; + } + rewind(pc->tmpfile); + while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { + if (strstr(buf, "Oops: ") || + strstr(buf, "kernel BUG at")) + msg_found = TRUE; } + rewind(pc->tmpfile); + while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { + if (strstr(buf, "SysRq : Netdump") || + strstr(buf, "SysRq : Trigger a crashdump") || + strstr(buf, "SysRq : Crash")) { + pc->flags |= SYSRQ; + msg_found = TRUE; + } + } + rewind(pc->tmpfile); + while (!msg_found && fgets(buf, BUFSIZE, pc->tmpfile)) { + if (strstr(buf, "sysrq") && + symbol_exists("sysrq_pressed")) + get_symbol_data("sysrq_pressed", sizeof(int), + &msg_found); + } + + close_tmpfile(); + + if (!msg_found) + BZERO(buf, BUFSIZE); return(buf); } @@ -3517,7 +3779,7 @@ BZERO(&foreach_data, sizeof(struct foreach_data)); fd = &foreach_data; - while ((c = getopt(argcnt, args, "R:vomlgersStpukcf")) != EOF) { + while ((c = getopt(argcnt, args, "R:vomlgersStTpukcf")) != EOF) { switch(c) { case 'R': @@ -3560,6 +3822,10 @@ fd->flags |= FOREACH_r_FLAG; break; + case 'T': + fd->flags |= FOREACH_T_FLAG; + break; + case 't': fd->flags |= FOREACH_t_FLAG; break; @@ -3962,7 +4228,12 @@ bt->flags |= BT_SYMBOLIC_ARGS; if (fd->flags & FOREACH_t_FLAG) bt->flags |= BT_TEXT_SYMBOLS; - if (fd->flags & FOREACH_o_FLAG) + if (fd->flags & FOREACH_T_FLAG) { + bt->flags |= BT_TEXT_SYMBOLS; + bt->flags |= BT_TEXT_SYMBOLS_ALL; + } + if ((fd->flags & FOREACH_o_FLAG) || + (kt->flags & USE_OLD_BT)) bt->flags |= BT_OLD_BACK_TRACE; if (fd->flags & FOREACH_e_FLAG) bt->flags |= BT_EFRAME_SEARCH; @@ -4188,6 +4459,12 @@ break; } + if (strstr(buf, " crash_kexec at ") || + 
strstr(buf, " .crash_kexec at ")) { + found = TRUE; + break; + } + if (strstr(buf, " die at ")) { switch (dietask) { @@ -4211,6 +4488,10 @@ if (dietask == (NO_TASK+1)) error(WARNING, "multiple active tasks have called die\n\n"); + if (CRASHDEBUG(1)) + error(INFO, "panic_search: %lx (via foreach bt)\n", + lasttask); + found_panic_task: populate_panic_threads(); @@ -4229,6 +4510,9 @@ } } + if (CRASHDEBUG(1)) + error(INFO, "panic_search: failed (via foreach bt)\n"); + return NULL; } @@ -4240,25 +4524,28 @@ { ulong task; - if (LKCD_DUMPFILE()) - return(get_lkcd_panic_task()); - if (NETDUMP_DUMPFILE()) { task = pc->flags & REM_NETDUMP ? tt->panic_task : get_netdump_panic_task(); if (task) return task; - if (get_active_set()) - return(get_active_set_panic_task()); - } - - if (DISKDUMP_DUMPFILE()) { + } else if (KDUMP_DUMPFILE()) { + task = get_kdump_panic_task(); + if (task) + return task; + } else if (DISKDUMP_DUMPFILE()) { task = get_diskdump_panic_task(); if (task) return task; - if (get_active_set()) - return(get_active_set_panic_task()); - } + } else if (XENDUMP_DUMPFILE()) { + task = get_xendump_panic_task(); + if (task) + return task; + } else if (LKCD_DUMPFILE()) + return(get_lkcd_panic_task()); + + if (get_active_set()) + return(get_active_set_panic_task()); return NO_TASK; } @@ -4298,14 +4585,17 @@ tc = FIRST_CONTEXT(); for (i = 0; i < RUNNING_TASKS(); i++, tc++) { - if (task_has_cpu(tc->task, NULL)) { + if (task_has_cpu(tc->task, NULL) && + (tc->processor >= 0) && + (tc->processor < NR_CPUS)) { tt->panic_threads[tc->processor] = tc->task; found++; } } if (!found && !(kt->flags & SMP) && - (LKCD_DUMPFILE() || NETDUMP_DUMPFILE() || DISKDUMP_DUMPFILE())) + (LKCD_DUMPFILE() || NETDUMP_DUMPFILE() || + KDUMP_DUMPFILE() || DISKDUMP_DUMPFILE())) tt->panic_threads[0] = get_dumpfile_panic_task(); } @@ -4363,6 +4653,8 @@ fprintf(fp, "refresh_pid_hash_task_table()\n"); else if (tt->refresh_task_table == refresh_hlist_task_table) fprintf(fp, 
"refresh_hlist_task_table()\n"); + else if (tt->refresh_task_table == refresh_hlist_task_table_v2) + fprintf(fp, "refresh_hlist_task_table_v2()\n"); else fprintf(fp, "%lx\n", (ulong)tt->refresh_task_table); @@ -4799,23 +5091,47 @@ tt->flags &= ~ACTIVE_SET; } -#define RESOLVE_PANIC_AND_DIE_CALLERS() \ - if ((panic_task > (NO_TASK+1)) && !die_task) \ - return panic_task; \ - \ - if (panic_task && die_task) { \ - error(WARNING, \ - "multiple active tasks have called die and/or panic\n\n"); \ - return NO_TASK; \ - } \ - \ - if (die_task > (NO_TASK+1)) \ - return die_task; \ - else if (die_task == (NO_TASK+1)) \ - error(WARNING, \ +#define RESOLVE_PANIC_AND_DIE_CALLERS() \ + if (xen_panic_task) { \ + if (CRASHDEBUG(1)) \ + error(INFO, \ + "get_active_set_panic_task: %lx (xen_panic_event)\n", \ + xen_panic_task); \ + return xen_panic_task; \ + } \ + if (crash_kexec_task) { \ + if (CRASHDEBUG(1)) \ + error(INFO, \ + "get_active_set_panic_task: %lx (crash_kexec)\n", \ + crash_kexec_task); \ + return crash_kexec_task; \ + } \ + if ((panic_task > (NO_TASK+1)) && !die_task) { \ + if (CRASHDEBUG(1)) \ + fprintf(fp, \ + "get_active_set_panic_task: %lx (panic)\n", \ + panic_task); \ + return panic_task; \ + } \ + \ + if (panic_task && die_task) { \ + error(WARNING, \ + "multiple active tasks have called die and/or panic\n\n"); \ + goto no_panic_task_found; \ + } \ + \ + if (die_task > (NO_TASK+1)) { \ + if (CRASHDEBUG(1)) \ + fprintf(fp, \ + "get_active_set_panic_task: %lx (die)\n", \ + die_task); \ + return die_task; \ + } \ + else if (die_task == (NO_TASK+1)) \ + error(WARNING, \ "multiple active tasks have called die\n\n"); -#define SEARCH_STACK_FOR_PANIC_AND_DIE_CALLERS() \ +#define SEARCH_STACK_FOR_PANIC_DIE_AND_KEXEC_CALLERS() \ while (fgets(buf, BUFSIZE, pc->tmpfile)) { \ if (strstr(buf, " die+")) { \ switch (die_task) \ @@ -4833,12 +5149,23 @@ { \ case NO_TASK: \ panic_task = task; \ + if (XENDUMP_DUMPFILE()) \ + xendump_panic_hook(buf); \ break; \ default: \ panic_task 
= NO_TASK+1; \ break; \ } \ } \ + if (strstr(buf, " crash_kexec+") || \ + strstr(buf, " .crash_kexec+")) { \ + crash_kexec_task = task; \ + } \ + if (strstr(buf, " xen_panic_event+") || \ + strstr(buf, " .xen_panic_event+")){ \ + xen_panic_task = task; \ + xendump_panic_hook(buf); \ + } \ } /* @@ -4850,11 +5177,12 @@ int i, j, found; ulong task; char buf[BUFSIZE]; - ulong panic_task, die_task; + ulong panic_task, die_task, crash_kexec_task; + ulong xen_panic_task; char *tp; struct task_context *tc; - panic_task = die_task = NO_TASK; + panic_task = die_task = crash_kexec_task = xen_panic_task = NO_TASK; for (i = 0; i < NR_CPUS; i++) { if (!(task = tt->active_set[i])) @@ -4875,7 +5203,7 @@ raw_stack_dump(GET_STACKBASE(task), STACKSIZE()); rewind(pc->tmpfile); - SEARCH_STACK_FOR_PANIC_AND_DIE_CALLERS(); + SEARCH_STACK_FOR_PANIC_DIE_AND_KEXEC_CALLERS(); close_tmpfile(); } @@ -4903,7 +5231,7 @@ raw_stack_dump(tt->hardirq_ctx[i], SIZE(thread_union)); rewind(pc->tmpfile); - SEARCH_STACK_FOR_PANIC_AND_DIE_CALLERS(); + SEARCH_STACK_FOR_PANIC_DIE_AND_KEXEC_CALLERS(); close_tmpfile(); } @@ -4930,7 +5258,7 @@ raw_stack_dump(tt->softirq_ctx[i], SIZE(thread_union)); rewind(pc->tmpfile); - SEARCH_STACK_FOR_PANIC_AND_DIE_CALLERS(); + SEARCH_STACK_FOR_PANIC_DIE_AND_KEXEC_CALLERS(); close_tmpfile(); } @@ -4938,6 +5266,20 @@ RESOLVE_PANIC_AND_DIE_CALLERS(); } + if (crash_kexec_task) { + if (CRASHDEBUG(1)) + error(INFO, + "get_active_set_panic_task: %lx (crash_kexec)\n", + crash_kexec_task); + return crash_kexec_task; + } + +no_panic_task_found: + + if (CRASHDEBUG(1)) + error(INFO, + "get_active_set_panic_task: failed\n"); + return NO_TASK; } @@ -5416,6 +5758,15 @@ char buf3[BUFSIZE]; char buf4[BUFSIZE]; + if (VALID_STRUCT(sigqueue) && !VALID_MEMBER(sigqueue_next)) { + MEMBER_OFFSET_INIT(sigqueue_next, "sigqueue", "next"); + MEMBER_OFFSET_INIT(sigqueue_list, "sigqueue", "list"); + MEMBER_OFFSET_INIT(sigqueue_info, "sigqueue", "info"); + } else if (!VALID_MEMBER(signal_queue_next)) { + 
MEMBER_OFFSET_INIT(signal_queue_next, "signal_queue", "next"); + MEMBER_OFFSET_INIT(signal_queue_info, "signal_queue", "info"); + } + sigset = task_signal(tc->task); if (!tt->last_task_read) return; --- crash/kernel.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/kernel.c 2006-04-26 16:42:12.000000000 -0400 @@ -20,7 +20,7 @@ static void do_module_cmd(ulong, char *, ulong, char *, char *); static char *find_module_objfile(char *, char *, char *); -static char *get_uptime(char *); +static char *module_objfile_search(char *, char *, char *); static char *get_loadavg(char *); static void get_lkcd_regs(struct bt_info *, ulong *, ulong *); static void dump_sys_call_table(char *, int); @@ -51,7 +51,7 @@ kernel_init(int when) { int i; - char *p1, *p2, buf[BUFSIZE];; + char *p1, *p2, buf[BUFSIZE]; struct syment *sp1, *sp2; if (pc->flags & KERNEL_DEBUG_QUERY) @@ -69,6 +69,26 @@ } kt->end = symbol_value("_end"); + /* + * If Xen architecture, default to a guest kernel running + * with writable page tables; for now it can be overridden + * with the --hypervisor and --shared_page_tables command + * line options. 
+ */ + if (symbol_exists("xen_start_info")) { + kt->flags |= ARCH_XEN; + if (!(kt->xen_flags & (SHADOW_PAGE_TABLES|CANONICAL_PAGE_TABLES))) + kt->xen_flags |= WRITABLE_PAGE_TABLES; + get_symbol_data("phys_to_machine_mapping", sizeof(ulong), + &kt->phys_to_machine_mapping); + if (machine_type("X86")) + get_symbol_data("max_pfn", sizeof(ulong), &kt->ptm_table_size); + if (machine_type("X86_64")) + get_symbol_data("end_pfn", sizeof(ulong), &kt->ptm_table_size); + if ((kt->machine_to_pseudo = (char *)malloc(PAGESIZE())) == NULL) + error(FATAL, "cannot malloc machine_to_pseudo space."); + } + if (symbol_exists("smp_num_cpus")) { kt->flags |= SMP; get_symbol_data("smp_num_cpus", sizeof(int), &kt->cpus); @@ -117,6 +137,7 @@ *p2 = NULLCHAR; kt->kernel_version[2] = atoi(p1); } + break; case POST_GDB: @@ -128,7 +149,8 @@ kt->flags |= PER_CPU_OFF; } MEMBER_OFFSET_INIT(runqueue_cpu, "runqueue", "cpu"); - if (VALID_MEMBER(runqueue_cpu)) { + if (VALID_MEMBER(runqueue_cpu) && + (get_array_length("runqueue.cpu", NULL, 0) > 0)) { MEMBER_OFFSET_INIT(cpu_s_curr, "cpu_s", "curr"); MEMBER_OFFSET_INIT(cpu_s_idle, "cpu_s", "idle"); STRUCT_SIZE_INIT(cpu_s, "cpu_s"); @@ -153,6 +175,7 @@ } else { MEMBER_OFFSET_INIT(runqueue_idle, "runqueue", "idle"); MEMBER_OFFSET_INIT(runqueue_curr, "runqueue", "curr"); + ASSIGN_OFFSET(runqueue_cpu) = INVALID_OFFSET; } MEMBER_OFFSET_INIT(runqueue_active, "runqueue", "active"); MEMBER_OFFSET_INIT(runqueue_expired, "runqueue", "expired"); @@ -471,6 +494,9 @@ } } + if (CRASHDEBUG(1)) + gdb_readnow_warning(); + return; bad_match: @@ -1140,7 +1166,10 @@ bt = &bt_info; BZERO(bt, sizeof(struct bt_info)); - while ((c = getopt(argcnt, args, "fF:I:S:aloreEgstd:R:")) != EOF) { + if (kt->flags & USE_OLD_BT) + bt->flags |= BT_OLD_BACK_TRACE; + + while ((c = getopt(argcnt, args, "fF:I:S:aloreEgstTd:R:O")) != EOF) { switch(c) { case 'f': @@ -1151,6 +1180,28 @@ bt->flags |= BT_OLD_BACK_TRACE; break; + case 'O': + if (!machine_type("X86")) + option_not_supported(c); + else 
if (kt->flags & USE_OLD_BT) { + /* + * Make this setting idempotent across the use of + * $HOME/.crashrc, ./.crashrc, and "-i input" files. + * If we've been here before during initialization, + * leave it alone. + */ + if (pc->flags & INIT_IFILE) { + error(INFO, "use old bt method by default (already set)\n"); + return; + } + kt->flags &= ~USE_OLD_BT; + error(INFO, "use new bt method by default\n"); + } else { + kt->flags |= USE_OLD_BT; + error(INFO, "use old bt method by default\n"); + } + return; + case 'R': if (refptr) error(INFO, "only one -R option allowed\n"); @@ -1241,6 +1292,8 @@ bt->flags |= BT_SYMBOLIC_ARGS; break; + case 'T': + bt->flags |= BT_TEXT_SYMBOLS_ALL; case 't': bt->flags |= BT_TEXT_SYMBOLS; break; @@ -1350,9 +1403,10 @@ char buf[BUFSIZE]; if (bt->flags & BT_TEXT_SYMBOLS) { - fprintf(fp, "%sSTART: %s at %lx\n", - space(VADDR_PRLEN > 8 ? 14 : 6), - closest_symbol(eip), eip); + if (!(bt->flags & BT_TEXT_SYMBOLS_ALL)) + fprintf(fp, "%sSTART: %s at %lx\n", + space(VADDR_PRLEN > 8 ? 14 : 6), + closest_symbol(eip), eip); } if (bt->hp) @@ -1435,6 +1489,9 @@ i < LONGS_PER_STACK; i++, up++) { if (is_kernel_text(*up)) fprintf(fp, "%lx: %s\n", + tt->flags & THREAD_INFO ? 
+ bt->tc->thread_info + + (i * sizeof(long)) : bt->task + (i * sizeof(long)), value_to_symstr(*up, buf, 0)); } @@ -1461,8 +1518,8 @@ if (bt->hp) { if (bt->hp->esp && !INSTACK(bt->hp->esp, bt)) error(INFO, - "invalid stack address for this task: %lx\n", - bt->hp->esp); + "invalid stack address for this task: %lx\n (valid range: %lx - %lx)\n", + bt->hp->esp, bt->stackbase, bt->stacktop); eip = bt->hp->eip; esp = bt->hp->esp; @@ -1471,10 +1528,14 @@ } else if (NETDUMP_DUMPFILE()) get_netdump_regs(bt, &eip, &esp); + else if (KDUMP_DUMPFILE()) + get_kdump_regs(bt, &eip, &esp); else if (DISKDUMP_DUMPFILE()) get_diskdump_regs(bt, &eip, &esp); else if (LKCD_DUMPFILE()) get_lkcd_regs(bt, &eip, &esp); + else if (XENDUMP_DUMPFILE()) + get_xendump_regs(bt, &eip, &esp); else machdep->get_stack_frame(bt, &eip, &esp); @@ -1486,6 +1547,13 @@ if (bt->flags & (BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_PRINT|BT_TEXT_SYMBOLS_NOPRINT)) { + if (bt->flags & BT_TEXT_SYMBOLS_ALL) { + esp = bt->stackbase + + ((tt->flags & THREAD_INFO) ? + SIZE(thread_info) : SIZE(task_struct)); + eip = 0; + } + if (machdep->flags & MACHDEP_BT_TEXT) { bt->instptr = eip; bt->stkptr = esp; @@ -1721,6 +1789,13 @@ *esp = *(up-1); return; } + /* Egenera */ + if (STREQ(sym, "netdump_ipi")) { + *eip = *up; + *esp = bt->task + + ((char *)(up-1) - bt->stackbuf); + return; + } if (STREQ(sym, "smp_stop_cpu_interrupt")) { *eip = *up; *esp = bt->task + @@ -1873,6 +1948,8 @@ kallsymsbuf = kt->flags & KALLSYMS_V1 ? GETBUF(SIZE(kallsyms_header)) : NULL; + please_wait("gathering module symbol data"); + for (mod = kt->module_list; mod != kt->kernel_module; mod = mod_next) { if (CRASHDEBUG(7)) fprintf(fp, "module: %lx\n", mod); @@ -1880,7 +1957,8 @@ if (!readmem(mod, KVADDR, modbuf, SIZE(module), "module struct", RETURN_ON_ERROR|QUIET)) { error(WARNING, - "cannot access vmalloc'd module memory\n\n"); + "%scannot access vmalloc'd module memory\n\n", + DUMPFILE() ? 
"\n" : ""); kt->mods_installed = 0; kt->flags |= NO_MODULE_ACCESS; FREEBUF(modbuf); @@ -1914,7 +1992,8 @@ kallsymsbuf, SIZE(kallsyms_header), "kallsyms_header", RETURN_ON_ERROR|QUIET)) { error(WARNING, - "cannot access module kallsyms_header\n"); + "%scannot access module kallsyms_header\n", + DUMPFILE() ? "\n" : ""); } else { nsyms = UINT(kallsymsbuf + OFFSET(kallsyms_header_symbols)); @@ -1947,6 +2026,8 @@ store_module_symbols_v2(total, kt->mods_installed); break; } + + please_wait_done(); } @@ -2459,7 +2540,7 @@ static char * -find_module_objfile(char *modref, char *filename, char *tree) +module_objfile_search(char *modref, char *filename, char *tree) { char buf[BUFSIZE]; char file[BUFSIZE]; @@ -2592,6 +2673,32 @@ return retbuf; } +/* + * First look for a module based upon its reference name. + * If that fails, try replacing any underscores in the + * reference name with a dash. + * + * Example: module name "dm_mod" comes from "dm-mod.ko" objfile + */ +static char * +find_module_objfile(char *modref, char *filename, char *tree) +{ + char * retbuf; + char tmpref[BUFSIZE]; + int c; + + retbuf = module_objfile_search(modref, filename, tree); + + if (!retbuf) { /* retry with '_' mapped to '-' */ + snprintf(tmpref, BUFSIZE, "%s", modref); /* always NUL-terminates, unlike strncpy */ + for (c = 0; c < BUFSIZE && tmpref[c]; c++) + if (tmpref[c] == '_') + tmpref[c] = '-'; + retbuf = module_objfile_search(tmpref, filename, tree); + } + + return retbuf; +} /* * Unlink any temporary remote module object files.
@@ -2876,7 +2983,7 @@ get_symbol_data("xtime", sizeof(struct timespec), &kt->date); fprintf(fp, " DATE: %s\n", strip_linefeeds(ctime(&kt->date.tv_sec))); - fprintf(fp, " UPTIME: %s\n", get_uptime(buf)); + fprintf(fp, " UPTIME: %s\n", get_uptime(buf, NULL)); fprintf(fp, "LOAD AVERAGE: %s\n", get_loadavg(buf)); fprintf(fp, " TASKS: %ld\n", RUNNING_TASKS()); fprintf(fp, " NODENAME: %s\n", uts->nodename); @@ -2891,6 +2998,9 @@ #ifdef WHO_CARES fprintf(fp, " DOMAINNAME: %s\n", uts->domainname); #endif + if (XENDUMP_DUMPFILE() && (kt->xen_flags & XEN_SUSPEND)) + return; + if (DUMPFILE()) { fprintf(fp, " PANIC: "); if (machdep->flags & HWRESET) @@ -2952,28 +3062,42 @@ /* * Calculate and return the uptime. */ - -static char * -get_uptime(char *buf) +char * +get_uptime(char *buf, ulonglong *j64p) { - ulong jiffies; + ulong jiffies, tmp1, tmp2; + ulonglong jiffies_64, wrapped; - get_symbol_data("jiffies", sizeof(long), &jiffies); - - if ((machine_type("S390") || machine_type("S390X")) && - (THIS_KERNEL_VERSION >= LINUX(2,6,0))) - jiffies -= ((unsigned long)(unsigned int)(-300*machdep->hz)); - else if (symbol_exists("jiffies_64") && BITS64() && - (((ulonglong)jiffies & 0xffffffff00000000ULL) == - 0x100000000ULL)) - jiffies &= 0xffffffff; - - convert_time((ulonglong)jiffies, buf); + if (symbol_exists("jiffies_64")) { + get_symbol_data("jiffies_64", sizeof(ulonglong), &jiffies_64); + if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) { + wrapped = (jiffies_64 & 0xffffffff00000000ULL); + if (wrapped) { + wrapped -= 0x100000000; + jiffies_64 &= 0x00000000ffffffffULL; + jiffies_64 |= wrapped; + jiffies_64 += (ulonglong)(300*machdep->hz); + } else { + tmp1 = (ulong)(uint)(-300*machdep->hz); + tmp2 = (ulong)jiffies_64; + jiffies_64 = (ulonglong)(tmp2 - tmp1); + } + } + if (buf) + convert_time(jiffies_64, buf); + if (j64p) + *j64p = jiffies_64; + } else { + get_symbol_data("jiffies", sizeof(long), &jiffies); + if (buf) + convert_time((ulonglong)jiffies, buf); + if (j64p) + *j64p = 
(ulonglong)jiffies; + } return buf; } - #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ #define LOAD_INT(x) ((x) >> FSHIFT) @@ -3181,7 +3305,7 @@ * "help -k" output */ void -dump_kernel_table(void) +dump_kernel_table(int verbose) { int i; struct new_utsname *uts; @@ -3225,6 +3349,10 @@ fprintf(fp, "%sKMOD_V2", others++ ? "|" : ""); if (kt->flags & KALLSYMS_V2) fprintf(fp, "%sKALLSYMS_V2", others++ ? "|" : ""); + if (kt->flags & USE_OLD_BT) + fprintf(fp, "%sUSE_OLD_BT", others++ ? "|" : ""); + if (kt->flags & ARCH_XEN) + fprintf(fp, "%sARCH_XEN", others++ ? "|" : ""); fprintf(fp, ")\n"); fprintf(fp, " stext: %lx\n", kt->stext); fprintf(fp, " etext: %lx\n", kt->etext); @@ -3277,7 +3405,37 @@ fprintf(fp, "\n cpu_flags[NR_CPUS]:"); for (i = 0; i < NR_CPUS; i++) fprintf(fp, "%lx ", kt->cpu_flags[i]); - fprintf(fp, "\n"); + others = 0; + fprintf(fp, "\n xen_flags: %lx (", kt->xen_flags); + if (kt->xen_flags & WRITABLE_PAGE_TABLES) + fprintf(fp, "%sWRITABLE_PAGE_TABLES", others++ ? "|" : ""); + if (kt->xen_flags & SHADOW_PAGE_TABLES) + fprintf(fp, "%sSHADOW_PAGE_TABLES", others++ ? "|" : ""); + if (kt->xen_flags & CANONICAL_PAGE_TABLES) + fprintf(fp, "%sCANONICAL_PAGE_TABLES", others++ ? "|" : ""); + if (kt->xen_flags & XEN_SUSPEND) + fprintf(fp, "%sXEN_SUSPEND", others++ ? "|" : ""); + fprintf(fp, ")\n"); + fprintf(fp, " machine_to_pseudo: %lx\n", (ulong)kt->machine_to_pseudo); + fprintf(fp, "phys_to_machine_mapping: %lx\n", kt->phys_to_machine_mapping); + fprintf(fp, " ptm_table_size: %ld\n", kt->ptm_table_size); + fprintf(fp, " ptm_mapping_cache[%d]: %s\n", PTM_MAPPING_CACHE, + verbose ?
"" : "(use \"help -K\" to view cache contents)"); + for (i = 0; verbose && (i < PTM_MAPPING_CACHE); i++) { + if (!kt->ptm_mapping_cache[i].mapping) + continue; + fprintf(fp, " [%d] mapping: %lx mfn: %lx\n", + i, kt->ptm_mapping_cache[i].mapping, + kt->ptm_mapping_cache[i].mfn); + } + fprintf(fp, " last_mapping_read: %lx\n", kt->last_mapping_read); + fprintf(fp, " ptm_cache_index: %ld\n", kt->ptm_cache_index); + fprintf(fp, " ptm_pages_searched: %ld\n", kt->ptm_pages_searched); + fprintf(fp, " ptm_cache_hits: %ld ", kt->ptm_cache_hits); + if (kt->ptm_pages_searched) + fprintf(fp, "(%ld%%)\n", kt->ptm_cache_hits * 100 / kt->ptm_pages_searched); + else + fprintf(fp, "\n"); } /* @@ -4475,9 +4633,16 @@ ld->start = vec[i]; ld->list_head_offset = offset; ld->end = vec_kvaddr; + ld->flags = RETURN_ON_LIST_ERROR; hq_open(); - timer_cnt = do_list(ld); + if ((timer_cnt = do_list(ld)) == -1) { + /* Ignore chains with errors */ + error(INFO, + "ignoring faulty timer list at index %d of timer array\n", + i/2); + continue; + } if (!timer_cnt) continue; timer_list = (ulong *)GETBUF(timer_cnt * sizeof(ulong)); @@ -4708,6 +4873,8 @@ machdep->last_pgd_read = 0; machdep->last_pmd_read = 0; machdep->last_ptbl_read = 0; + if (machdep->clear_machdep_cache) + machdep->clear_machdep_cache(); } } @@ -4726,3 +4893,104 @@ return count_bits_long(cpu_online_map); } + +/* + * Generic xen machine-address to pseudo-physical-address translator. + */ +ulong +xen_machine_to_pseudo(ulong machine) +{ + ulong mapping, mfn, kmfn, pfn, p, i, c; + ulong *mp; + + mfn = XEN_MACHINE_TO_MFN(machine); + mp = (ulong *)kt->machine_to_pseudo; + mapping = kt->phys_to_machine_mapping; + + /* + * Check the FIFO cache first. 
+ */ + for (c = 0; c < PTM_MAPPING_CACHE; c++) { + if (kt->ptm_mapping_cache[c].mapping && + (kt->ptm_mapping_cache[c].mfn == mfn)) { + + if (kt->ptm_mapping_cache[c].mapping != kt->last_mapping_read) { + if (!readmem(kt->ptm_mapping_cache[c].mapping, KVADDR, + mp, PAGESIZE(), "phys_to_machine_mapping page (cached)", + RETURN_ON_ERROR)) + error(FATAL, "cannot access " + "phys_to_machine_mapping page\n"); + else + kt->last_mapping_read = kt->ptm_mapping_cache[c].mapping; + } + + for (i = 0; i < XEN_PFNS_PER_PAGE; i++) { + kmfn = (*(mp+i)) & ~XEN_FOREIGN_FRAME; + if (kmfn == mfn) { + p = PTM_MAPPING_TO_PAGE_INDEX(c); + pfn = p + i; + + if (CRASHDEBUG(1)) + console("(cached) mfn: %lx (%lx) p: %ld" + " i: %ld pfn: %lx (%lx)\n", + mfn, machine, p, + i, pfn, XEN_PFN_TO_PSEUDO(pfn)); + kt->ptm_cache_hits++; + + return(XEN_PFN_TO_PSEUDO(pfn)); + } + } + /* + * Stale entry -- clear it out. + */ + kt->ptm_mapping_cache[c].mapping = 0; + } + } + + /* + * The machine address was not cached, so search from the + * beginning of the phys_to_machine_mapping array, caching + * only the found machine address. 
+ */ + for (p = 0; p < kt->ptm_table_size; p += XEN_PFNS_PER_PAGE) + { + if (mapping != kt->last_mapping_read) { + if (!readmem(mapping, KVADDR, mp, PAGESIZE(), + "phys_to_machine_mapping page", RETURN_ON_ERROR)) + error(FATAL, + "cannot access phys_to_machine_mapping page\n"); + else + kt->last_mapping_read = mapping; + } + + kt->ptm_pages_searched++; + + for (i = 0; i < XEN_PFNS_PER_PAGE; i++) + { + kmfn = (*(mp+i)) & ~XEN_FOREIGN_FRAME; + if (kmfn == mfn) { + pfn = p + i; + if (CRASHDEBUG(1)) + console("pages: %ld mfn: %lx (%lx) p: %ld" + " i: %ld pfn: %lx (%lx)\n", + (p/XEN_PFNS_PER_PAGE)+1, mfn, machine, + p, i, pfn, XEN_PFN_TO_PSEUDO(pfn)); + + c = kt->ptm_cache_index; + kt->ptm_mapping_cache[c].mfn = mfn; + kt->ptm_mapping_cache[c].mapping = mapping; + kt->ptm_cache_index = (c+1) % PTM_MAPPING_CACHE; + + return(XEN_PFN_TO_PSEUDO(pfn)); + } + } + + mapping += PAGESIZE(); + } + + if (CRASHDEBUG(1)) + console("machine address %lx not found\n", machine); + + return(XEN_MFN_NOT_FOUND); +} + --- crash/gdb_interface.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/gdb_interface.c 2006-04-11 17:25:48.000000000 -0400 @@ -31,9 +31,6 @@ { argc = 1; - if (CRASHDEBUG(1)) - gdb_readnow_warning(); - if (pc->flags & SILENT) { if (pc->flags & READNOW) argv[argc++] = "--readnow"; @@ -206,12 +203,17 @@ if (!debug_data_pulled_in) { if (CRASHDEBUG(1)) error(INFO, - "gdb_session_init: pulling in debug data by accessing init_mm.mmap\n"); + "gdb_session_init: pulling in debug data by accessing init_mm.mmap %s\n", + symbol_exists("sysfs_mount") ?
+ "and sysfs_mount" : ""); debug_data_pulled_in = TRUE; req->command = GNU_PASS_THROUGH; req->flags = GNU_RETURN_ON_ERROR|GNU_NO_READMEM; req->name = NULL; - sprintf(req->buf, "print init_mm.mmap"); + if (symbol_exists("sysfs_mount")) + sprintf(req->buf, "print sysfs_mount, init_mm.mmap"); + else + sprintf(req->buf, "print init_mm.mmap"); gdb_interface(req); if (!(req->flags & GNU_COMMAND_FAILED)) goto retry; --- crash/configure.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/configure.c 2006-03-23 15:44:46.000000000 -0500 @@ -1128,7 +1128,7 @@ printf("License: GPL\n"); printf("Group: Development/Debuggers\n"); printf("Source: %%{name}-%%{version}-%%{release}.tar.gz\n"); - printf("URL: ftp://people.redhat.com/anderson/%%{name}-%%{version}-%%{release}.tar.gz\n"); + printf("URL: http://people.redhat.com/anderson\n"); printf("Distribution: Linux 2.2 or greater\n"); printf("Vendor: Red Hat, Inc.\n"); printf("Packager: Dave Anderson \n"); --- crash/net.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/net.c 2006-05-11 11:17:01.000000000 -0400 @@ -50,6 +50,7 @@ #define STRUCT_NET_DEVICE (0x4) #define SOCK_V1 (0x8) #define SOCK_V2 (0x10) +#define NO_INET_SOCK (0x20) #define DEV_NAME_MAX 100 struct devinfo { @@ -75,6 +76,7 @@ static void dump_sockets(ulong, struct reference *); static int sym_socket_dump(ulong, int, int, ulong, struct reference *); static void dump_hw_addr(unsigned char *, int); +static char *dump_in6_addr_port(uint16_t *, uint16_t, char *, int *); #define MK_TYPE_T(f,s,m) \ @@ -158,13 +160,6 @@ "in_ifaddr", "ifa_address"); STRUCT_SIZE_INIT(sock, "sock"); - MEMBER_OFFSET_INIT(sock_daddr, "sock", "daddr"); - MEMBER_OFFSET_INIT(sock_rcv_saddr, "sock", "rcv_saddr"); - MEMBER_OFFSET_INIT(sock_dport, "sock", "dport"); - MEMBER_OFFSET_INIT(sock_sport, "sock", "sport"); - MEMBER_OFFSET_INIT(sock_num, "sock", "num"); - MEMBER_OFFSET_INIT(sock_family, "sock", "family"); - MEMBER_OFFSET_INIT(sock_type, "sock", "type"); MEMBER_OFFSET_INIT(sock_family, "sock",
"family"); if (VALID_MEMBER(sock_family)) { @@ -195,7 +190,23 @@ */ STRUCT_SIZE_INIT(inet_sock, "inet_sock"); STRUCT_SIZE_INIT(socket, "socket"); - MEMBER_OFFSET_INIT(inet_sock_inet, "inet_sock", "inet"); + + if (STRUCT_EXISTS("inet_opt")) { + MEMBER_OFFSET_INIT(inet_sock_inet, "inet_sock", "inet"); + MEMBER_OFFSET_INIT(inet_opt_daddr, "inet_opt", "daddr"); + MEMBER_OFFSET_INIT(inet_opt_rcv_saddr, "inet_opt", "rcv_saddr"); + MEMBER_OFFSET_INIT(inet_opt_dport, "inet_opt", "dport"); + MEMBER_OFFSET_INIT(inet_opt_sport, "inet_opt", "sport"); + MEMBER_OFFSET_INIT(inet_opt_num, "inet_opt", "num"); + } else { /* inet_opt moved to inet_sock */ + ASSIGN_OFFSET(inet_sock_inet) = 0; + MEMBER_OFFSET_INIT(inet_opt_daddr, "inet_sock", "daddr"); + MEMBER_OFFSET_INIT(inet_opt_rcv_saddr, "inet_sock", "rcv_saddr"); + MEMBER_OFFSET_INIT(inet_opt_dport, "inet_sock", "dport"); + MEMBER_OFFSET_INIT(inet_opt_sport, "inet_sock", "sport"); + MEMBER_OFFSET_INIT(inet_opt_num, "inet_sock", "num"); + } + if (VALID_STRUCT(inet_sock) && INVALID_MEMBER(inet_sock_inet)) { /* @@ -210,15 +221,36 @@ * to subtract the size of the inet_opt struct * from the size of the containing inet_sock. */ + net->flags |= NO_INET_SOCK; ASSIGN_OFFSET(inet_sock_inet) = SIZE(inet_sock) - STRUCT_SIZE("inet_opt"); } - MEMBER_OFFSET_INIT(inet_opt_daddr, "inet_opt", "daddr"); - MEMBER_OFFSET_INIT(inet_opt_rcv_saddr, "inet_opt", - "rcv_saddr"); - MEMBER_OFFSET_INIT(inet_opt_dport, "inet_opt", "dport"); - MEMBER_OFFSET_INIT(inet_opt_sport, "inet_opt", "sport"); - MEMBER_OFFSET_INIT(inet_opt_num, "inet_opt", "num"); + + /* + * If necessary, set inet_sock size and inet_sock_inet offset, + * accounting for the configuration-dependent, intervening, + * struct ipv6_pinfo pointer located in between the sock and + * inet_opt members of the inet_sock. 
+ */ + if (!VALID_STRUCT(inet_sock)) + { + if (symbol_exists("tcpv6_protocol") && + symbol_exists("udpv6_protocol")) { + ASSIGN_SIZE(inet_sock) = SIZE(sock) + + sizeof(void *) + STRUCT_SIZE("inet_opt"); + ASSIGN_OFFSET(inet_sock_inet) = SIZE(sock) + + sizeof(void *); + } else { + ASSIGN_SIZE(inet_sock) = SIZE(sock) + + STRUCT_SIZE("inet_opt"); + ASSIGN_OFFSET(inet_sock_inet) = SIZE(sock); + } + } + + MEMBER_OFFSET_INIT(ipv6_pinfo_rcv_saddr, "ipv6_pinfo", "rcv_saddr"); + MEMBER_OFFSET_INIT(ipv6_pinfo_daddr, "ipv6_pinfo", "daddr"); + STRUCT_SIZE_INIT(in6_addr, "in6_addr"); + net->flags |= SOCK_V2; } } @@ -378,6 +410,24 @@ nhash_buckets = (i = ARRAY_LENGTH(neigh_table_hash_buckets)) ? i : get_array_length("neigh_table.hash_buckets", NULL, sizeof(void *)); + + /* + * NOTE: 2.6.8 -> 2.6.9 neigh_table struct changed from: + * + * struct neighbour *hash_buckets[32]; + * to + * struct neighbour **hash_buckets; + * + * Even after hardwiring and testing with the correct + * array size, other changes cause this command to break + * down, so it needs to be looked at by someone who cares... + */ + + if (nhash_buckets == 0) { + option_not_supported('a'); + return; + } + hash_bytes = nhash_buckets * sizeof(*hash_buckets); hash_buckets = (ulong *)GETBUF(hash_bytes); @@ -609,8 +659,14 @@ uint16_t dport, sport; ushort num, family, type; char *sockbuf, *inet_sockbuf; + ulong ipv6_pinfo, ipv6_rcv_saddr, ipv6_daddr; + uint16_t u6_addr16_src[8]; + uint16_t u6_addr16_dest[8]; + char buf2[BUFSIZE]; + int len; BZERO(buf, BUFSIZE); + BZERO(buf2, BUFSIZE); sockbuf = inet_sockbuf = NULL; switch (net->flags & (SOCK_V1|SOCK_V2)) @@ -646,6 +702,7 @@ OFFSET(inet_opt_num)); family = USHORT(inet_sockbuf + OFFSET(sock_common_skc_family)); type = USHORT(inet_sockbuf + OFFSET(sock_sk_type)); + ipv6_pinfo = ULONG(inet_sockbuf + SIZE(sock)); break; } @@ -723,27 +780,28 @@ } /* make sure we have room at the end... 
*/ - sprintf(&buf[strlen(buf)], "%s", space(MINSPACE-1)); +// sprintf(&buf[strlen(buf)], "%s", space(MINSPACE-1)); + sprintf(&buf[strlen(buf)], " "); if (family == AF_INET) { if (BITS32()) { - sprintf(&buf[strlen(buf)], "%*s:%-*d%s", + sprintf(&buf[strlen(buf)], "%*s-%-*d%s", BYTES_IP_ADDR, inet_ntoa(*((struct in_addr *)&rcv_saddr)), BYTES_PORT_NUM, ntohs(sport), space(1)); - sprintf(&buf[strlen(buf)], "%*s:%-*d%s", + sprintf(&buf[strlen(buf)], "%*s-%-*d%s", BYTES_IP_ADDR, inet_ntoa(*((struct in_addr *)&daddr)), BYTES_PORT_NUM, ntohs(dport), space(1)); } else { - sprintf(&buf[strlen(buf)], " %s:%d ", + sprintf(&buf[strlen(buf)], " %s-%d ", inet_ntoa(*((struct in_addr *)&rcv_saddr)), ntohs(sport)); - sprintf(&buf[strlen(buf)], "%s:%d", + sprintf(&buf[strlen(buf)], "%s-%d", inet_ntoa(*((struct in_addr *)&daddr)), ntohs(dport)); } @@ -753,6 +811,60 @@ FREEBUF(sockbuf); if (inet_sockbuf) FREEBUF(inet_sockbuf); + + if (family != AF_INET6) + return; + + switch (net->flags & (SOCK_V1|SOCK_V2)) + { + case SOCK_V1: + break; + + case SOCK_V2: + if (INVALID_MEMBER(ipv6_pinfo_rcv_saddr) || + INVALID_MEMBER(ipv6_pinfo_daddr)) + break; + + ipv6_rcv_saddr = ipv6_pinfo + OFFSET(ipv6_pinfo_rcv_saddr); + ipv6_daddr = ipv6_pinfo + OFFSET(ipv6_pinfo_daddr); + + if (!readmem(ipv6_rcv_saddr, KVADDR, u6_addr16_src, SIZE(in6_addr), + "ipv6_rcv_saddr buffer", QUIET|RETURN_ON_ERROR)) + break; + if (!readmem(ipv6_daddr, KVADDR, u6_addr16_dest, SIZE(in6_addr), + "ipv6_daddr buffer", QUIET|RETURN_ON_ERROR)) + break; + + sprintf(&buf[strlen(buf)], "%*s ", BITS32() ? 
22 : 12, + dump_in6_addr_port(u6_addr16_src, sport, buf2, &len)); + if (BITS32() && (len > 22)) + len = 1; + mkstring(dump_in6_addr_port(u6_addr16_dest, dport, buf2, NULL), + len, CENTER, NULL); + sprintf(&buf[strlen(buf)], "%s", buf2); + + break; + } +} + +static char * +dump_in6_addr_port(uint16_t *addr, uint16_t port, char *buf, int *len) +{ + sprintf(buf, "%x:%x:%x:%x:%x:%x:%x:%x-%d", + ntohs(addr[0]), + ntohs(addr[1]), + ntohs(addr[2]), + ntohs(addr[3]), + ntohs(addr[4]), + ntohs(addr[5]), + ntohs(addr[6]), + ntohs(addr[7]), + ntohs(port)); + + if (len) + *len = strlen(buf); + + return buf; } @@ -899,6 +1011,8 @@ fprintf(fp, "%sSTRUCT_DEVICE", others++ ? "|" : ""); if (net->flags & STRUCT_NET_DEVICE) fprintf(fp, "%sSTRUCT_NET_DEVICE", others++ ? "|" : ""); + if (net->flags & NO_INET_SOCK) + fprintf(fp, "%sNO_INET_SOCK", others++ ? "|" : ""); if (net->flags & SOCK_V1) fprintf(fp, "%sSOCK_V1", others++ ? "|" : ""); if (net->flags & SOCK_V2) @@ -972,7 +1086,7 @@ void dump_sockets_workhorse(ulong task, ulong flag, struct reference *ref) { - ulong files_struct_addr = 0; + ulong files_struct_addr = 0, fdtable_addr = 0; int max_fdset = 0; int max_fds = 0; ulong open_fds_addr = 0; @@ -1004,32 +1118,51 @@ sizeof(void *), "task files contents", FAULT_ON_ERROR); if (files_struct_addr) { - readmem(files_struct_addr + OFFSET(files_struct_max_fdset), - KVADDR, &max_fdset, sizeof(int), - "files_struct max_fdset", FAULT_ON_ERROR); - - readmem(files_struct_addr + OFFSET(files_struct_max_fds), - KVADDR, &max_fds, sizeof(int), "files_struct max_fds", - FAULT_ON_ERROR); - } + if (VALID_MEMBER(files_struct_max_fdset)) { + readmem(files_struct_addr + OFFSET(files_struct_max_fdset), + KVADDR, &max_fdset, sizeof(int), + "files_struct max_fdset", FAULT_ON_ERROR); + readmem(files_struct_addr + OFFSET(files_struct_max_fds), + KVADDR, &max_fds, sizeof(int), "files_struct max_fds", + FAULT_ON_ERROR); + } + else if (VALID_MEMBER(files_struct_fdt)) { + readmem(files_struct_addr + 
OFFSET(files_struct_fdt), KVADDR, + &fdtable_addr, sizeof(void *), "fdtable buffer", + FAULT_ON_ERROR); + readmem(fdtable_addr + OFFSET(fdtable_max_fdset), + KVADDR, &max_fdset, sizeof(int), + "fdtable_struct max_fdset", FAULT_ON_ERROR); + readmem(fdtable_addr + OFFSET(fdtable_max_fds), + KVADDR, &max_fds, sizeof(int), "fdtable_struct max_fds", + FAULT_ON_ERROR); + } + } - if (!files_struct_addr || (max_fdset == 0) || (max_fds == 0)) { + if ((VALID_MEMBER(files_struct_fdt) && !fdtable_addr) || + !files_struct_addr || (max_fdset == 0) || (max_fds == 0)) { if (!NET_REFERENCE_CHECK(ref)) fprintf(fp, "No open sockets.\n"); return; } - readmem(files_struct_addr + OFFSET(files_struct_open_fds), KVADDR, - &open_fds_addr, sizeof(void *), "files_struct open_fds addr", - FAULT_ON_ERROR); + if (VALID_MEMBER(fdtable_open_fds)){ + readmem(fdtable_addr + OFFSET(fdtable_open_fds), KVADDR, + &open_fds_addr, sizeof(void *), "files_struct open_fds addr", + FAULT_ON_ERROR); + readmem(fdtable_addr + OFFSET(fdtable_fd), KVADDR, &fd, + sizeof(void *), "files_struct fd addr", FAULT_ON_ERROR); + } else { + readmem(files_struct_addr + OFFSET(files_struct_open_fds), KVADDR, + &open_fds_addr, sizeof(void *), "files_struct open_fds addr", + FAULT_ON_ERROR); + readmem(files_struct_addr + OFFSET(files_struct_fd), KVADDR, &fd, + sizeof(void *), "files_struct fd addr", FAULT_ON_ERROR); + } if (open_fds_addr) - readmem(open_fds_addr, KVADDR, &open_fds, sizeof(fd_set), - "files_struct open_fds", FAULT_ON_ERROR); - - readmem(files_struct_addr + OFFSET(files_struct_fd), KVADDR, &fd, - sizeof(void *), "files_struct fd addr", FAULT_ON_ERROR); - + readmem(open_fds_addr, KVADDR, &open_fds, sizeof(fd_set), + "files_struct open_fds", FAULT_ON_ERROR); if (!open_fds_addr || !fd) { if (!NET_REFERENCE_CHECK(ref)) fprintf(fp, "No open sockets.\n"); @@ -1096,9 +1229,9 @@ */ static char *socket_hdr_32 = -"FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT"; +"FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT 
DESTINATION-PORT"; static char *socket_hdr_64 = -"FD SOCKET SOCK FAMILY:TYPE SOURCE:PORT DESTINATION:PORT"; +"FD SOCKET SOCK FAMILY:TYPE SOURCE-PORT DESTINATION-PORT"; static int sym_socket_dump(ulong file, @@ -1223,7 +1356,12 @@ dump_struct("sock", sock, 0); break; case SOCK_V2: - dump_struct("inet_sock", sock, 0); + if (STRUCT_EXISTS("inet_sock") && !(net->flags & NO_INET_SOCK)) + dump_struct("inet_sock", sock, 0); + else if (STRUCT_EXISTS("sock")) + dump_struct("sock", sock, 0); + else + fprintf(fp, "\nunable to display inet_sock structure\n"); break; } break; --- crash/dev.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/dev.c 2005-11-23 11:09:08.000000000 -0500 @@ -91,13 +91,13 @@ switch(c) { case 'i': - if (machine_type("X86") || machine_type("S390X")) + if (machine_type("S390X")) option_not_supported(c); do_io(); return; case 'p': - if (machine_type("X86") || machine_type("S390X")) + if (machine_type("S390X")) option_not_supported(c); do_pci(); return; --- crash/alpha.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/alpha.c 2005-11-04 17:37:53.000000000 -0500 @@ -1858,8 +1858,6 @@ fprintf(fp, " flags: %lx (", machdep->flags); if (machdep->flags & HWRESET) fprintf(fp, "%sHWRESET", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); fprintf(fp, " identity_map_base: %lx\n", machdep->identity_map_base); --- crash/x86.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/x86.c 2006-04-26 15:53:35.000000000 -0400 @@ -685,6 +685,7 @@ bt->debug || (bt->flags & BT_FRAMESIZE_DEBUG) || !(bt->flags & BT_OLD_BACK_TRACE)) { + bt->flags &= ~BT_OLD_BACK_TRACE; lkcd_x86_back_trace(bt, 0, fp); return; } @@ -964,6 +965,10 @@ static int x86_kvtop(struct task_context *, ulong, physaddr_t *, int); static int x86_uvtop_pae(struct task_context *, ulong, physaddr_t *, int); static int x86_kvtop_pae(struct task_context *, ulong, physaddr_t *, int); +static int x86_uvtop_xen_wpt(struct task_context *, ulong, physaddr_t *, int); +static int x86_kvtop_xen_wpt(struct task_context *, ulong, physaddr_t *, int); +static int x86_uvtop_pae_xen_wpt(struct task_context *, ulong, physaddr_t *, int); +static int x86_kvtop_pae_xen_wpt(struct task_context *, ulong, physaddr_t *, int); static ulong x86_get_task_pgd(ulong); static ulong x86_processor_speed(void); static ulong x86_get_pc(struct bt_info *); @@ -983,6 +988,14 @@ static int x86_dis_filter(ulong, char *); static struct line_number_hook x86_line_number_hooks[]; static int x86_is_uvaddr(ulong, struct task_context *); +static void x86_init_kernel_pgd(void); +static int x86_xendump_ptm_create(struct xendump_data *); +static ulong x86_xendump_panic_task(struct xendump_data *); +static void x86_get_xendump_regs(struct xendump_data *, struct bt_info *, ulong *, ulong *); +static char *x86_xendump_load_page(ulong, char *); +static char *x86_xendump_load_page_PAE(ulong, char *); +static int x86_xendump_page_index(ulong); +static int x86_xendump_page_index_PAE(ulong); #define INT_EFRAME_SS (14) @@ -1420,6 +1433,17 @@ break; } + if (XEN() && ((short)pt->reg_value[INT_EFRAME_CS] == 0x61) && + ((short)pt->reg_value[INT_EFRAME_DS] == 0x7b) && + ((short)pt->reg_value[INT_EFRAME_ES] == 0x7b) && + 
IS_KVADDR(pt->reg_value[INT_EFRAME_EIP])) { + if (!(machdep->flags & OMIT_FRAME_PTR) && + !INSTACK(pt->reg_value[INT_EFRAME_EBP], bt)) + continue; + rv = bt->stackbase + sizeof(ulong) * (first - stack); + break; + } + /* check for user exception frame */ if (((short)pt->reg_value[INT_EFRAME_CS] == 0x23) && @@ -1441,6 +1465,20 @@ rv = bt->stackbase + sizeof(ulong) * (first - stack); break; } + + /* + * 2.6 kernels using sysenter_entry instead of system_call + * have a funky trampoline EIP address. + */ + if (((short)pt->reg_value[INT_EFRAME_CS] == 0x73) && + ((short)pt->reg_value[INT_EFRAME_DS] == 0x7b) && + ((short)pt->reg_value[INT_EFRAME_ES] == 0x7b) && + ((short)pt->reg_value[INT_EFRAME_SS] == 0x7b) && + (pt->reg_value[INT_EFRAME_EFLAGS] == 0x246) && + IS_UVADDR(pt->reg_value[INT_EFRAME_ESP], bt->tc)) { + rv = bt->stackbase + sizeof(ulong) * (first - stack); + break; + } } return(rv); } @@ -1536,6 +1574,8 @@ mode = "USER-MODE"; } else if ((cs == 0x10) || (cs == 0x60)) { mode = "KERNEL-MODE"; + } else if (XEN() && (cs == 0x61)) { + mode = "KERNEL-MODE"; } else { mode = "UNKNOWN-MODE"; } @@ -1639,7 +1679,7 @@ machdep->stacksize = machdep->pagesize * 2; if ((machdep->pgd = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc pgd space."); - if ((machdep->pmd = (char *)malloc(PAGESIZE())) == NULL) + if ((machdep->pmd = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc pmd space."); if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc ptbl space."); @@ -1696,14 +1736,17 @@ machdep->cmd_mach = x86_cmd_mach; machdep->get_smp_cpus = x86_get_smp_cpus; machdep->line_number_hooks = x86_line_number_hooks; - if (x86_omit_frame_pointer()) - machdep->flags |= OMIT_FRAME_PTR; machdep->flags |= FRAMESIZE_DEBUG; machdep->value_to_symbol = generic_machdep_value_to_symbol; - machdep->init_kernel_pgd = NULL; + machdep->init_kernel_pgd = x86_init_kernel_pgd; + machdep->xendump_ptm_create = x86_xendump_ptm_create; + 
machdep->xendump_panic_task = x86_xendump_panic_task; + machdep->get_xendump_regs = x86_get_xendump_regs; break; case POST_GDB: + if (x86_omit_frame_pointer()) + machdep->flags |= OMIT_FRAME_PTR; STRUCT_SIZE_INIT(user_regs_struct, "user_regs_struct"); MEMBER_OFFSET_INIT(user_regs_struct_ebp, "user_regs_struct", "ebp"); @@ -1726,6 +1769,31 @@ machdep->hz = HZ; if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) machdep->hz = 1000; + + if (machdep->flags & PAE){ + machdep->section_size_bits = _SECTION_SIZE_BITS_PAE; + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS_PAE; + } else { + machdep->section_size_bits = _SECTION_SIZE_BITS; + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; + } + + if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES)) { + if (machdep->flags & PAE) + machdep->uvtop = x86_uvtop_pae_xen_wpt; + else + machdep->uvtop = x86_uvtop_xen_wpt; + } + + if (XEN()) { + MEMBER_OFFSET_INIT(vcpu_guest_context_user_regs, + "vcpu_guest_context", "user_regs"); + MEMBER_OFFSET_INIT(cpu_user_regs_esp, + "cpu_user_regs", "esp"); + MEMBER_OFFSET_INIT(cpu_user_regs_eip, + "cpu_user_regs", "eip"); + } + break; case POST_INIT: @@ -1825,7 +1893,7 @@ fprintf(fp, " PAGE: %s (4MB)\n\n", mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, MKSTR(NONPAE_PAGEBASE(pgd_pte)))); - x86_translate_pte(0, 0, pgd_pte); + x86_translate_pte(pgd_pte, 0, 0); } *paddr = NONPAE_PAGEBASE(pgd_pte) + (vaddr & ~_4MB_PAGE_MASK); @@ -1892,6 +1960,169 @@ } static int +x86_uvtop_xen_wpt(struct task_context *tc, ulong vaddr, physaddr_t *paddr, int verbose) +{ + ulong mm, active_mm; + ulong *pgd; + ulong *page_dir; + ulong *page_middle; + ulong *machine_page_table, *pseudo_page_table; + ulong pgd_pte, pseudo_pgd_pte; + ulong pmd_pte; + ulong machine_pte, pseudo_pte; + char buf[BUFSIZE]; + + if (!tc) + error(FATAL, "current context invalid\n"); + + *paddr = 0; + + if (is_kernel_thread(tc->task) && IS_KVADDR(vaddr)) { + if (VALID_MEMBER(thread_struct_cr3)) + pgd = (ulong *)machdep->get_task_pgd(tc->task); + else { + if 
(INVALID_MEMBER(task_struct_active_mm)) + error(FATAL, "no cr3 or active_mm?\n"); + + readmem(tc->task + OFFSET(task_struct_active_mm), + KVADDR, &active_mm, sizeof(void *), + "task active_mm contents", FAULT_ON_ERROR); + + if (!active_mm) + error(FATAL, + "no active_mm for this kernel thread\n"); + + readmem(active_mm + OFFSET(mm_struct_pgd), + KVADDR, &pgd, sizeof(long), + "mm_struct pgd", FAULT_ON_ERROR); + } + } else { + if ((mm = task_mm(tc->task, TRUE))) + pgd = ULONG_PTR(tt->mm_struct + + OFFSET(mm_struct_pgd)); + else + readmem(tc->mm_struct + OFFSET(mm_struct_pgd), + KVADDR, &pgd, sizeof(long), "mm_struct pgd", + FAULT_ON_ERROR); + } + + if (verbose) + fprintf(fp, "PAGE DIRECTORY: %lx\n", (ulong)pgd); + + page_dir = pgd + (vaddr >> PGDIR_SHIFT); + + FILL_PGD(NONPAE_PAGEBASE(pgd), KVADDR, PAGESIZE()); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(page_dir)); + + if (verbose) + fprintf(fp, " PGD: %s => %lx\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)page_dir)), + pgd_pte); + + if (!pgd_pte) + goto no_upage; + + if (pgd_pte & _PAGE_4M) { + if (verbose) + fprintf(fp, " PAGE: %s (4MB) [machine]\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(pgd_pte)))); + + pseudo_pgd_pte = xen_machine_to_pseudo(NONPAE_PAGEBASE(pgd_pte)); + + if (pseudo_pgd_pte == XEN_MFN_NOT_FOUND) { + if (verbose) + fprintf(fp, " PAGE: page not available\n"); + *paddr = PADDR_NOT_AVAILABLE; + return FALSE; + } + + pseudo_pgd_pte |= PAGEOFFSET(pgd_pte); + + if (verbose) { + fprintf(fp, " PAGE: %s (4MB)\n\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(pseudo_pgd_pte)))); + + x86_translate_pte(pseudo_pgd_pte, 0, 0); + } + + *paddr = NONPAE_PAGEBASE(pseudo_pgd_pte) + + (vaddr & ~_4MB_PAGE_MASK); + + return TRUE; + } + + page_middle = page_dir; + + FILL_PMD(NONPAE_PAGEBASE(page_middle), KVADDR, PAGESIZE()); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(page_middle)); + + if (verbose) + fprintf(fp, " PMD: %s => %lx\n", + 
mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)page_middle)), + pmd_pte); + + if (!pmd_pte) + goto no_upage; + + machine_page_table = (ulong *)((NONPAE_PAGEBASE(pmd_pte)) + + ((vaddr>>10) & ((PTRS_PER_PTE-1)<<2))); + + pseudo_page_table = (ulong *) + xen_machine_to_pseudo(NONPAE_PAGEBASE(machine_page_table)); + + FILL_PTBL(NONPAE_PAGEBASE(pseudo_page_table), PHYSADDR, PAGESIZE()); + machine_pte = ULONG(machdep->ptbl + PAGEOFFSET(machine_page_table)); + + if (verbose) { + fprintf(fp, " PTE: %s [machine]\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)machine_page_table))); + + fprintf(fp, " PTE: %s => %lx\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)pseudo_page_table + + PAGEOFFSET(machine_page_table))), machine_pte); + } + + if (!(machine_pte & (_PAGE_PRESENT | _PAGE_PROTNONE))) { + *paddr = machine_pte; + + if (machine_pte && verbose) { + fprintf(fp, "\n"); + x86_translate_pte(machine_pte, 0, 0); + } + + goto no_upage; + } + + pseudo_pte = xen_machine_to_pseudo(NONPAE_PAGEBASE(machine_pte)); + pseudo_pte |= PAGEOFFSET(machine_pte); + + *paddr = NONPAE_PAGEBASE(pseudo_pte) + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %s [machine]\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(machine_pte)))); + + fprintf(fp, " PAGE: %s\n\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(pseudo_pte)))); + + x86_translate_pte(pseudo_pte, 0, 0); + } + + return TRUE; + +no_upage: + return FALSE; +} + +static int x86_uvtop_pae(struct task_context *tc, ulong vaddr, physaddr_t *paddr, int verbose) { ulong mm, active_mm; @@ -2040,6 +2271,12 @@ return FALSE; } +static int +x86_uvtop_pae_xen_wpt(struct task_context *tc, ulong vaddr, physaddr_t *paddr, int verbose) +{ + return(error(FATAL, "x86_uvtop_pae_xen_wpt: TBD\n")); +} + /* * Translates a kernel virtual address to its physical address. 
cmd_vtop() * sets the verbose flag so that the pte translation gets displayed; all @@ -2072,6 +2309,9 @@ return TRUE; } + if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES)) + return (x86_kvtop_xen_wpt(tc, kvaddr, paddr, verbose)); + pgd = (ulong *)vt->kernel_pgd[0]; if (verbose) @@ -2095,7 +2335,7 @@ fprintf(fp, " PAGE: %s (4MB)\n\n", mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, MKSTR(NONPAE_PAGEBASE(pgd_pte)))); - x86_translate_pte(0, 0, pgd_pte); + x86_translate_pte(pgd_pte, 0, 0); } *paddr = NONPAE_PAGEBASE(pgd_pte) + (kvaddr & ~_4MB_PAGE_MASK); @@ -2158,6 +2398,131 @@ return FALSE; } +static int +x86_kvtop_xen_wpt(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose) +{ + ulong *pgd; + ulong *page_dir; + ulong *page_middle; + ulong *machine_page_table, *pseudo_page_table; + ulong pgd_pte, pseudo_pgd_pte; + ulong pmd_pte; + ulong machine_pte, pseudo_pte; + char buf[BUFSIZE]; + + pgd = (ulong *)vt->kernel_pgd[0]; + + if (verbose) + fprintf(fp, "PAGE DIRECTORY: %lx\n", (ulong)pgd); + + page_dir = pgd + (kvaddr >> PGDIR_SHIFT); + + FILL_PGD(NONPAE_PAGEBASE(pgd), KVADDR, PAGESIZE()); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(page_dir)); + + if (verbose) + fprintf(fp, " PGD: %s => %lx\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)page_dir)), pgd_pte); + + if (!pgd_pte) + goto no_kpage; + + if (pgd_pte & _PAGE_4M) { + if (verbose) + fprintf(fp, " PAGE: %s (4MB) [machine]\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(pgd_pte)))); + + pseudo_pgd_pte = xen_machine_to_pseudo(NONPAE_PAGEBASE(pgd_pte)); + + if (pseudo_pgd_pte == XEN_MFN_NOT_FOUND) { + if (verbose) + fprintf(fp, " PAGE: page not available\n"); + *paddr = PADDR_NOT_AVAILABLE; + return FALSE; + } + + pseudo_pgd_pte |= PAGEOFFSET(pgd_pte); + + if (verbose) { + fprintf(fp, " PAGE: %s (4MB)\n\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(pseudo_pgd_pte)))); + + x86_translate_pte(pseudo_pgd_pte, 0, 0); + } + + *paddr = 
NONPAE_PAGEBASE(pseudo_pgd_pte) + + (kvaddr & ~_4MB_PAGE_MASK); + + return TRUE; + } + + page_middle = page_dir; + + FILL_PMD(NONPAE_PAGEBASE(page_middle), KVADDR, PAGESIZE()); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(page_middle)); + + if (verbose) + fprintf(fp, " PMD: %s => %lx\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)page_middle)), pmd_pte); + + if (!pmd_pte) + goto no_kpage; + + machine_page_table = (ulong *)((NONPAE_PAGEBASE(pmd_pte)) + + ((kvaddr>>10) & ((PTRS_PER_PTE-1)<<2))); + + pseudo_page_table = (ulong *) + xen_machine_to_pseudo(NONPAE_PAGEBASE(machine_page_table)); + + FILL_PTBL(NONPAE_PAGEBASE(pseudo_page_table), PHYSADDR, PAGESIZE()); + machine_pte = ULONG(machdep->ptbl + PAGEOFFSET(machine_page_table)); + + if (verbose) { + fprintf(fp, " PTE: %s [machine]\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)machine_page_table))); + + fprintf(fp, " PTE: %s => %lx\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR((ulong)pseudo_page_table + + PAGEOFFSET(machine_page_table))), machine_pte); + } + + if (!(machine_pte & (_PAGE_PRESENT | _PAGE_PROTNONE))) { + if (machine_pte && verbose) { + fprintf(fp, "\n"); + x86_translate_pte(machine_pte, 0, 0); + } + goto no_kpage; + } + + pseudo_pte = xen_machine_to_pseudo(NONPAE_PAGEBASE(machine_pte)); + pseudo_pte |= PAGEOFFSET(machine_pte); + + if (verbose) { + fprintf(fp, " PAGE: %s [machine]\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(machine_pte)))); + + fprintf(fp, " PAGE: %s\n\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(NONPAE_PAGEBASE(pseudo_pte)))); + + x86_translate_pte(pseudo_pte, 0, 0); + } + + *paddr = NONPAE_PAGEBASE(pseudo_pte) + PAGEOFFSET(kvaddr); + + return TRUE; + +no_kpage: + return FALSE; +} + static int x86_kvtop_pae(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose) @@ -2188,6 +2553,9 @@ return TRUE; } + if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES)) + return 
(x86_kvtop_pae_xen_wpt(tc, kvaddr, paddr, verbose)); + pgd = (ulonglong *)vt->kernel_pgd[0]; if (verbose) @@ -2289,6 +2657,12 @@ return FALSE; } +static int +x86_kvtop_pae_xen_wpt(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose) +{ + return (error(FATAL, "x86_kvtop_pae_xen_wpt: TBD\n")); +} + /* * Get the relevant page directory pointer from a task structure. */ @@ -2341,6 +2715,7 @@ x86_dump_machdep_table(ulong arg) { int others; + ulong xen_wpt; switch (arg) { default: @@ -2355,8 +2730,6 @@ fprintf(fp, "%sPAE", others++ ? "|" : ""); if (machdep->flags & OMIT_FRAME_PTR) fprintf(fp, "%sOMIT_FRAME_PTR", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? "|" : ""); if (machdep->flags & FRAMESIZE_DEBUG) fprintf(fp, "%sFRAMESIZE_DEBUG", others++ ? "|" : ""); fprintf(fp, ")\n"); @@ -2376,12 +2749,17 @@ fprintf(fp, " eframe_search: x86_eframe_search()\n"); fprintf(fp, " back_trace: x86_back_trace_cmd()\n"); fprintf(fp, "get_processor_speed: x86_processor_speed()\n"); + xen_wpt = XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES); if (machdep->flags & PAE) { - fprintf(fp, " uvtop: x86_uvtop_pae()\n"); - fprintf(fp, " kvtop: x86_uvtop_pae()\n"); + fprintf(fp, " uvtop: %s()\n", + xen_wpt ? "x86_uvtop_pae_xen_wpt" : "x86_uvtop_pae"); + fprintf(fp, " kvtop: x86_kvtop_pae()%s\n", + xen_wpt ? " -> x86_kvtop_pae_xen_wpt()" : ""); } else { - fprintf(fp, " uvtop: x86_uvtop()\n"); - fprintf(fp, " kvtop: x86_uvtop()\n"); + fprintf(fp, " uvtop: %s()\n", + xen_wpt ? "x86_uvtop_xen_wpt" : "x86_uvtop"); + fprintf(fp, " kvtop: x86_kvtop()%s\n", + xen_wpt ? 
" -> x86_kvtop_xen_wpt()" : ""); } fprintf(fp, " get_task_pgd: x86_get_task_pgd()\n"); fprintf(fp, " dump_irq: generic_dump_irq()\n"); @@ -2399,7 +2777,7 @@ fprintf(fp, " is_kvaddr: generic_is_kvaddr()\n"); fprintf(fp, " is_uvaddr: generic_is_uvaddr()\n"); fprintf(fp, " verify_paddr: generic_verify_paddr()\n"); - fprintf(fp, " init_kernel_pgd: NULL\n"); + fprintf(fp, " init_kernel_pgd: x86_init_kernel_pgd()\n"); fprintf(fp, " value_to_symbol: %s\n", machdep->value_to_symbol == generic_machdep_value_to_symbol ? "generic_machdep_value_to_symbol()" : @@ -2412,6 +2790,12 @@ fprintf(fp, " pmd: %lx\n", (ulong)machdep->pmd); fprintf(fp, " ptbl: %lx\n", (ulong)machdep->ptbl); fprintf(fp, " ptrs_per_pgd: %d\n", machdep->ptrs_per_pgd); + fprintf(fp, " section_size_bits: %ld\n", machdep->section_size_bits); + fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits); + fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root); + fprintf(fp, " xendump_ptm_create: x86_xendump_ptm_create()\n"); + fprintf(fp, " xendump_panic_task: x86_xendump_panic_task()\n"); + fprintf(fp, " get_xendump_regs: x86_get_xendump_regs()\n"); fprintf(fp, " machspec: x86_machine_specific\n"); fprintf(fp, " idt_table: %lx\n", (ulong)machdep->machspec->idt_table); @@ -2732,6 +3116,9 @@ switch (flag) { case READ_IDT_INIT: + if (!symbol_exists("idt_table")) + return NULL; + if (!(idt = (ulong *)malloc(desc_struct_size))) { error(WARNING, "cannot malloc idt_table\n\n"); return NULL; @@ -2779,6 +3166,10 @@ break; case READ_IDT_RUNTIME: + if (!symbol_exists("idt_table")) + error(FATAL, + "idt_table does not exist on this architecture\n"); + idt = (ulong *)GETBUF(desc_struct_size); readmem(symbol_value("idt_table"), KVADDR, idt, desc_struct_size, "idt_table", FAULT_ON_ERROR); @@ -2969,6 +3360,16 @@ } } + if (XEN() && (count == 1) && symbol_exists("cpu_present_map")) { + ulong cpu_present_map; + + get_symbol_data("cpu_present_map", sizeof(ulong), + &cpu_present_map); + + cpucount = 
count_bits_long(cpu_present_map); + count = MAX(cpucount, kt->cpus); + } + return count; } @@ -3092,31 +3493,31 @@ * with the -fomit-frame-pointer flag. */ #define PUSH_BP_MOV_ESP_BP 0xe58955 +#define PUSH_BP_CLR_EAX_MOV_ESP_BP 0xe589c03155ULL static int x86_omit_frame_pointer(void) { - ulong push_bp_mov_esp_bp[3]; + ulonglong push_bp_mov_esp_bp; + int i; + char *checkfuncs[] = {"sys_open", "sys_fork", "sys_read"}; if (pc->flags & KERNEL_DEBUG_QUERY) return FALSE; - if (!readmem(symbol_value("sys_open"), KVADDR, &push_bp_mov_esp_bp[0], - sizeof(ulong), "x86_omit_frame_pointer", RETURN_ON_ERROR)) - return TRUE; - if (!readmem(symbol_value("sys_fork"), KVADDR, &push_bp_mov_esp_bp[1], - sizeof(ulong), "x86_omit_frame_pointer", RETURN_ON_ERROR)) - return TRUE; - if (!readmem(symbol_value("sys_read"), KVADDR, &push_bp_mov_esp_bp[2], - sizeof(ulong), "x86_omit_frame_pointer", RETURN_ON_ERROR)) - return TRUE; - - if (((push_bp_mov_esp_bp[0] & 0xffffff) == PUSH_BP_MOV_ESP_BP) && - ((push_bp_mov_esp_bp[1] & 0xffffff) == PUSH_BP_MOV_ESP_BP) && - ((push_bp_mov_esp_bp[2] & 0xffffff) == PUSH_BP_MOV_ESP_BP)) - return FALSE; + for (i = 0; i < 2; i++) { + if (!readmem(symbol_value(checkfuncs[i]), KVADDR, + &push_bp_mov_esp_bp, sizeof(ulonglong), + "x86_omit_frame_pointer", RETURN_ON_ERROR)) + return TRUE; + if (!(((push_bp_mov_esp_bp & 0x0000ffffffULL) == + PUSH_BP_MOV_ESP_BP) || + ((push_bp_mov_esp_bp & 0xffffffffffULL) == + PUSH_BP_CLR_EAX_MOV_ESP_BP))) + return TRUE; + } - return TRUE; + return FALSE; } /* @@ -3207,4 +3608,305 @@ return ((sp = value_search(value, offset))); } + +static void +x86_init_kernel_pgd(void) +{ + int i; + ulong value; + + value = symbol_value("swapper_pg_dir"); + + if (XEN()) + get_symbol_data("swapper_pg_dir", sizeof(ulong), &value); + else + value = symbol_value("swapper_pg_dir"); + + for (i = 0; i < NR_CPUS; i++) + vt->kernel_pgd[i] = value; + +} + +#include "xendump.h" + +/* + * Create an index of mfns for each page that makes up the + * kernel's 
complete phys_to_machine_mapping[max_pfn] array.
+ */
+static int
+x86_xendump_ptm_create(struct xendump_data *xd)
+{
+	int i, idx;
+	ulong mfn, kvaddr, ctrlreg[8], ctrlreg_offset;
+	ulong *up;
+	off_t offset;
+
+	if ((ctrlreg_offset = MEMBER_OFFSET("vcpu_guest_context", "ctrlreg")) ==
+	    INVALID_OFFSET)
+		error(FATAL,
+		    "cannot determine vcpu_guest_context.ctrlreg offset\n");
+	else if (CRASHDEBUG(1))
+		fprintf(xd->ofp,
+		    "MEMBER_OFFSET(vcpu_guest_context, ctrlreg): %ld\n",
+			ctrlreg_offset);
+	/* read the saved ctrlreg[] array from the dumpfile's context area */
+	offset = (off_t)xd->xc_core.header.xch_ctxt_offset +
+		(off_t)ctrlreg_offset;
+
+	if (lseek(xd->xfd, offset, SEEK_SET) == -1)
+		error(FATAL, "cannot lseek to xch_ctxt_offset\n");
+
+	if (read(xd->xfd, &ctrlreg, sizeof(ctrlreg)) !=
+	    sizeof(ctrlreg))
+		error(FATAL, "cannot read vcpu_guest_context ctrlreg[8]\n");
+
+	for (i = 0; CRASHDEBUG(1) && (i < 8); i++) {
+		fprintf(xd->ofp, "ctrlreg[%d]: %lx\n", i, ctrlreg[i]);
+	}
+	/* ctrlreg[3] (cr3) locates the page directory; cache that page in machdep->pgd */
+	mfn = ctrlreg[3] >> PAGESHIFT();
+
+	if (!xc_core_mfn_to_page(mfn, machdep->pgd))
+		error(FATAL, "cannot read/find cr3 page\n");
+
+	if (CRASHDEBUG(1)) {
+		fprintf(xd->ofp, "contents of page directory page:\n");
+
+		up = (ulong *)machdep->pgd;
+		for (i = 0; i < 256; i++) {
+			fprintf(xd->ofp, "%08lx: %08lx %08lx %08lx %08lx\n",
+				(ulong)((i * 4) * sizeof(ulong)),
+				*up, *(up+1), *(up+2), *(up+3));
+			up += 4;
+		}
+	}
+	/* fetch the kernel's max_pfn value through the page tables just loaded */
+	kvaddr = symbol_value("max_pfn");
+	if (!x86_xendump_load_page(kvaddr, xd->page))
+		return FALSE;
+	up = (ulong *)(xd->page + PAGEOFFSET(kvaddr));
+	if (CRASHDEBUG(1))
+		fprintf(xd->ofp, "max_pfn: %lx\n", *up);
+	/* one frame per page of the array, rounded up so a partial last page is indexed too */
+	xd->xc_core.ptm_frames = (*up + (PAGESIZE()/sizeof(ulong)) - 1) / (PAGESIZE()/sizeof(ulong));
+	/* size the list by its own element type rather than a hard-coded int */
+	if ((xd->xc_core.ptm_frame_index_list = (ulong *)
+	    malloc(xd->xc_core.ptm_frames * sizeof(*xd->xc_core.ptm_frame_index_list))) == NULL)
+		error(FATAL, "cannot malloc ptm_mfn_frame_list");
+
+	kvaddr = symbol_value("phys_to_machine_mapping");
+	if (!x86_xendump_load_page(kvaddr, xd->page))
+		return FALSE;
+	up = (ulong *)(xd->page + PAGEOFFSET(kvaddr));
+	if (CRASHDEBUG(1))
+		fprintf(fp, "phys_to_machine_mapping: %lx\n", *up);
+	/* the symbol is a pointer: *up is the kernel virtual address of the array itself */
+	kvaddr = *up;
+	machdep->last_ptbl_read = BADADDR;	/* invalidate the cache x86_xendump_page_index() consults */
+
+	for (i = 0; i < xd->xc_core.ptm_frames; i++) {
+		if ((idx = x86_xendump_page_index(kvaddr)) == MFN_NOT_FOUND)
+			return FALSE;
+		xd->xc_core.ptm_frame_index_list[i] = idx;
+		kvaddr += PAGESIZE();
+	}
+
+	machdep->last_ptbl_read = 0;
+
+	return TRUE;
+}
+
+/*
+ * Find the page associated with the kvaddr, and read its contents
+ * into the passed-in buffer.  Returns pgbuf, or NULL on a failed lookup.
+ */
+static char *
+x86_xendump_load_page(ulong kvaddr, char *pgbuf)
+{
+	ulong *entry;
+	ulong *up;
+	ulong mfn;
+
+	if (machdep->flags & PAE)
+		return x86_xendump_load_page_PAE(kvaddr, pgbuf);
+	/* first level: index the cached page directory to find the page table's mfn */
+	up = (ulong *)machdep->pgd;
+	entry = up + (kvaddr >> PGDIR_SHIFT);
+	mfn = (*entry) >> PAGESHIFT();
+
+	if (!xc_core_mfn_to_page(mfn, pgbuf)) {
+		error(INFO, "cannot read/find pgd entry from cr3 page\n");
+		return NULL;
+	}
+	/* second level: pgbuf now holds the page table; index it for the data page */
+	up = (ulong *)pgbuf;
+	entry = up + ((kvaddr>>12) & (PTRS_PER_PTE-1));
+
+	mfn = (*entry) >> PAGESHIFT();
+
+	if (!xc_core_mfn_to_page(mfn, pgbuf)) {
+		error(INFO, "cannot read/find page table page\n");
+		return NULL;
+	}
+
+	return pgbuf;
+}
+
+static char *
+x86_xendump_load_page_PAE(ulong kvaddr, char *pgbuf)
+{
+	error(FATAL, "x86_xendump_load_page_PAE: TBD\n");
+	return NULL;
+}
+
+/*
+ * Find the dumpfile page index associated with the kvaddr.
+ */
+static int
+x86_xendump_page_index(ulong kvaddr)
+{
+	int idx;
+	ulong *entry;
+	ulong *up;
+	ulong mfn;
+	char *pgbuf;
+
+	pgbuf = machdep->ptbl;	/* page-table page is cached here between calls */
+
+	if (machdep->flags & PAE)
+		return x86_xendump_page_index_PAE(kvaddr);
+	/* first level: index the cached page directory to find the page table's mfn */
+	up = (ulong *)machdep->pgd;
+	entry = up + (kvaddr >> PGDIR_SHIFT);
+	mfn = (*entry) >> PAGESHIFT();
+	/* re-read the page table only when it differs from the one already in pgbuf */
+	if ((mfn != machdep->last_ptbl_read) &&
+	    !xc_core_mfn_to_page(mfn, pgbuf)) {
+		error(INFO, "cannot read/find pgd entry from cr3 page\n");
+		return MFN_NOT_FOUND;
+	}
+
+	machdep->last_ptbl_read = mfn;
+	/* second level: index the page table for the mfn backing kvaddr */
+	up = (ulong *)pgbuf;
+	entry = up + ((kvaddr>>12) & (PTRS_PER_PTE-1));
+
+	mfn = (*entry) >> PAGESHIFT();
+	/* a failed lookup is reported here and MFN_NOT_FOUND is passed back to the caller */
+	if ((idx = xc_core_mfn_to_page_index(mfn)) == MFN_NOT_FOUND)
+		error(INFO, "cannot determine page index for %lx\n",
+			kvaddr);
+
+	return idx;
+}
+
+static int
+x86_xendump_page_index_PAE(ulong kvaddr)
+{
+	error(FATAL, "x86_xendump_page_index_PAE: TBD\n");	/* message names this function */
+	return -1;
+}
+
+/*
+ * Pull the esp from the cpu_user_regs struct in the header
+ * turn it into a task, and match it with the active_set.
+ * Unfortunately, the registers in the vcpu_guest_context
+ * are not necessarily those of the panic task, so for now
+ * let get_active_set_panic_task() get the right task.
+ */
+static ulong
+x86_xendump_panic_task(struct xendump_data *xd)
+{
+	return NO_TASK;	/* unconditional: everything below is compiled out and unreachable */
+
+#ifdef TO_BE_REVISITED
+	int i;
+	ulong esp;
+	off_t offset;
+	ulong task;
+
+	/* without both member offsets the saved esp cannot be located */
+	if (INVALID_MEMBER(vcpu_guest_context_user_regs) ||
+	    INVALID_MEMBER(cpu_user_regs_esp))
+		return NO_TASK;
+
+	offset = (off_t)xd->xc_core.header.xch_ctxt_offset +
+		(off_t)OFFSET(vcpu_guest_context_user_regs) +
+		(off_t)OFFSET(cpu_user_regs_esp);
+
+	if (lseek(xd->xfd, offset, SEEK_SET) == -1)
+		return NO_TASK;
+
+	if (read(xd->xfd, &esp, sizeof(ulong)) != sizeof(ulong))
+		return NO_TASK;
+
+	if (IS_KVADDR(esp) && (task = stkptr_to_task(esp))) {
+		/* accept the task only if it is one of the per-cpu active tasks */
+		for (i = 0; i < NR_CPUS; i++) {
+			if (task == tt->active_set[i]) {
+				if (CRASHDEBUG(0))
+					error(INFO,
+				    "x86_xendump_panic_task: esp: %lx -> task: %lx\n",
+						esp, task);
+				return task;
+			}
+		}
+		/* both %lx conversions need an argument: esp and task */
+		error(WARNING,
+		    "x86_xendump_panic_task: esp: %lx -> task: %lx (not active)\n",
+			esp, task);
+	}
+
+	return NO_TASK;
+#endif
+}
+
+/*
+ * Because of an off-by-one vcpu bug in early xc_domain_dumpcore()
+ * instantiations, the registers in the vcpu_guest_context are not
+ * necessarily those of the panic task. If not, the eip/esp will be
+ * in stop_this_cpu, as a result of the IP interrupt in panic(),
+ * but the trace is strange because it comes out of the hypervisor
+ * at least if the vcpu had been idle.
+ */ +static void +x86_get_xendump_regs(struct xendump_data *xd, struct bt_info *bt, ulong *eip, ulong *esp) +{ + ulong task, xeip, xesp; + off_t offset; + + if (INVALID_MEMBER(vcpu_guest_context_user_regs) || + INVALID_MEMBER(cpu_user_regs_eip) || + INVALID_MEMBER(cpu_user_regs_esp)) + goto generic; + + offset = (off_t)xd->xc_core.header.xch_ctxt_offset + + (off_t)OFFSET(vcpu_guest_context_user_regs) + + (off_t)OFFSET(cpu_user_regs_esp); + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + goto generic; + if (read(xd->xfd, &xesp, sizeof(ulong)) != sizeof(ulong)) + goto generic; + + offset = (off_t)xd->xc_core.header.xch_ctxt_offset + + (off_t)OFFSET(vcpu_guest_context_user_regs) + + (off_t)OFFSET(cpu_user_regs_eip); + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + goto generic; + if (read(xd->xfd, &xeip, sizeof(ulong)) != sizeof(ulong)) + goto generic; + + if (IS_KVADDR(xesp) && (task = stkptr_to_task(xesp)) && + (task == bt->task)) { + if (CRASHDEBUG(1)) + fprintf(xd->ofp, + "hooks from vcpu_guest_context: eip: %lx esp: %lx\n", xeip, xesp); + *eip = xeip; + *esp = xesp; + return; + } + +generic: + return machdep->get_stack_frame(bt, eip, esp); +} #endif /* X86 */ --- crash/ppc.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/ppc.c 2006-04-11 15:22:52.000000000 -0400 @@ -138,6 +138,8 @@ machdep->hz = HZ; if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) machdep->hz = 1000; + machdep->section_size_bits = _SECTION_SIZE_BITS; + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; break; case POST_INIT: @@ -154,8 +156,6 @@ fprintf(fp, " flags: %lx (", machdep->flags); if (machdep->flags & KSYMS_START) fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); @@ -205,6 +205,9 @@ fprintf(fp, " pmd: %lx\n", (ulong)machdep->pmd); fprintf(fp, " ptbl: %lx\n", (ulong)machdep->ptbl); fprintf(fp, " ptrs_per_pgd: %d\n", machdep->ptrs_per_pgd); + fprintf(fp, " section_size_bits: %ld\n", machdep->section_size_bits); + fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits); + fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root); fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec); } --- crash/ia64.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/ia64.c 2006-04-26 15:56:33.000000000 -0400 @@ -1,8 +1,8 @@ /* ia64.c - core analysis suite * * Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. - * Copyright (C) 2002, 2003, 2004, 2005 David Anderson - * Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2002, 2003, 2004, 2005, 2006 David Anderson + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -92,6 +92,9 @@ case 16384: machdep->stacksize = (power(2, 1) * PAGESIZE()); break; + case 65536: + machdep->stacksize = (power(2, 0) * PAGESIZE()); + break; default: machdep->stacksize = 32*1024; break; @@ -401,8 +404,6 @@ fprintf(fp, "%sUNW_R0", others++ ? "|" : ""); if (machdep->flags & MEM_LIMIT) fprintf(fp, "%sMEM_LIMIT", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? "|" : ""); if (machdep->flags & DEVMEMRD) fprintf(fp, "%sDEVMEMRD", others++ ? "|" : ""); if (machdep->flags & INIT) @@ -445,6 +446,7 @@ (machdep->verify_paddr == ia64_verify_paddr) ? 
"ia64_verify_paddr" : "generic_verify_paddr"); fprintf(fp, " init_kernel_pgd: NULL\n"); + fprintf(fp, " xendump_ptm_create: NULL\n"); fprintf(fp, " value_to_symbol: generic_machdep_value_to_symbol()\n"); fprintf(fp, " line_number_hooks: ia64_line_number_hooks\n"); fprintf(fp, " last_pgd_read: %lx\n", machdep->last_pgd_read); @@ -2605,6 +2607,7 @@ ia64_post_init(void) { struct machine_specific *ms; + struct gnu_request req; ms = &ia64_machine_specific; @@ -2677,9 +2680,10 @@ } } - if (symbol_exists("ia64_init_stack") && !ms->ia64_init_stack_size) - ms->ia64_init_stack_size = get_array_length("ia64_init_stack", - NULL, 0); + if (symbol_exists("ia64_init_stack") && !ms->ia64_init_stack_size) { + get_symbol_type("ia64_init_stack", NULL, &req); + ms->ia64_init_stack_size = req.length; + } if (DUMPFILE() && ia64_in_init_stack(SWITCH_STACK_ADDR(CURRENT_TASK()))) machdep->flags |= INIT; --- crash/s390.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/s390.c 2005-11-04 17:37:53.000000000 -0500 @@ -178,8 +178,6 @@ fprintf(fp, " flags: %lx (", machdep->flags); if (machdep->flags & KSYMS_START) fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? "|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); --- crash/s390x.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/s390x.c 2005-11-04 17:37:53.000000000 -0500 @@ -193,8 +193,6 @@ fprintf(fp, " flags: %lx (", machdep->flags); if (machdep->flags & KSYMS_START) fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); --- crash/ppc64.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/ppc64.c 2006-04-26 15:57:36.000000000 -0400 @@ -47,6 +47,9 @@ static char * ppc64_check_eframe(struct ppc64_pt_regs *); static void ppc64_print_eframe(char *, struct ppc64_pt_regs *, struct bt_info *); +static void parse_cmdline_arg(void); +static void ppc64_paca_init(void); +static void ppc64_clear_machdep_cache(void); struct machine_specific ppc64_machine_specific = { { 0 }, 0, 0 }; @@ -75,12 +78,18 @@ error(FATAL, "cannot malloc pmd space."); if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc ptbl space."); + if ((machdep->machspec->level4 = (char *)malloc(PAGESIZE())) == NULL) + error(FATAL, "cannot malloc level4 space."); machdep->last_pgd_read = 0; machdep->last_pmd_read = 0; machdep->last_ptbl_read = 0; + machdep->machspec->last_level4_read = 0; machdep->verify_paddr = generic_verify_paddr; machdep->ptrs_per_pgd = PTRS_PER_PGD; machdep->flags |= MACHDEP_BT_TEXT; + if (machdep->cmdline_arg) + parse_cmdline_arg(); + machdep->clear_machdep_cache = ppc64_clear_machdep_cache; break; case PRE_GDB: @@ -109,6 +118,56 @@ break; case POST_GDB: + if (!(machdep->flags & (VM_ORIG|VM_4_LEVEL))) { + if (THIS_KERNEL_VERSION >= LINUX(2,6,14)) { + machdep->flags |= VM_4_LEVEL; + } else { + machdep->flags |= VM_ORIG; + } + } + if (machdep->flags & VM_ORIG) { + /* pre-2.6.14 layout */ + free(machdep->machspec->level4); + machdep->machspec->level4 = NULL; + machdep->ptrs_per_pgd = PTRS_PER_PGD; + } else { + /* 2.6.14 layout */ + struct machine_specific *m = machdep->machspec; + if (machdep->pagesize == 65536) { + /* 64K pagesize */ + m->l1_index_size = PTE_INDEX_SIZE_L4_64K; + m->l2_index_size = PMD_INDEX_SIZE_L4_64K; + m->l3_index_size = PUD_INDEX_SIZE_L4_64K; + m->l4_index_size = PGD_INDEX_SIZE_L4_64K; + m->pte_shift = PTE_SHIFT_L4_64K; + m->l2_masked_bits = PMD_MASKED_BITS_64K; + } else { + /* 4K 
pagesize */ + m->l1_index_size = PTE_INDEX_SIZE_L4_4K; + m->l2_index_size = PMD_INDEX_SIZE_L4_4K; + m->l3_index_size = PUD_INDEX_SIZE_L4_4K; + m->l4_index_size = PGD_INDEX_SIZE_L4_4K; + m->pte_shift = PTE_SHIFT_L4_4K; + m->l2_masked_bits = PMD_MASKED_BITS_4K; + } + + /* Compute ptrs per each level */ + m->l1_shift = machdep->pageshift; + m->ptrs_per_l1 = (1 << m->l1_index_size); + m->ptrs_per_l2 = (1 << m->l2_index_size); + m->ptrs_per_l3 = (1 << m->l3_index_size); + + machdep->ptrs_per_pgd = m->ptrs_per_l3; + + /* Compute shifts */ + m->l2_shift = m->l1_shift + m->l1_index_size; + m->l3_shift = m->l2_shift + m->l2_index_size; + m->l4_shift = m->l3_shift + m->l3_index_size; + } + + machdep->section_size_bits = _SECTION_SIZE_BITS; + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; + ppc64_paca_init(); machdep->vmalloc_start = ppc64_vmalloc_start; MEMBER_OFFSET_INIT(thread_struct_pg_tables, "thread_struct", "pg_tables"); @@ -229,10 +288,12 @@ fprintf(fp, " flags: %lx (", machdep->flags); if (machdep->flags & KSYMS_START) fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? "|" : ""); if (machdep->flags & MACHDEP_BT_TEXT) fprintf(fp, "%sMACHDEP_BT_TEXT", others++ ? "|" : ""); + if (machdep->flags & VM_ORIG) + fprintf(fp, "%sVM_ORIG", others++ ? "|" : ""); + if (machdep->flags & VM_4_LEVEL) + fprintf(fp, "%sVM_4_LEVEL", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); @@ -269,15 +330,24 @@ fprintf(fp, " is_kvaddr: generic_is_kvaddr()\n"); fprintf(fp, " is_uvaddr: generic_is_uvaddr()\n"); fprintf(fp, " verify_paddr: generic_verify_paddr()\n"); + fprintf(fp, " xendump_ptm_create: NULL\n"); fprintf(fp, " line_number_hooks: ppc64_line_number_hooks\n"); fprintf(fp, " last_pgd_read: %lx\n", machdep->last_pgd_read); fprintf(fp, " last_pmd_read: %lx\n", machdep->last_pmd_read); fprintf(fp, " last_ptbl_read: %lx\n", machdep->last_ptbl_read); + fprintf(fp, "clear_machdep_cache: ppc64_clear_machdep_cache()\n"); fprintf(fp, " pgd: %lx\n", (ulong)machdep->pgd); fprintf(fp, " pmd: %lx\n", (ulong)machdep->pmd); fprintf(fp, " ptbl: %lx\n", (ulong)machdep->ptbl); fprintf(fp, " ptrs_per_pgd: %d\n", machdep->ptrs_per_pgd); + fprintf(fp, " section_size_bits: %ld\n", machdep->section_size_bits); + fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits); + fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root); fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec); + fprintf(fp, " pgd_index_size: %d\n", machdep->machspec->l4_index_size); + fprintf(fp, " pud_index_size: %d\n", machdep->machspec->l3_index_size); + fprintf(fp, " pmd_index_size: %d\n", machdep->machspec->l2_index_size); + fprintf(fp, " pte_index_size: %d\n", machdep->machspec->l1_index_size); } /* @@ -342,7 +412,7 @@ if (!(pte & _PAGE_PRESENT)) { if (pte && verbose) { fprintf(fp, "\n"); - ppc64_translate_pte(pte, 0, 0); + ppc64_translate_pte(pte, 0, PTE_SHIFT); } return FALSE; } @@ -354,7 +424,90 @@ if (verbose) { fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); - ppc64_translate_pte(pte, 0, 0); + ppc64_translate_pte(pte, 0, PTE_SHIFT); + } + + return TRUE; +} + +/* + * Virtual to physical memory translation. This function will be called + * by both ppc64_kvtop and ppc64_uvtop. 
+ */ +static int +ppc64_vtop_level4(ulong vaddr, ulong *level4, physaddr_t *paddr, int verbose) +{ + ulong *level4_dir; + ulong *page_dir; + ulong *page_middle; + ulong *page_table; + ulong level4_pte, pgd_pte, pmd_pte; + ulong pte; + + if (verbose) + fprintf(fp, "PAGE DIRECTORY: %lx\n", (ulong)level4); + + level4_dir = (ulong *)((ulong *)level4 + L4_OFFSET(vaddr)); + FILL_L4(PAGEBASE(level4), KVADDR, PAGESIZE()); + level4_pte = ULONG(machdep->machspec->level4 + PAGEOFFSET(level4_dir)); + if (verbose) + fprintf(fp, " L4: %lx => %lx\n", (ulong)level4_dir, level4_pte); + if (!level4_pte) + return FALSE; + + /* Sometimes we don't have level3 pagetable entries */ + if (machdep->machspec->l3_index_size != 0) { + page_dir = (ulong *)((ulong *)level4_pte + PGD_OFFSET_L4(vaddr)); + FILL_PGD(PAGEBASE(level4_pte), KVADDR, PAGESIZE()); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(page_dir)); + + if (verbose) + fprintf(fp, " PGD: %lx => %lx\n", (ulong)page_dir, pgd_pte); + if (!pgd_pte) + return FALSE; + } else { + pgd_pte = level4_pte; + } + + page_middle = (ulong *)((ulong *)pgd_pte + PMD_OFFSET_L4(vaddr)); + FILL_PMD(PAGEBASE(pgd_pte), KVADDR, PAGESIZE()); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(page_middle)); + + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n", (ulong)page_middle, pmd_pte); + + if (!(pmd_pte)) + return FALSE; + + page_table = (ulong *)(pmd_pte & ~(machdep->machspec->l2_masked_bits)) + + (BTOP(vaddr) & (machdep->machspec->ptrs_per_l1 - 1)); + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n",(ulong)page_middle, + (ulong)page_table); + + FILL_PTBL(PAGEBASE(pmd_pte), KVADDR, PAGESIZE()); + pte = ULONG(machdep->ptbl + PAGEOFFSET(page_table)); + + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)page_table, pte); + + if (!(pte & _PAGE_PRESENT)) { + if (pte && verbose) { + fprintf(fp, "\n"); + ppc64_translate_pte(pte, 0, machdep->machspec->pte_shift); + } + return FALSE; + } + + if (!pte) + return FALSE; + + *paddr = PAGEBASE(PTOB(pte >> 
machdep->machspec->pte_shift)) + + PAGEOFFSET(vaddr); + + if (verbose) { + fprintf(fp, " PAGE: %lx\n\n", PAGEBASE(*paddr)); + ppc64_translate_pte(pte, 0, machdep->machspec->pte_shift); } return TRUE; @@ -411,7 +564,10 @@ FAULT_ON_ERROR); } - return ppc64_vtop(vaddr, pgd, paddr, verbose); + if (machdep->flags & VM_4_LEVEL) + return ppc64_vtop_level4(vaddr, pgd, paddr, verbose); + else + return ppc64_vtop(vaddr, pgd, paddr, verbose); } /* @@ -436,7 +592,10 @@ return TRUE; } - return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); + if (machdep->flags & VM_4_LEVEL) + return ppc64_vtop_level4(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); + else + return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, verbose); } /* @@ -657,7 +816,7 @@ * If a physaddr pointer is passed in, don't print anything. */ static int -ppc64_translate_pte(ulong pte, void *physaddr, ulonglong unused) +ppc64_translate_pte(ulong pte, void *physaddr, ulonglong pte_shift) { int c, len1, len2, len3, others, page_present; char buf[BUFSIZE]; @@ -668,7 +827,7 @@ char *arglist[MAXARGS]; ulong paddr; - paddr = PTOB(pte >> PTE_SHIFT); + paddr = PTOB(pte >> pte_shift); page_present = (pte & _PAGE_PRESENT); if (physaddr) { @@ -1034,8 +1193,12 @@ ms->hwstacksize + STACK_FRAME_OVERHEAD; bt->stackbuf = ms->hwstackbuf; alter_stackbuf(bt); - } else - error(FATAL, "cannot find the stack info"); + } else { + if (CRASHDEBUG(1)) { + fprintf(fp, "cannot find the stack info.\n"); + } + return; + } } @@ -1270,20 +1433,11 @@ return NULL; } -/* - * Print exception frame information for ppc64 - */ static void -ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs, - struct bt_info *bt) +ppc64_print_regs(struct ppc64_pt_regs *regs) { int i; - if (BT_REFERENCE_CHECK(bt)) - return; - - fprintf(fp, " %s [%lx] exception frame:", efrm_str, regs->trap); - /* print out the gprs... 
*/
 	for(i=0; i<32; i++) {
 		if(!(i % 3))
@@ -1315,9 +1469,78 @@
 	fprintf(fp, "DAR: %016lx\n", regs->dar);
 	fprintf(fp, " DSISR: %016lx ", regs->dsisr);
 	fprintf(fp, " Syscall Result: %016lx\n", regs->result);
+}
+
+/*
+ * Print the exception frame header line followed by the saved registers
+ */
+static void
+ppc64_print_eframe(char *efrm_str, struct ppc64_pt_regs *regs,
+		struct bt_info *bt)
+{
+	if (BT_REFERENCE_CHECK(bt))
+		return;
+
+	fprintf(fp, " %s [%lx] exception frame:", efrm_str, regs->trap);
+	ppc64_print_regs(regs);
 	fprintf(fp, "\n");
 }
 
+/* Get the starting NIP and SP for a kdump vmcore from the pt_regs in bt_in->machdep.
+ * Returns FALSE if GPR1 (the SP) was not saved, TRUE at once for a BT_TEXT_SYMBOLS*
+ * request; otherwise prints the registers and returns FALSE for a non-kernel SP. */
+static int
+ppc64_kdump_stack_frame(struct bt_info *bt_in, ulong *nip, ulong *ksp)
+{
+	struct ppc64_pt_regs *pt_regs;
+	unsigned long unip;	/* set, and later read, only when *ksp is a kernel address */
+
+	pt_regs = (struct ppc64_pt_regs *)bt_in->machdep;
+	if (!pt_regs->gpr[1]) {
+		/*
+		 * The registers were not collected; the corresponding CPU
+		 * may not have responded to an IPI.
+		 */
+		fprintf(fp, "%0lx: GPR1 register value (SP) was not saved\n",
+			bt_in->task);
+		return FALSE;
+	}
+	*ksp = pt_regs->gpr[1];
+	if (IS_KVADDR(*ksp)) {
+		readmem(*ksp+16, KVADDR, &unip, sizeof(ulong), "Regs NIP value",
+			FAULT_ON_ERROR);
+		*nip = unip;
+	} else {
+		if (IN_TASK_VMA(bt_in->task, *ksp))
+			fprintf(fp, "%0lx: Task is running in user space\n",
+				bt_in->task);
+		else
+			fprintf(fp, "%0lx: Invalid Stack Pointer %0lx\n",
+				bt_in->task, *ksp);
+		*nip = pt_regs->nip;
+	}
+	/* bitwise test: only the text-symbol flags suppress the register dump below */
+	if (bt_in->flags &
+	    (BT_TEXT_SYMBOLS|BT_TEXT_SYMBOLS_PRINT|BT_TEXT_SYMBOLS_NOPRINT))
+		return TRUE;
+
+	/*
+	 * Print the collected regs for the active task
+	 */
+	ppc64_print_regs(pt_regs);
+	if (!IS_KVADDR(*ksp))
+		return FALSE;
+
+	fprintf(fp, " NIP [%016lx] %s\n", pt_regs->nip,
+		closest_symbol(pt_regs->nip));
+	if (unip != pt_regs->link)
+		fprintf(fp, " LR [%016lx] %s\n", pt_regs->link,
+			closest_symbol(pt_regs->link));
+
+	fprintf(fp, "\n");
+
+	return TRUE;
+}
 
 /*
  * Get the starting point for the active cpus in a diskdump/netdump.
@@ -1335,12 +1558,18 @@ ulong ur_ksp = 0; int check_hardirq, check_softirq; int check_intrstack = TRUE; + struct ppc64_pt_regs *pt_regs; + + /* + * For the kdump vmcore, Use SP and IP values that are saved in ptregs. + */ + if (pc->flags & KDUMP) + return ppc64_kdump_stack_frame(bt_in, nip, ksp); bt = &bt_local; BCOPY(bt_in, bt, sizeof(struct bt_info)); ms = machdep->machspec; ur_nip = ur_ksp = 0; - struct ppc64_pt_regs *pt_regs; panic_task = tt->panic_task == bt->task ? TRUE : FALSE; @@ -1424,6 +1653,7 @@ if (STREQ(sym, ".netconsole_netdump") || STREQ(sym, ".netpoll_start_netdump") || STREQ(sym, ".start_disk_dump") || + STREQ(sym, ".crash_kexec") || STREQ(sym, ".disk_dump")) { *nip = *up; *ksp = bt->stackbase + @@ -2000,4 +2230,145 @@ ppc64_dump_line_number(0); } +/* + * Force the VM address-range selection via: + * + * --machdep vm=orig + * --machdep vm=2.6.14 + */ + +void +parse_cmdline_arg(void) +{ + int i, c, errflag; + char *p; + char buf[BUFSIZE]; + char *arglist[MAXARGS]; + int lines = 0; + + if (!strstr(machdep->cmdline_arg, "=")) { + error(WARNING, "ignoring --machdep option: %s\n\n", + machdep->cmdline_arg); + return; + } + + strcpy(buf, machdep->cmdline_arg); + + for (p = buf; *p; p++) { + if (*p == ',') + *p = ' '; + } + + c = parse_line(buf, arglist); + + for (i = 0; i < c; i++) { + errflag = 0; + + if (STRNEQ(arglist[i], "vm=")) { + p = arglist[i] + strlen("vm="); + if (strlen(p)) { + if (STREQ(p, "orig")) { + machdep->flags |= VM_ORIG; + continue; + } else if (STREQ(p, "2.6.14")) { + machdep->flags |= VM_4_LEVEL; + continue; + } + } + } + + error(WARNING, "ignoring --machdep option: %s\n", arglist[i]); + lines++; + } + + switch (machdep->flags & (VM_ORIG|VM_4_LEVEL)) + { + case VM_ORIG: + error(NOTE, "using original PPC64 VM address ranges\n"); + lines++; + break; + + case VM_4_LEVEL: + error(NOTE, "using 4-level pagetable PPC64 VM address ranges\n"); + lines++; + break; + + case (VM_ORIG|VM_4_LEVEL): + error(WARNING, "cannot set both vm=orig and 
vm=2.6.14\n"); + lines++; + machdep->flags &= ~(VM_ORIG|VM_4_LEVEL); + break; + } + + if (lines) + fprintf(fp, "\n"); +} + +/* + * Updating any smp-related items that were possibly bypassed + * or improperly initialized in kernel_init(). + */ +static void +ppc64_paca_init(void) +{ +#define BITS_FOR_LONG sizeof(ulong)*8 + int i, cpus, nr_paca; + char *cpu_paca_buf; + ulong data_offset; + ulong cpu_online_map[NR_CPUS/BITS_FOR_LONG]; + + if (!symbol_exists("paca")) + error(FATAL, "PPC64: Could not find 'paca' symbol\n"); + + if (!symbol_exists("cpu_online_map")) + error(FATAL, "PPC64: Could not find 'cpu_online_map' symbol\n"); + + if (!MEMBER_EXISTS("paca_struct", "data_offset")) + return; + + STRUCT_SIZE_INIT(ppc64_paca, "paca_struct"); + data_offset = MEMBER_OFFSET("paca_struct", "data_offset"); + + cpu_paca_buf = GETBUF(SIZE(ppc64_paca)); + + if (!(nr_paca = get_array_length("paca", NULL, 0))) + nr_paca = NR_CPUS; + + if (nr_paca > NR_CPUS) { + error(WARNING, + "PPC64: Number of paca entries (%d) greater than NR_CPUS (%d)\n", + nr_paca, NR_CPUS); + error(FATAL, "Recompile crash with larger NR_CPUS\n"); + } + + readmem(symbol_value("cpu_online_map"), KVADDR, &cpu_online_map[0], + nr_paca/8, "cpu_online_map", FAULT_ON_ERROR); + + for (i = cpus = 0; i < nr_paca; i++) { + div_t val = div(i, BITS_FOR_LONG); + /* + * CPU online? 
+ */ + if (!(cpu_online_map[val.quot] & (0x1UL << val.rem))) + continue; + + readmem(symbol_value("paca") + (i * SIZE(ppc64_paca)), + KVADDR, cpu_paca_buf, SIZE(ppc64_paca), + "paca entry", FAULT_ON_ERROR); + + kt->__per_cpu_offset[i] = ULONG(cpu_paca_buf + data_offset); + kt->flags |= PER_CPU_OFF; + cpus++; + } + kt->cpus = cpus; + if (kt->cpus > 1) + kt->flags |= SMP; +} + +void +ppc64_clear_machdep_cache(void) +{ + if (machdep->machspec->last_level4_read != vt->kernel_pgd[0]) + machdep->machspec->last_level4_read = 0; +} #endif /* PPC64 */ --- crash/x86_64.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/x86_64.c 2006-04-26 16:12:41.000000000 -0400 @@ -18,7 +18,10 @@ #ifdef X86_64 static int x86_64_kvtop(struct task_context *, ulong, physaddr_t *, int); +static int x86_64_kvtop_xen_wpt(struct task_context *, ulong, physaddr_t *, int); static int x86_64_uvtop(struct task_context *, ulong, physaddr_t *, int); +static int x86_64_uvtop_level4(struct task_context *, ulong, physaddr_t *, int); +static int x86_64_uvtop_level4_xen_wpt(struct task_context *, ulong, physaddr_t *, int); static ulong x86_64_vmalloc_start(void); static int x86_64_is_task_addr(ulong); static int x86_64_verify_symbol(const char *, ulong, char); @@ -32,6 +35,7 @@ #define EFRAME_VERIFY (0x2) #define EFRAME_CS (0x4) #define EFRAME_SEARCH (0x8) +static int x86_64_print_eframe_location(ulong, int, FILE *); static void x86_64_back_trace_cmd(struct bt_info *); static ulong x86_64_in_exception_stack(struct bt_info *); static ulong x86_64_in_irqstack(struct bt_info *); @@ -56,6 +60,7 @@ static void x86_64_display_memmap(void); static void x86_64_dump_line_number(ulong); static struct line_number_hook x86_64_line_number_hooks[]; +static int x86_64_is_module_addr(ulong); static int x86_64_is_kvaddr(ulong); static int x86_64_is_uvaddr(ulong, struct task_context *); void x86_64_compiler_warning_stub(void); @@ -63,7 +68,14 @@ static void x86_64_cpu_pda_init(void); static void x86_64_ist_init(void); 
static void x86_64_post_init(void); - +static void parse_cmdline_arg(void); +static void x86_64_clear_machdep_cache(void); +static int x86_64_xendump_ptm_create(struct xendump_data *); +static char *x86_64_xendump_load_page(ulong, struct xendump_data *); +static int x86_64_xendump_page_index(ulong, struct xendump_data *); +static void x86_64_debug_dump_page(FILE *, char *, char *); +static void x86_64_get_xendump_regs(struct xendump_data *, struct bt_info *, ulong *, ulong *); +static ulong x86_64_xendump_panic_task(struct xendump_data *); struct machine_specific x86_64_machine_specific = { 0 }; @@ -86,6 +98,8 @@ machdep->pageoffset = machdep->pagesize - 1; machdep->pagemask = ~((ulonglong)machdep->pageoffset); machdep->stacksize = machdep->pagesize * 2; + if ((machdep->machspec->upml = (char *)malloc(PAGESIZE())) == NULL) + error(FATAL, "cannot malloc upml space."); if ((machdep->pgd = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc pgd space."); if ((machdep->pmd = (char *)malloc(PAGESIZE())) == NULL) @@ -93,17 +107,69 @@ if ((machdep->ptbl = (char *)malloc(PAGESIZE())) == NULL) error(FATAL, "cannot malloc ptbl space."); if ((machdep->machspec->pml4 = - (char *)malloc(PAGESIZE())) == NULL) + (char *)malloc(PAGESIZE()*2)) == NULL) error(FATAL, "cannot malloc pml4 space."); + machdep->machspec->last_upml_read = 0; + machdep->machspec->last_pml4_read = 0; machdep->last_pgd_read = 0; machdep->last_pmd_read = 0; machdep->last_ptbl_read = 0; machdep->verify_paddr = generic_verify_paddr; machdep->ptrs_per_pgd = PTRS_PER_PGD; machdep->flags |= MACHDEP_BT_TEXT; + if (machdep->cmdline_arg) + parse_cmdline_arg(); break; case PRE_GDB: + if (!(machdep->flags & (VM_ORIG|VM_2_6_11|VM_XEN))) { + if (symbol_exists("boot_vmalloc_pgt")) + machdep->flags |= VM_ORIG; + else if (symbol_exists("xen_start_info")) + machdep->flags |= VM_XEN; + else + machdep->flags |= VM_2_6_11; + } + + switch (machdep->flags & (VM_ORIG|VM_2_6_11|VM_XEN)) + { + case VM_ORIG: + /* 
pre-2.6.11 layout */ + machdep->machspec->userspace_top = USERSPACE_TOP_ORIG; + machdep->machspec->page_offset = PAGE_OFFSET_ORIG; + machdep->machspec->vmalloc_start_addr = VMALLOC_START_ADDR_ORIG; + machdep->machspec->vmalloc_end = VMALLOC_END_ORIG; + machdep->machspec->modules_vaddr = MODULES_VADDR_ORIG; + machdep->machspec->modules_end = MODULES_END_ORIG; + + free(machdep->machspec->upml); + machdep->machspec->upml = NULL; + + machdep->uvtop = x86_64_uvtop; + break; + + case VM_2_6_11: + /* 2.6.11 layout */ + machdep->machspec->userspace_top = USERSPACE_TOP_2_6_11; + machdep->machspec->page_offset = PAGE_OFFSET_2_6_11; + machdep->machspec->vmalloc_start_addr = VMALLOC_START_ADDR_2_6_11; + machdep->machspec->vmalloc_end = VMALLOC_END_2_6_11; + machdep->machspec->modules_vaddr = MODULES_VADDR_2_6_11; + machdep->machspec->modules_end = MODULES_END_2_6_11; + + machdep->uvtop = x86_64_uvtop_level4; + break; + + case VM_XEN: + /* Xen layout */ + machdep->machspec->userspace_top = USERSPACE_TOP_XEN; + machdep->machspec->page_offset = PAGE_OFFSET_XEN; + machdep->machspec->vmalloc_start_addr = VMALLOC_START_ADDR_XEN; + machdep->machspec->vmalloc_end = VMALLOC_END_XEN; + machdep->machspec->modules_vaddr = MODULES_VADDR_XEN; + machdep->machspec->modules_end = MODULES_END_XEN; + break; + } machdep->kvbase = (ulong)PAGE_OFFSET; machdep->identity_map_base = (ulong)PAGE_OFFSET; machdep->is_kvaddr = x86_64_is_kvaddr; @@ -111,7 +177,6 @@ machdep->eframe_search = x86_64_eframe_search; machdep->back_trace = x86_64_low_budget_back_trace_cmd; machdep->processor_speed = x86_64_processor_speed; - machdep->uvtop = x86_64_uvtop; machdep->kvtop = x86_64_kvtop; machdep->get_task_pgd = x86_64_get_task_pgd; machdep->get_stack_frame = x86_64_get_stack_frame; @@ -126,6 +191,10 @@ machdep->line_number_hooks = x86_64_line_number_hooks; machdep->value_to_symbol = generic_machdep_value_to_symbol; machdep->init_kernel_pgd = x86_64_init_kernel_pgd; + machdep->clear_machdep_cache = 
x86_64_clear_machdep_cache; + machdep->xendump_ptm_create = x86_64_xendump_ptm_create; + machdep->get_xendump_regs = x86_64_get_xendump_regs; + machdep->xendump_panic_task = x86_64_xendump_panic_task; break; case POST_GDB: @@ -168,6 +237,20 @@ machdep->hz = HZ; if (THIS_KERNEL_VERSION >= LINUX(2,6,0)) machdep->hz = 1000; + machdep->section_size_bits = _SECTION_SIZE_BITS; + machdep->max_physmem_bits = _MAX_PHYSMEM_BITS; + if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES)) + machdep->uvtop = x86_64_uvtop_level4_xen_wpt; + else + machdep->uvtop = x86_64_uvtop_level4; + if (XEN()) { + MEMBER_OFFSET_INIT(vcpu_guest_context_user_regs, + "vcpu_guest_context", "user_regs"); + ASSIGN_OFFSET(cpu_user_regs_rsp) = + MEMBER_OFFSET("cpu_user_regs", "ss") - sizeof(ulong); + ASSIGN_OFFSET(cpu_user_regs_rip) = + MEMBER_OFFSET("cpu_user_regs", "cs") - sizeof(ulong); + } break; case POST_INIT: @@ -191,10 +274,18 @@ fprintf(fp, "%sKSYMS_START", others++ ? "|" : ""); if (machdep->flags & PT_REGS_INIT) fprintf(fp, "%sPT_REGS_INIT", others++ ? "|" : ""); - if (machdep->flags & SYSRQ) - fprintf(fp, "%sSYSRQ", others++ ? "|" : ""); if (machdep->flags & MACHDEP_BT_TEXT) fprintf(fp, "%sMACHDEP_BT_TEXT", others++ ? "|" : ""); + if (machdep->flags & VM_ORIG) + fprintf(fp, "%sVM_ORIG", others++ ? "|" : ""); + if (machdep->flags & VM_2_6_11) + fprintf(fp, "%sVM_2_6_11", others++ ? "|" : ""); + if (machdep->flags & VM_XEN) + fprintf(fp, "%sVM_XEN", others++ ? "|" : ""); + if (machdep->flags & NO_TSS) + fprintf(fp, "%sNO_TSS", others++ ? "|" : ""); + if (machdep->flags & SCHED_TEXT) + fprintf(fp, "%sSCHED_TEXT", others++ ? 
"|" : ""); fprintf(fp, ")\n"); fprintf(fp, " kvbase: %lx\n", machdep->kvbase); @@ -220,7 +311,14 @@ fprintf(fp, " back_trace: %lx\n", (ulong)machdep->back_trace); fprintf(fp, " processor_speed: x86_64_processor_speed()\n"); - fprintf(fp, " uvtop: x86_64_uvtop()\n"); + if (machdep->uvtop == x86_64_uvtop) + fprintf(fp, " uvtop: x86_64_uvtop()\n"); + else if (machdep->uvtop == x86_64_uvtop_level4) + fprintf(fp, " uvtop: x86_64_uvtop_level4()\n"); + else if (machdep->uvtop == x86_64_uvtop_level4_xen_wpt) + fprintf(fp, " uvtop: x86_64_uvtop_level4_xen_wpt()\n"); + else + fprintf(fp, " uvtop: %lx\n", (ulong)machdep->uvtop); fprintf(fp, " kvtop: x86_64_kvtop()\n"); fprintf(fp, " get_task_pgd: x86_64_get_task_pgd()\n"); fprintf(fp, " dump_irq: x86_64_dump_irq()\n"); @@ -239,6 +337,10 @@ fprintf(fp, " is_uvaddr: x86_64_is_uvaddr()\n"); fprintf(fp, " verify_paddr: generic_verify_paddr()\n"); fprintf(fp, " init_kernel_pgd: x86_64_init_kernel_pgd()\n"); + fprintf(fp, "clear_machdep_cache: x86_64_clear_machdep_cache()\n"); + fprintf(fp, " xendump_ptm_create: x86_64_xendump_ptm_create()\n"); + fprintf(fp, " get_xendump_regs: x86_64_get_xendump_regs()\n"); + fprintf(fp, " xendump_panic_task: x86_64_xendump_panic_task()\n"); fprintf(fp, " line_number_hooks: x86_64_line_number_hooks\n"); fprintf(fp, " value_to_symbol: generic_machdep_value_to_symbol()\n"); fprintf(fp, " last_pgd_read: %lx\n", machdep->last_pgd_read); @@ -248,8 +350,26 @@ fprintf(fp, " pmd: %lx\n", (ulong)machdep->pmd); fprintf(fp, " ptbl: %lx\n", (ulong)machdep->ptbl); fprintf(fp, " ptrs_per_pgd: %d\n", machdep->ptrs_per_pgd); - fprintf(fp, " machspec: %lx\n", (ulong)machdep->machspec); + fprintf(fp, " section_size_bits: %ld\n", machdep->section_size_bits); + fprintf(fp, " max_physmem_bits: %ld\n", machdep->max_physmem_bits); + fprintf(fp, " sections_per_root: %ld\n", machdep->sections_per_root); + + fprintf(fp, " machspec: %016lx\n", (ulong)machdep->machspec); + fprintf(fp, " userspace_top: %016lx\n", 
(ulong)ms->userspace_top); + fprintf(fp, " page_offset: %016lx\n", (ulong)ms->page_offset); + fprintf(fp, " vmalloc_start_addr: %016lx\n", (ulong)ms->vmalloc_start_addr); + fprintf(fp, " vmalloc_end: %016lx\n", (ulong)ms->vmalloc_end); + fprintf(fp, " modules_vaddr: %016lx\n", (ulong)ms->modules_vaddr); + fprintf(fp, " modules_end: %016lx\n", (ulong)ms->modules_end); fprintf(fp, " pml4: %lx\n", (ulong)ms->pml4); + fprintf(fp, " last_pml4_read: %lx\n", (ulong)ms->last_pml4_read); + if (ms->upml) { + fprintf(fp, " upml: %lx\n", (ulong)ms->upml); + fprintf(fp, " last_upml_read: %lx\n", (ulong)ms->last_upml_read); + } else { + fprintf(fp, " upml: (unused)\n"); + fprintf(fp, " last_upml_read: (unused)\n"); + } fprintf(fp, " irqstack: %lx\n", (ulong)ms->irqstack); fprintf(fp, " pto: %s", machdep->flags & PT_REGS_INIT ? "\n" : "(uninitialized)\n"); @@ -276,8 +396,10 @@ fprintf(fp, " rsp: %ld\n", ms->pto.rsp); fprintf(fp, " ss: %ld\n", ms->pto.ss); } - fprintf(fp, " stkinfo: esize: %d isize: %d\n", - ms->stkinfo.esize, ms->stkinfo.isize); + fprintf(fp, " stkinfo: esize: %d%sisize: %d\n", + ms->stkinfo.esize, + machdep->flags & NO_TSS ? " (NO TSS) " : " ", + ms->stkinfo.isize); fprintf(fp, " ebase[%s][7]:", arg ? "NR_CPUS" : "cpus"); cpus = arg ? 
NR_CPUS : kt->cpus; @@ -306,9 +428,9 @@ static void x86_64_cpu_pda_init(void) { - int i, cpus, nr_pda, cpunumber; + int i, cpus, nr_pda, cpunumber, _cpu_pda; char *cpu_pda_buf; - ulong level4_pgt, data_offset; + ulong level4_pgt, data_offset, cpu_pda_addr; struct syment *sp, *nsp; ulong offset, istacksize; @@ -320,18 +442,36 @@ MEMBER_OFFSET_INIT(x8664_pda_irqstackptr, "x8664_pda", "irqstackptr"); MEMBER_OFFSET_INIT(x8664_pda_level4_pgt, "x8664_pda", "level4_pgt"); MEMBER_OFFSET_INIT(x8664_pda_cpunumber, "x8664_pda", "cpunumber"); + MEMBER_OFFSET_INIT(x8664_pda_me, "x8664_pda", "me"); cpu_pda_buf = GETBUF(SIZE(x8664_pda)); - if (!(nr_pda = get_array_length("cpu_pda", NULL, 0))) - nr_pda = NR_CPUS; + if (symbol_exists("_cpu_pda")) { + if (!(nr_pda = get_array_length("_cpu_pda", NULL, 0))) + nr_pda = NR_CPUS; + _cpu_pda = TRUE; + } else { + if (!(nr_pda = get_array_length("cpu_pda", NULL, 0))) + nr_pda = NR_CPUS; + _cpu_pda = FALSE; + } for (i = cpus = 0; i < nr_pda; i++) { - if (!CPU_PDA_READ(i, cpu_pda_buf)) - break; - level4_pgt = ULONG(cpu_pda_buf + OFFSET(x8664_pda_level4_pgt)); + if (_cpu_pda) { + if (!_CPU_PDA_READ(i, cpu_pda_buf)) + break; + } else { + if (!CPU_PDA_READ(i, cpu_pda_buf)) + break; + } + + if (VALID_MEMBER(x8664_pda_level4_pgt)) { + level4_pgt = ULONG(cpu_pda_buf + OFFSET(x8664_pda_level4_pgt)); + if (!VALID_LEVEL4_PGT_ADDR(level4_pgt)) + break; + } cpunumber = INT(cpu_pda_buf + OFFSET(x8664_pda_cpunumber)); - if (!VALID_LEVEL4_PGT_ADDR(level4_pgt) || (cpunumber != cpus)) + if (cpunumber != cpus) break; cpus++; @@ -448,6 +588,13 @@ if (ms->stkinfo.ebase[c][0] == 0) break; } + } else if (!symbol_exists("boot_exception_stacks")) { + machdep->flags |= NO_TSS; + + if (CRASHDEBUG(1)) + error(NOTE, "CONFIG_X86_NO_TSS\n"); + + return; } if (ms->stkinfo.ebase[0][0] && ms->stkinfo.ebase[0][1]) @@ -535,6 +682,10 @@ if (clues >= 2) kt->cpu_flags[c] |= NMI; } + + if (symbol_exists("__sched_text_start") && + (symbol_value("__sched_text_start") == 
symbol_value("schedule"))) + machdep->flags |= SCHED_TEXT; } /* @@ -584,12 +735,19 @@ /* * Include both vmalloc'd and module address space as VMALLOC space. */ -int x86_64_IS_VMALLOC_ADDR(ulong vaddr) +int +x86_64_IS_VMALLOC_ADDR(ulong vaddr) { return ((vaddr >= VMALLOC_START && vaddr <= VMALLOC_END) || (vaddr >= MODULES_VADDR && vaddr <= MODULES_END)); } +static int +x86_64_is_module_addr(ulong vaddr) +{ + return (vaddr >= MODULES_VADDR && vaddr <= MODULES_END); +} + /* * Refining this may cause more problems than just doing it this way. */ @@ -616,6 +774,257 @@ */ static int +x86_64_uvtop_level4(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int verbose) +{ + ulong mm; + ulong *pml; + ulong pml_paddr; + ulong pml_pte; + ulong *pgd; + ulong pgd_paddr; + ulong pgd_pte; + ulong *pmd; + ulong pmd_paddr; + ulong pmd_pte; + ulong *ptep; + ulong pte_paddr; + ulong pte; + physaddr_t physpage; + + if (!tc) + error(FATAL, "current context invalid\n"); + + *paddr = 0; + + if (IS_KVADDR(uvaddr)) + return x86_64_kvtop(tc, uvaddr, paddr, verbose); + + if ((mm = task_mm(tc->task, TRUE))) + pml = ULONG_PTR(tt->mm_struct + OFFSET(mm_struct_pgd)); + else + readmem(tc->mm_struct + OFFSET(mm_struct_pgd), KVADDR, &pml, + sizeof(long), "mm_struct pgd", FAULT_ON_ERROR); + + pml_paddr = x86_64_VTOP((ulong)pml); + FILL_UPML(pml_paddr, PHYSADDR, PAGESIZE()); + pml = ((ulong *)pml_paddr) + pml4_index(uvaddr); + pml_pte = ULONG(machdep->machspec->upml + PAGEOFFSET(pml)); + if (verbose) + fprintf(fp, " PML: %lx => %lx\n", (ulong)pml, pml_pte); + if (!(pml_pte & _PAGE_PRESENT)) + goto no_upage; + + pgd_paddr = pml_pte & PHYSICAL_PAGE_MASK; + FILL_PGD(pgd_paddr, PHYSADDR, PAGESIZE()); + pgd = ((ulong *)pgd_paddr) + pgd_index(uvaddr); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(pgd)); + if (verbose) + fprintf(fp, " PUD: %lx => %lx\n", (ulong)pgd, pgd_pte); + if (!(pgd_pte & _PAGE_PRESENT)) + goto no_upage; + + /* + * pmd = pmd_offset(pgd, address); + */ + pmd_paddr = pgd_pte & 
PHYSICAL_PAGE_MASK; + FILL_PMD(pmd_paddr, PHYSADDR, PAGESIZE()); + pmd = ((ulong *)pmd_paddr) + pmd_index(uvaddr); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(pmd)); + if (verbose) + fprintf(fp, " PMD: %lx => %lx\n", (ulong)pmd, pmd_pte); + if (!(pmd_pte & _PAGE_PRESENT)) + goto no_upage; + if (pmd_pte & _PAGE_PSE) { + if (verbose) { + fprintf(fp, " PAGE: %lx (2MB)\n\n", + PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK); + x86_64_translate_pte(pmd_pte, 0, 0); + } + + physpage = (PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK) + + (uvaddr & ~_2MB_PAGE_MASK); + *paddr = physpage; + return TRUE; + } + + /* + * ptep = pte_offset_map(pmd, address); + * pte = *ptep; + */ + pte_paddr = pmd_pte & PHYSICAL_PAGE_MASK; + FILL_PTBL(pte_paddr, PHYSADDR, PAGESIZE()); + ptep = ((ulong *)pte_paddr) + pte_index(uvaddr); + pte = ULONG(machdep->ptbl + PAGEOFFSET(ptep)); + if (verbose) + fprintf(fp, " PTE: %lx => %lx\n", (ulong)ptep, pte); + if (!(pte & (_PAGE_PRESENT))) { + if (pte && verbose) { + fprintf(fp, "\n"); + x86_64_translate_pte(pte, 0, 0); + } + goto no_upage; + } + + *paddr = (PAGEBASE(pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr); + + if (verbose) { + fprintf(fp, " PAGE: %lx\n\n", + PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK); + x86_64_translate_pte(pte, 0, 0); + } + + return TRUE; + +no_upage: + + return FALSE; +} + +static int +x86_64_uvtop_level4_xen_wpt(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int verbose) +{ + ulong mm; + ulong *pml; + ulong pml_paddr; + ulong pml_pte; + ulong *pgd; + ulong pgd_paddr; + ulong pgd_pte; + ulong *pmd; + ulong pmd_paddr; + ulong pmd_pte; + ulong pseudo_pmd_pte; + ulong *ptep; + ulong pte_paddr; + ulong pte; + ulong pseudo_pte; + physaddr_t physpage; + char buf[BUFSIZE]; + + if (!tc) + error(FATAL, "current context invalid\n"); + + *paddr = 0; + + if (IS_KVADDR(uvaddr)) + return x86_64_kvtop(tc, uvaddr, paddr, verbose); + + if ((mm = task_mm(tc->task, TRUE))) + pml = ULONG_PTR(tt->mm_struct + OFFSET(mm_struct_pgd)); + else + 
readmem(tc->mm_struct + OFFSET(mm_struct_pgd), KVADDR, &pml, + sizeof(long), "mm_struct pgd", FAULT_ON_ERROR); + + pml_paddr = x86_64_VTOP((ulong)pml); + FILL_UPML(pml_paddr, PHYSADDR, PAGESIZE()); + pml = ((ulong *)pml_paddr) + pml4_index(uvaddr); + pml_pte = ULONG(machdep->machspec->upml + PAGEOFFSET(pml)); + if (verbose) + fprintf(fp, " PML: %lx => %lx [machine]\n", (ulong)pml, pml_pte); + if (!(pml_pte & _PAGE_PRESENT)) + goto no_upage; + + pgd_paddr = pml_pte & PHYSICAL_PAGE_MASK; + pgd_paddr = xen_machine_to_pseudo(pgd_paddr); + if (verbose) + fprintf(fp, " PML: %lx\n", pgd_paddr); + FILL_PGD(pgd_paddr, PHYSADDR, PAGESIZE()); + pgd = ((ulong *)pgd_paddr) + pgd_index(uvaddr); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(pgd)); + if (verbose) + fprintf(fp, " PUD: %lx => %lx [machine]\n", (ulong)pgd, pgd_pte); + if (!(pgd_pte & _PAGE_PRESENT)) + goto no_upage; + + /* + * pmd = pmd_offset(pgd, address); + */ + pmd_paddr = pgd_pte & PHYSICAL_PAGE_MASK; + pmd_paddr = xen_machine_to_pseudo(pmd_paddr); + if (verbose) + fprintf(fp, " PUD: %lx\n", pmd_paddr); + FILL_PMD(pmd_paddr, PHYSADDR, PAGESIZE()); + pmd = ((ulong *)pmd_paddr) + pmd_index(uvaddr); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(pmd)); + if (verbose) + fprintf(fp, " PMD: %lx => %lx [machine]\n", (ulong)pmd, pmd_pte); + if (!(pmd_pte & _PAGE_PRESENT)) + goto no_upage; + if (pmd_pte & _PAGE_PSE) { + if (verbose) + fprintf(fp, " PAGE: %lx (2MB) [machine]\n", + PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK); + + pseudo_pmd_pte = xen_machine_to_pseudo(PAGEBASE(pmd_pte)); + + if (pseudo_pmd_pte == XEN_MFN_NOT_FOUND) { + if (verbose) + fprintf(fp, " PAGE: page not available\n"); + *paddr = PADDR_NOT_AVAILABLE; + return FALSE; + } + + pseudo_pmd_pte |= PAGEOFFSET(pmd_pte); + + if (verbose) { + fprintf(fp, " PAGE: %s (2MB)\n\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(PAGEBASE(pseudo_pmd_pte) & + PHYSICAL_PAGE_MASK))); + + x86_64_translate_pte(pseudo_pmd_pte, 0, 0); + } + + physpage = 
(PAGEBASE(pseudo_pmd_pte) & PHYSICAL_PAGE_MASK) + + (uvaddr & ~_2MB_PAGE_MASK); + + *paddr = physpage; + return TRUE; + } + + /* + * ptep = pte_offset_map(pmd, address); + * pte = *ptep; + */ + pte_paddr = pmd_pte & PHYSICAL_PAGE_MASK; + pte_paddr = xen_machine_to_pseudo(pte_paddr); + if (verbose) + fprintf(fp, " PMD: %lx\n", pte_paddr); + FILL_PTBL(pte_paddr, PHYSADDR, PAGESIZE()); + ptep = ((ulong *)pte_paddr) + pte_index(uvaddr); + pte = ULONG(machdep->ptbl + PAGEOFFSET(ptep)); + if (verbose) + fprintf(fp, " PTE: %lx => %lx [machine]\n", (ulong)ptep, pte); + if (!(pte & (_PAGE_PRESENT))) { + if (pte && verbose) { + fprintf(fp, "\n"); + x86_64_translate_pte(pte, 0, 0); + } + goto no_upage; + } + + pseudo_pte = xen_machine_to_pseudo(pte & PHYSICAL_PAGE_MASK); + if (verbose) + fprintf(fp, " PTE: %lx\n", pseudo_pte + PAGEOFFSET(pte)); + + *paddr = (PAGEBASE(pseudo_pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(uvaddr); + + if (verbose) { + fprintf(fp, " PAGE: %lx [machine]\n", + PAGEBASE(pte) & PHYSICAL_PAGE_MASK); + fprintf(fp, " PAGE: %lx\n\n", + PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK); + x86_64_translate_pte(pseudo_pte + PAGEOFFSET(pte), 0, 0); + } + + return TRUE; + +no_upage: + + return FALSE; +} + +static int x86_64_uvtop(struct task_context *tc, ulong uvaddr, physaddr_t *paddr, int verbose) { ulong mm; @@ -748,6 +1157,9 @@ return TRUE; } + if (XEN() && (kt->xen_flags & WRITABLE_PAGE_TABLES)) + return (x86_64_kvtop_xen_wpt(tc, kvaddr, paddr, verbose)); + /* * pgd = pgd_offset_k(addr); */ @@ -764,7 +1176,7 @@ pgd = ((ulong *)pgd_paddr) + pgd_index(kvaddr); pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(pgd)); if (verbose) - fprintf(fp, " PGD: %lx => %lx\n", (ulong)pgd, pgd_pte); + fprintf(fp, " PUD: %lx => %lx\n", (ulong)pgd, pgd_pte); if (!(pgd_pte & _PAGE_PRESENT)) goto no_kpage; @@ -824,6 +1236,136 @@ return FALSE; } + +static int +x86_64_kvtop_xen_wpt(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbose) +{ + ulong *pml4; + ulong *pgd; + ulong 
pgd_paddr; + ulong pgd_pte; + ulong *pmd; + ulong pmd_paddr; + ulong pmd_pte; + ulong pseudo_pmd_pte; + ulong *ptep; + ulong pte_paddr; + ulong pte; + ulong pseudo_pte; + physaddr_t physpage; + char buf[BUFSIZE]; + + /* + * pgd = pgd_offset_k(addr); + */ + FILL_PML4(); + pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr); + if (verbose) { + fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]); + fprintf(fp, "PAGE DIRECTORY: %lx [machine]\n", *pml4); + } + if (!((*pml4) & _PAGE_PRESENT)) /* mask first, then negate: '!' binds tighter than '&' */ + goto no_kpage; + pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK; + pgd_paddr = xen_machine_to_pseudo(pgd_paddr); + if (verbose) + fprintf(fp, "PAGE DIRECTORY: %lx\n", pgd_paddr); + FILL_PGD(pgd_paddr, PHYSADDR, PAGESIZE()); + pgd = ((ulong *)pgd_paddr) + pgd_index(kvaddr); + pgd_pte = ULONG(machdep->pgd + PAGEOFFSET(pgd)); + if (verbose) + fprintf(fp, " PUD: %lx => %lx [machine]\n", (ulong)pgd, pgd_pte); + if (!(pgd_pte & _PAGE_PRESENT)) + goto no_kpage; + + /* + * pmd = pmd_offset(pgd, addr); + */ + pmd_paddr = pgd_pte & PHYSICAL_PAGE_MASK; + pmd_paddr = xen_machine_to_pseudo(pmd_paddr); + if (verbose) + fprintf(fp, " PUD: %lx\n", pmd_paddr); + FILL_PMD(pmd_paddr, PHYSADDR, PAGESIZE()); + pmd = ((ulong *)pmd_paddr) + pmd_index(kvaddr); + pmd_pte = ULONG(machdep->pmd + PAGEOFFSET(pmd)); + if (verbose) + fprintf(fp, " PMD: %lx => %lx [machine]\n", (ulong)pmd, pmd_pte); + if (!(pmd_pte & _PAGE_PRESENT)) + goto no_kpage; + if (pmd_pte & _PAGE_PSE) { + if (verbose) + fprintf(fp, " PAGE: %lx (2MB) [machine]\n", + PAGEBASE(pmd_pte) & PHYSICAL_PAGE_MASK); + + pseudo_pmd_pte = xen_machine_to_pseudo(PAGEBASE(pmd_pte)); + + if (pseudo_pmd_pte == XEN_MFN_NOT_FOUND) { + if (verbose) + fprintf(fp, " PAGE: page not available\n"); + *paddr = PADDR_NOT_AVAILABLE; + return FALSE; + } + + pseudo_pmd_pte |= PAGEOFFSET(pmd_pte); + + if (verbose) { + fprintf(fp, " PAGE: %s (2MB)\n\n", + mkstring(buf, VADDR_PRLEN, RJUST|LONG_HEX, + MKSTR(PAGEBASE(pseudo_pmd_pte) & + PHYSICAL_PAGE_MASK))); + + 
x86_64_translate_pte(pseudo_pmd_pte, 0, 0); + } + + physpage = (PAGEBASE(pseudo_pmd_pte) & PHYSICAL_PAGE_MASK) + + (kvaddr & ~_2MB_PAGE_MASK); + + *paddr = physpage; + return TRUE; + } + + /* + * ptep = pte_offset_map(pmd, addr); + * pte = *ptep; + */ + pte_paddr = pmd_pte & PHYSICAL_PAGE_MASK; + pte_paddr = xen_machine_to_pseudo(pte_paddr); + if (verbose) + fprintf(fp, " PMD: %lx\n", pte_paddr); + FILL_PTBL(pte_paddr, PHYSADDR, PAGESIZE()); + ptep = ((ulong *)pte_paddr) + pte_index(kvaddr); + pte = ULONG(machdep->ptbl + PAGEOFFSET(ptep)); + if (verbose) + fprintf(fp, " PTE: %lx => %lx [machine]\n", (ulong)ptep, pte); + if (!(pte & (_PAGE_PRESENT))) { + if (pte && verbose) { + fprintf(fp, "\n"); + x86_64_translate_pte(pte, 0, 0); + } + goto no_kpage; + } + + pseudo_pte = xen_machine_to_pseudo(pte & PHYSICAL_PAGE_MASK); + if (verbose) + fprintf(fp, " PTE: %lx\n", pseudo_pte + PAGEOFFSET(pte)); + + *paddr = (PAGEBASE(pseudo_pte) & PHYSICAL_PAGE_MASK) + PAGEOFFSET(kvaddr); + + if (verbose) { + fprintf(fp, " PAGE: %lx [machine]\n", + PAGEBASE(pte) & PHYSICAL_PAGE_MASK); + fprintf(fp, " PAGE: %lx\n\n", + PAGEBASE(*paddr) & PHYSICAL_PAGE_MASK); + x86_64_translate_pte(pseudo_pte + PAGEOFFSET(pte), 0, 0); + } + + return TRUE; + +no_kpage: + return FALSE; +} + + /* * Determine where vmalloc'd memory starts. 
*/ @@ -852,13 +1394,13 @@ static ulong x86_64_processor_speed(void) { - unsigned long cpu_khz; + unsigned long cpu_khz = 0; if (machdep->mhz) return (machdep->mhz); if (symbol_exists("cpu_khz")) { - get_symbol_data("cpu_khz", sizeof(long), &cpu_khz); + get_symbol_data("cpu_khz", sizeof(int), &cpu_khz); if (cpu_khz) return(machdep->mhz = cpu_khz/1000); } @@ -878,7 +1420,6 @@ if (!name || !strlen(name) || !(machdep->flags & KSYMS_START)) return FALSE; - return TRUE; } @@ -1091,6 +1632,9 @@ ulong *up; ulong words, addr; + if (rsp < bt->frameptr) + return; + words = (rsp - bt->frameptr) / sizeof(ulong) + 1; addr = bt->frameptr; @@ -1308,7 +1852,7 @@ (rsp < (ms->stkinfo.ebase[c][i] + ms->stkinfo.esize))) { estack = ms->stkinfo.ebase[c][i]; - if (c != bt->tc->processor) + if (CRASHDEBUG(1) && (c != bt->tc->processor)) error(INFO, "task cpu: %d exception stack cpu: %d\n", bt->tc->processor, c); @@ -1341,7 +1885,7 @@ if ((rsp >= ms->stkinfo.ibase[c]) && (rsp < (ms->stkinfo.ibase[c] + ms->stkinfo.isize))) { irqstack = ms->stkinfo.ibase[c]; - if (c != bt->tc->processor) + if (CRASHDEBUG(1) && (c != bt->tc->processor)) error(INFO, "task cpu: %d IRQ stack cpu: %d\n", bt->tc->processor, c); @@ -1399,12 +1943,13 @@ ofp = fp; if (bt->flags & BT_TEXT_SYMBOLS) { - fprintf(ofp, "%sSTART: %s%s at %lx\n", - space(VADDR_PRLEN > 8 ? 14 : 6), - closest_symbol(bt->instptr), - STREQ(closest_symbol(bt->instptr), "thread_return") ? - " (schedule)" : "", - bt->instptr); + if (!(bt->flags & BT_TEXT_SYMBOLS_ALL)) + fprintf(ofp, "%sSTART: %s%s at %lx\n", + space(VADDR_PRLEN > 8 ? 14 : 6), + closest_symbol(bt->instptr), + STREQ(closest_symbol(bt->instptr), "thread_return") ? 
+ " (schedule)" : "", + bt->instptr); } else if (bt->flags & BT_START) { x86_64_print_stack_entry(bt, ofp, level, 0, bt->instptr); @@ -1647,8 +2192,12 @@ bt->stackbuf + (irq_eframe - bt->stackbase), bt, ofp); if (cs & 3) done = TRUE; /* IRQ from user-mode */ - else + else { + if (x86_64_print_eframe_location(rsp, level, ofp)) + level++; rsp += SIZE(pt_regs); + irq_eframe = 0; + } level++; } @@ -1691,6 +2240,10 @@ bt->call_target); continue; } + } else if ((machdep->flags & SCHED_TEXT) && + STREQ(bt->call_target, "schedule") && + STREQ(sp->name, "__sched_text_start")) { + ; /* bait and switch */ } else if (!STREQ(sp->name, bt->call_target)) { /* * We got function called by the text routine, @@ -1709,6 +2262,8 @@ { case BACKTRACE_ENTRY_AND_EFRAME_DISPLAYED: last_process_stack_eframe = rsp + 8; + if (x86_64_print_eframe_location(last_process_stack_eframe, level, ofp)) + level++; rsp += SIZE(pt_regs); i += SIZE(pt_regs)/sizeof(ulong); case BACKTRACE_ENTRY_DISPLAYED: @@ -1842,6 +2397,8 @@ long rax, rbx, rcx, rdx, rsi, rdi; long r8, r9, r10, r11, r12, r13, r14, r15; struct machine_specific *ms; + struct syment *sp; + ulong offset; char *pt_regs_buf; long verified; int err; @@ -1950,8 +2507,17 @@ cs & 3 ? "USER" : "KERNEL", kvaddr ? kvaddr : (local - bt->stackbuf) + bt->stackbase); + } else if (!(cs & 3)) { + fprintf(ofp, " [exception RIP: "); + if ((sp = value_search(rip, &offset))) { + fprintf(ofp, "%s", sp->name); + if (offset) + fprintf(ofp, (output_radix == 16) ? 
+ "+0x%lx" : "+%ld", offset); + } else + fprintf(ofp, "unknown or invalid address"); + fprintf(ofp, "]\n"); } - fprintf(ofp, " RIP: %016lx RSP: %016lx RFLAGS: %08lx\n", rip, rsp, rflags); fprintf(ofp, " RAX: %016lx RBX: %016lx RCX: %016lx\n", @@ -2006,6 +2572,39 @@ return 0; } +static int +x86_64_print_eframe_location(ulong eframe, int level, FILE *ofp) +{ + return FALSE; + +#ifdef NOTDEF + ulong rip; + char *pt_regs_buf; + struct machine_specific *ms; + struct syment *sp; + + ms = machdep->machspec; + + pt_regs_buf = GETBUF(SIZE(pt_regs)); + if (!readmem(eframe, KVADDR, pt_regs_buf, SIZE(pt_regs), + "pt_regs", RETURN_ON_ERROR|QUIET)) { + FREEBUF(pt_regs_buf); + return FALSE; + } + + rip = ULONG(pt_regs_buf + ms->pto.rip); + FREEBUF(pt_regs_buf); + + if (!(sp = value_search(rip, NULL))) + return FALSE; + + fprintf(ofp, "%s#%d [%8lx] %s at %lx\n", level < 10 ? " " : "", level+1, + eframe, sp->name, rip); + + return TRUE; +#endif +} + /* * Check that the verifiable registers contain reasonable data. 
*/ @@ -2021,6 +2620,11 @@ if ((cs == 0x10) && (ss == 0x18)) { if (is_kernel_text(rip) && IS_KVADDR(rsp)) return TRUE; + + if (x86_64_is_module_addr(rip) && + IS_KVADDR(rsp) && + (rsp == (kvaddr + SIZE(pt_regs)))) + return TRUE; } if ((cs == 0x10) && kvaddr) { @@ -2040,6 +2644,26 @@ return TRUE; } + if (XEN() && ((cs == 0x33) || (cs == 0xe033)) && + ((ss == 0x2b) || (ss == 0xe02b))) { + if (IS_UVADDR(rip, bt->tc) && IS_UVADDR(rsp, bt->tc)) + return TRUE; + } + + if (XEN() && ((cs == 0x10000e030) || (cs == 0xe030)) && + (ss == 0xe02b)) { + if (is_kernel_text(rip) && IS_KVADDR(rsp)) + return TRUE; + } + + /* + * 32-bit segments + */ + if ((cs == 0x23) && (ss == 0x2b)) { + if (IS_UVADDR(rip, bt->tc) && IS_UVADDR(rsp, bt->tc)) + return TRUE; + } + return FALSE; } @@ -2065,7 +2689,7 @@ x86_64_get_dumpfile_stack_frame(struct bt_info *bt_in, ulong *rip, ulong *rsp) { int panic_task; - int i, panic, stage; + int i, estack, panic, stage; char *sym; struct syment *sp; ulong *up; @@ -2080,6 +2704,7 @@ ms = machdep->machspec; ur_rip = ur_rsp = 0; stage = 0; + estack = -1; panic_task = tt->panic_task == bt->task ? 
TRUE : FALSE; @@ -2119,13 +2744,14 @@ STREQ(sym, "netpoll_start_netdump") || STREQ(sym, "start_disk_dump") || STREQ(sym, "disk_dump") || + STREQ(sym, "crash_kexec") || STREQ(sym, "try_crashdump")) { *rip = *up; *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf); return; } - if ((stage == 2) && + if ((estack >= 0) && (STREQ(sym, "nmi_watchdog_tick") || STREQ(sym, "default_do_nmi"))) { sp = x86_64_function_called_by((*up)-5); @@ -2161,7 +2787,7 @@ next_sysrq: *rip = *up; *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf); - machdep->flags |= SYSRQ; + pc->flags |= SYSRQ; for (i++, up++; i < LONGS_PER_STACK; i++, up++) { sym = closest_symbol(*up); if (STREQ(sym, "sysrq_handle_crash")) @@ -2176,6 +2802,12 @@ *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf); return; } + + if (!panic_task && STREQ(sym, "crash_nmi_callback")) { + *rip = *up; + *rsp = bt->stackbase + ((char *)(up) - bt->stackbuf); + return; + } } if (panic) @@ -2191,25 +2823,30 @@ bt->stackbase = ms->stkinfo.ibase[bt->tc->processor]; bt->stacktop = ms->stkinfo.ibase[bt->tc->processor] + ms->stkinfo.isize; + console("x86_64_get_dumpfile_stack_frame: searching IRQ stack at %lx\n", + bt->stackbase); bt->stackbuf = ms->irqstack; alter_stackbuf(bt); stage = 1; goto next_stack; /* - * Check the NMI exception stack. + * Check the exception stacks. 
*/ case 1: - bt->stackbase = ms->stkinfo.ebase[bt->tc->processor][NMI_STACK]; - bt->stacktop = ms->stkinfo.ebase[bt->tc->processor][NMI_STACK] + - ms->stkinfo.esize; + if (++estack == 7) + break; + bt->stackbase = ms->stkinfo.ebase[bt->tc->processor][estack]; + bt->stacktop = ms->stkinfo.ebase[bt->tc->processor][estack] + + ms->stkinfo.esize; + console("x86_64_get_dumpfile_stack_frame: searching %s estack at %lx\n", + x86_64_exception_stacks[estack], bt->stackbase); + if (!(bt->stackbase)) + goto skip_stage; bt->stackbuf = ms->irqstack; alter_stackbuf(bt); - stage = 2; goto next_stack; - case 2: - break; } /* @@ -2258,7 +2895,7 @@ { ulong offset, rip; - if (INVALID_MEMBER(thread_struct_rip)) + if (INVALID_MEMBER(thread_struct_rip)) return symbol_value("thread_return"); if (tt->flags & THREAD_INFO) { @@ -2457,25 +3094,40 @@ int x86_64_get_smp_cpus(void) { - int i, cpus, nr_pda, cpunumber; + int i, cpus, nr_pda, cpunumber, _cpu_pda; char *cpu_pda_buf; - ulong level4_pgt; + ulong level4_pgt, cpu_pda_addr; if (!VALID_STRUCT(x8664_pda)) return 1; cpu_pda_buf = GETBUF(SIZE(x8664_pda)); - if (!(nr_pda = get_array_length("cpu_pda", NULL, 0))) - nr_pda = NR_CPUS; - + if (symbol_exists("_cpu_pda")) { + if (!(nr_pda = get_array_length("_cpu_pda", NULL, 0))) + nr_pda = NR_CPUS; + _cpu_pda = TRUE; + } else { + if (!(nr_pda = get_array_length("cpu_pda", NULL, 0))) + nr_pda = NR_CPUS; + _cpu_pda = FALSE; + } for (i = cpus = 0; i < nr_pda; i++) { - if (!CPU_PDA_READ(i, cpu_pda_buf)) - break; - level4_pgt = ULONG(cpu_pda_buf + OFFSET(x8664_pda_level4_pgt)); + if (_cpu_pda) { + if (!_CPU_PDA_READ(i, cpu_pda_buf)) + break; + } else { + if (!CPU_PDA_READ(i, cpu_pda_buf)) + break; + } + if (VALID_MEMBER(x8664_pda_level4_pgt)) { + level4_pgt = ULONG(cpu_pda_buf + OFFSET(x8664_pda_level4_pgt)); + if (!VALID_LEVEL4_PGT_ADDR(level4_pgt)) + break; + } cpunumber = INT(cpu_pda_buf + OFFSET(x8664_pda_cpunumber)); - if (!VALID_LEVEL4_PGT_ADDR(level4_pgt) || (cpunumber != cpus)) - break; + if 
(cpunumber != cpus) + break; cpus++; } @@ -2551,9 +3203,9 @@ static void x86_64_display_cpu_data(void) { - int cpu, cpus, boot_cpu; + int cpu, cpus, boot_cpu, _cpu_pda; ulong cpu_data; - ulong cpu_pda; + ulong cpu_pda, cpu_pda_addr; if (symbol_exists("cpu_data")) { cpu_data = symbol_value("cpu_data"); @@ -2564,7 +3216,13 @@ boot_cpu = TRUE; cpus = 1; } - cpu_pda = symbol_value("cpu_pda"); + if (symbol_exists("_cpu_pda")) { + cpu_pda = symbol_value("_cpu_pda"); + _cpu_pda = TRUE; + } else if (symbol_exists("cpu_pda")) { + cpu_pda = symbol_value("cpu_pda"); + _cpu_pda = FALSE; + } for (cpu = 0; cpu < cpus; cpu++) { if (boot_cpu) @@ -2574,10 +3232,17 @@ dump_struct("cpuinfo_x86", cpu_data, 0); fprintf(fp, "\n"); - dump_struct("x8664_pda", cpu_pda, 0); + if (_cpu_pda) { + readmem(cpu_pda, KVADDR, &cpu_pda_addr, + sizeof(unsigned long), "_cpu_pda addr", FAULT_ON_ERROR); + dump_struct("x8664_pda", cpu_pda_addr, 0); + cpu_pda += sizeof(void *); + } else { + dump_struct("x8664_pda", cpu_pda, 0); + cpu_pda += SIZE(x8664_pda); + } cpu_data += SIZE(cpuinfo_x86); - cpu_pda += SIZE(x8664_pda); } } @@ -2691,4 +3356,440 @@ x86_64_dump_line_number(0); } +/* + * Force the VM address-range selection via: + * + * --machdep vm=orig + * --machdep vm=2.6.11 + */ + +void +parse_cmdline_arg(void) +{ + int i, c, errflag; + char *p; + char buf[BUFSIZE]; + char *arglist[MAXARGS]; + int lines = 0; + + if (!strstr(machdep->cmdline_arg, "=")) { + error(WARNING, "ignoring --machdep option: %s\n\n", + machdep->cmdline_arg); + return; + } + + strcpy(buf, machdep->cmdline_arg); + + for (p = buf; *p; p++) { + if (*p == ',') + *p = ' '; + } + + c = parse_line(buf, arglist); + + for (i = 0; i < c; i++) { + errflag = 0; + + if (STRNEQ(arglist[i], "vm=")) { + p = arglist[i] + strlen("vm="); + if (strlen(p)) { + if (STREQ(p, "orig")) { + machdep->flags |= VM_ORIG; + continue; + } else if (STREQ(p, "2.6.11")) { + machdep->flags |= VM_2_6_11; + continue; + } else if (STREQ(p, "xen")) { + machdep->flags |= 
VM_XEN; + continue; + } + } + } + + error(WARNING, "ignoring --machdep option: %s\n", arglist[i]); + lines++; + } + + switch (machdep->flags & (VM_ORIG|VM_2_6_11|VM_XEN)) + { + case 0: + break; + + case VM_ORIG: + error(NOTE, "using original x86_64 VM address ranges\n"); + lines++; + break; + + case VM_2_6_11: + error(NOTE, "using 2.6.11 x86_64 VM address ranges\n"); + lines++; + break; + + case VM_XEN: + error(NOTE, "using xen x86_64 VM address ranges\n"); + lines++; + break; + + default: + error(WARNING, "cannot set multiple vm values\n"); + lines++; + machdep->flags &= ~(VM_ORIG|VM_2_6_11|VM_XEN); + break; + } + + if (lines) + fprintf(fp, "\n"); +} + +void +x86_64_clear_machdep_cache(void) +{ + machdep->machspec->last_upml_read = 0; +} + +#include "xendump.h" + +/* + * Create an index of mfns for each page that makes up the + * kernel's complete phys_to_machine_mapping[max_pfn] array. + */ +static int +x86_64_xendump_ptm_create(struct xendump_data *xd) +{ + int i, idx; + ulong mfn, kvaddr, ctrlreg[8], ctrlreg_offset; + ulong *up; + off_t offset; + + if ((ctrlreg_offset = MEMBER_OFFSET("vcpu_guest_context", "ctrlreg")) == + INVALID_OFFSET) + error(FATAL, + "cannot determine vcpu_guest_context.ctrlreg offset\n"); + else if (CRASHDEBUG(1)) + fprintf(xd->ofp, + "MEMBER_OFFSET(vcpu_guest_context, ctrlreg): %ld\n", + ctrlreg_offset); + + offset = (off_t)xd->xc_core.header.xch_ctxt_offset + + (off_t)ctrlreg_offset; + + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + error(FATAL, "cannot lseek to xch_ctxt_offset\n"); + + if (read(xd->xfd, &ctrlreg, sizeof(ctrlreg)) != + sizeof(ctrlreg)) + error(FATAL, "cannot read vcpu_guest_context ctrlreg[8]\n"); + + for (i = 0; CRASHDEBUG(1) && (i < 8); i++) + fprintf(xd->ofp, "ctrlreg[%d]: %lx\n", i, ctrlreg[i]); + + mfn = ctrlreg[3] >> PAGESHIFT(); + + if (!xc_core_mfn_to_page(mfn, machdep->machspec->pml4)) + error(FATAL, "cannot read/find cr3 page\n"); + + if (CRASHDEBUG(7)) + x86_64_debug_dump_page(xd->ofp, machdep->machspec->pml4, 
+ "contents of PML4 page:"); + + kvaddr = symbol_value("end_pfn"); + if (!x86_64_xendump_load_page(kvaddr, xd)) + return FALSE; + + up = (ulong *)(xd->page + PAGEOFFSET(kvaddr)); + if (CRASHDEBUG(1)) + fprintf(xd->ofp, "end_pfn: %lx\n", *up); + + xd->xc_core.ptm_frames = *up/(PAGESIZE()/sizeof(ulong)); + + if ((xd->xc_core.ptm_frame_index_list = (ulong *) + malloc(xd->xc_core.ptm_frames * sizeof(ulong))) == NULL) + error(FATAL, "cannot malloc ptm_mfn_frame_list"); + + kvaddr = symbol_value("phys_to_machine_mapping"); + if (!x86_64_xendump_load_page(kvaddr, xd)) + return FALSE; + + up = (ulong *)(xd->page + PAGEOFFSET(kvaddr)); + if (CRASHDEBUG(1)) + fprintf(fp, "phys_to_machine_mapping: %lx\n", *up); + + kvaddr = *up; + machdep->last_ptbl_read = BADADDR; + + for (i = 0; i < xd->xc_core.ptm_frames; i++) { + if ((idx = x86_64_xendump_page_index(kvaddr, xd)) == MFN_NOT_FOUND) + return FALSE; + xd->xc_core.ptm_frame_index_list[i] = idx; + kvaddr += PAGESIZE(); + } + + machdep->last_ptbl_read = 0; + + return TRUE; +} + +static void +x86_64_debug_dump_page(FILE *ofp, char *page, char *name) +{ + int i; + ulong *up; + + fprintf(ofp, "%s\n", name); + + up = (ulong *)page; + for (i = 0; i < 256; i++) { + fprintf(ofp, "%016lx: %016lx %016lx\n", + (ulong)((i * 2) * sizeof(ulong)), + *up, *(up+1)); + up += 2; + } +} + +/* + * Find the page associate with the kvaddr, and read its contents + * into the passed-in buffer. 
+ */ +static char * +x86_64_xendump_load_page(ulong kvaddr, struct xendump_data *xd) +{ + ulong mfn; /* frame number masked out of the entry at each paging level */ + ulong *pml4, *pgd, *pmd, *ptep; /* kvaddr's entry in the page used at each level */ + + pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr); /* top level: index the page already held in machspec->pml4 */ + mfn = ((*pml4) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if (CRASHDEBUG(3)) + fprintf(xd->ofp, + "[%lx] pml4: %lx mfn: %lx pml4_index: %lx\n", + kvaddr, *pml4, mfn, pml4_index(kvaddr)); + + if (!xc_core_mfn_to_page(mfn, machdep->pgd)) /* next-level page is indexed from machdep->pgd just below */ + error(FATAL, "cannot read/find pud page\n"); + + if (CRASHDEBUG(7)) + x86_64_debug_dump_page(xd->ofp, machdep->pgd, + "contents of page upper directory page:"); + + pgd = ((ulong *)machdep->pgd) + pgd_index(kvaddr); + mfn = ((*pgd) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if (CRASHDEBUG(3)) + fprintf(xd->ofp, + "[%lx] pgd: %lx mfn: %lx pgd_index: %lx\n", + kvaddr, *pgd, mfn, pgd_index(kvaddr)); + + if (!xc_core_mfn_to_page(mfn, machdep->pmd)) + error(FATAL, "cannot read/find pmd page\n"); + + if (CRASHDEBUG(7)) + x86_64_debug_dump_page(xd->ofp, machdep->pmd, + "contents of page middle directory page:"); + + pmd = ((ulong *)machdep->pmd) + pmd_index(kvaddr); + mfn = ((*pmd) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if (CRASHDEBUG(3)) + fprintf(xd->ofp, + "[%lx] pmd: %lx mfn: %lx pmd_index: %lx\n", + kvaddr, *pmd, mfn, pmd_index(kvaddr)); + + if (!xc_core_mfn_to_page(mfn, machdep->ptbl)) /* NOTE(review): machdep->last_ptbl_read is not updated here, unlike in x86_64_xendump_page_index() -- confirm callers reset it */ + error(FATAL, "cannot read/find page table page\n"); + + if (CRASHDEBUG(7)) + x86_64_debug_dump_page(xd->ofp, machdep->ptbl, + "contents of page table page:"); + + ptep = ((ulong *)machdep->ptbl) + pte_index(kvaddr); + mfn = ((*ptep) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); /* frame of the data page that backs kvaddr */ + + if (CRASHDEBUG(3)) + fprintf(xd->ofp, + "[%lx] ptep: %lx mfn: %lx pte_index: %lx\n", + kvaddr, *ptep, mfn, pte_index(kvaddr)); + + if (!xc_core_mfn_to_page(mfn, xd->page)) + error(FATAL, "cannot read/find pte page\n"); + + if (CRASHDEBUG(7)) + x86_64_debug_dump_page(xd->ofp, xd->page, + "contents of page:"); + + return xd->page; /* presumably filled by the xc_core_mfn_to_page() call above; every failure path uses error(FATAL, ...) */ +} + +/* + * Find the dumpfile page index 
associated with the kvaddr. + */ +static int +x86_64_xendump_page_index(ulong kvaddr, struct xendump_data *xd) +{ + int idx; /* result of xc_core_mfn_to_page_index() for the final frame */ + ulong mfn; /* frame number masked out of the entry at each paging level */ + ulong *pml4, *pgd, *pmd, *ptep; /* kvaddr's entry in the page used at each level; xd itself is not referenced in this function */ + + pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr); + mfn = ((*pml4) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if (!xc_core_mfn_to_page(mfn, machdep->pgd)) + error(FATAL, "cannot read/find pud page\n"); + + pgd = ((ulong *)machdep->pgd) + pgd_index(kvaddr); + mfn = ((*pgd) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if (!xc_core_mfn_to_page(mfn, machdep->pmd)) + error(FATAL, "cannot read/find pmd page\n"); + + pmd = ((ulong *)machdep->pmd) + pmd_index(kvaddr); + mfn = ((*pmd) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if ((mfn != machdep->last_ptbl_read) && /* skip the read when this frame was the last one loaded into machdep->ptbl */ + !xc_core_mfn_to_page(mfn, machdep->ptbl)) + error(FATAL, "cannot read/find page table page\n"); + + machdep->last_ptbl_read = mfn; /* remember which frame machdep->ptbl now holds */ + + ptep = ((ulong *)machdep->ptbl) + pte_index(kvaddr); + mfn = ((*ptep) & PHYSICAL_PAGE_MASK) >> PAGESHIFT(); + + if ((idx = xc_core_mfn_to_page_index(mfn)) == MFN_NOT_FOUND) + error(INFO, "cannot determine page index for %lx\n", + kvaddr); /* reported at INFO level only; MFN_NOT_FOUND is still returned below */ + + return idx; +} + +/* + * Pull the rsp from the cpu_user_regs struct in the header + * turn it into a task, and match it with the active_set. + * Unfortunately, the registers in the vcpu_guest_context + * are not necessarily those of the panic task, so for now + * let get_active_set_panic_task() get the right task. 
+ */ +static ulong +x86_64_xendump_panic_task(struct xendump_data *xd) +{ + int i; + ulong rsp; /* stack pointer read from the vcpu_guest_context stored in the dumpfile */ + off_t offset; /* dumpfile offset of user_regs.rsp inside that context */ + ulong task; + + if (INVALID_MEMBER(vcpu_guest_context_user_regs) || + INVALID_MEMBER(cpu_user_regs_rsp)) /* validate the member whose OFFSET() is used below */ + return NO_TASK; + + offset = (off_t)xd->xc_core.header.xch_ctxt_offset + + (off_t)OFFSET(vcpu_guest_context_user_regs) + + (off_t)OFFSET(cpu_user_regs_rsp); + + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + return NO_TASK; + + if (read(xd->xfd, &rsp, sizeof(ulong)) != sizeof(ulong)) + return NO_TASK; + + if (IS_KVADDR(rsp) && (task = stkptr_to_task(rsp))) { + + for (i = 0; i < NR_CPUS; i++) { /* accept the task only if it is in the active set */ + if (task == tt->active_set[i]) { + if (CRASHDEBUG(0)) + error(INFO, + "x86_64_xendump_panic_task: rsp: %lx -> task: %lx\n", + rsp, task); + return task; + } + } + + error(WARNING, + "x86_64_xendump_panic_task: rsp: %lx -> task: %lx (not active)\n", + rsp, task); /* both %lx conversions need an argument */ + } + + return NO_TASK; +} + +/* + * Because of an off-by-one vcpu bug in early xc_domain_dumpcore() + * instantiations, the registers in the vcpu_guest_context are not + * necessarily those of the panic task. Furthermore, the rsp is + * seemingly unassociated with the task, presumably due to a hypervisor + * callback, so only accept the contents if they refer to the panic + * task's stack. 
+ */ +static void +x86_64_get_xendump_regs(struct xendump_data *xd, struct bt_info *bt, ulong *rip, ulong *rsp) +{ /* Sets *rip and *rsp from the dumpfile's vcpu_guest_context when that rsp maps to bt->task; otherwise falls back to machdep->get_stack_frame() */ + ulong task, xrip, xrsp; + off_t offset; + struct syment *sp; + int cpu; + + if (INVALID_MEMBER(vcpu_guest_context_user_regs) || + INVALID_MEMBER(cpu_user_regs_rip) || + INVALID_MEMBER(cpu_user_regs_rsp)) + goto generic; + + offset = (off_t)xd->xc_core.header.xch_ctxt_offset + + (off_t)OFFSET(vcpu_guest_context_user_regs) + + (off_t)OFFSET(cpu_user_regs_rsp); + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + goto generic; + if (read(xd->xfd, &xrsp, sizeof(ulong)) != sizeof(ulong)) + goto generic; + + offset = (off_t)xd->xc_core.header.xch_ctxt_offset + + (off_t)OFFSET(vcpu_guest_context_user_regs) + + (off_t)OFFSET(cpu_user_regs_rip); + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + goto generic; + if (read(xd->xfd, &xrip, sizeof(ulong)) != sizeof(ulong)) + goto generic; + + /* + * This works -- comes from smp_send_stop call in panic. + * But xendump_panic_hook() will forestall this function + * from being called (for now). + */ + if (IS_KVADDR(xrsp) && (task = stkptr_to_task(xrsp)) && + (task == bt->task)) { + if (CRASHDEBUG(1)) + fprintf(xd->ofp, + "hooks from vcpu_guest_context: rip: %lx rsp: %lx\n", xrip, xrsp); + *rip = xrip; + *rsp = xrsp; + return; + } + +generic: + + machdep->get_stack_frame(bt, rip, rsp); + + /* + * If this is an active task showing itself in schedule(), + * then the thread_struct rsp is stale. It has to be coming + * from a callback via the interrupt stack. 
+ */ + if (is_task_active(bt->task) && (symbol_value("thread_return") == *rip)) { + cpu = bt->tc->processor; + xrsp = machdep->machspec->stkinfo.ibase[cpu] + + machdep->machspec->stkinfo.isize - sizeof(ulong); /* start at the last word of this cpu's interrupt stack and walk toward its base */ + + while (readmem(xrsp, KVADDR, &xrip, + sizeof(ulong), "xendump rsp", RETURN_ON_ERROR)) { + if ((sp = value_search(xrip, (ulong *)&offset)) && /* offset is reused as scratch for the symbol offset; NOTE(review): relies on off_t and ulong having the same size */ + STREQ(sp->name, "smp_really_stop_cpu") && offset) { + *rip = xrip; + *rsp = xrsp; + if (CRASHDEBUG(1)) + error(INFO, + "switch thread_return to smp_really_stop_cpu\n"); + break; + } + xrsp -= sizeof(ulong); + if (xrsp <= machdep->machspec->stkinfo.ibase[cpu]) + break; + } + } +} #endif /* X86_64 */ --- crash/extensions.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/extensions.c 2005-11-10 09:47:46.000000000 -0500 @@ -18,9 +18,6 @@ #include "defs.h" #include -static void load_extension(char *); -static void unload_extension(char *); - #define DUMP_EXTENSIONS (0) #define LOAD_EXTENSION (1) #define UNLOAD_EXTENSION (2) @@ -171,7 +168,7 @@ /* * Load an extension library. */ -static void +void load_extension(char *lib) { struct extension_table *ext; @@ -252,7 +249,7 @@ /* * Unload all, or as specified, extension libraries. 
*/ -static void +void unload_extension(char *lib) { struct extension_table *ext; --- crash/va_server.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/va_server.c 2006-03-22 08:55:34.000000000 -0500 @@ -57,13 +57,15 @@ extern int monitor_memory(long *, long *, long *, long *); -int Page_Size = PAGE_SIZE; /* temporary setting until disk header is read */ +int Page_Size; ulong vas_debug = 0; extern void *malloc(size_t); int va_server_init(char *crash_file, u_long *start, u_long *end, u_long *stride) { + Page_Size = getpagesize(); /* temporary setting until disk header is read */ + if(read_map(crash_file)) { if(va_server_init_v1(crash_file, start, end, stride)) return -1; --- crash/symbols.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/symbols.c 2006-05-10 14:14:28.000000000 -0400 @@ -1,8 +1,8 @@ /* symbols.c - core analysis suite * * Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. - * Copyright (C) 2002, 2003, 2004, 2005 David Anderson - * Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2002, 2003, 2004, 2005, 2006 David Anderson + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -3463,6 +3463,22 @@ } /* + * Same as above, but allow for failure. + */ +int +try_get_symbol_data(char *symbol, long size, void *local) +{ + struct syment *sp; + + if ((sp = symbol_search(symbol)) && + readmem(sp->value, KVADDR, local, + size, symbol, RETURN_ON_ERROR|QUIET)) + return TRUE; + + return FALSE; +} + +/* * Return the value of a given symbol. 
*/ ulong @@ -5433,6 +5449,8 @@ lenptr = &array_table.kmem_cache_s_c_name; else if (STREQ(s, "kmem_cache_s.array")) lenptr = &array_table.kmem_cache_s_array; + else if (STREQ(s, "kmem_cache.array")) + lenptr = &array_table.kmem_cache_s_array; else if (STREQ(s, "kmem_cache_s.cpudata")) lenptr = &array_table.kmem_cache_s_cpudata; else if (STREQ(s, "log_buf")) @@ -5766,6 +5784,8 @@ OFFSET(mm_struct_pgd)); fprintf(fp, " mm_struct_rss: %ld\n", OFFSET(mm_struct_rss)); + fprintf(fp, " mm_struct_anon_rss: %ld\n", + OFFSET(mm_struct_anon_rss)); fprintf(fp, " mm_struct_total_vm: %ld\n", OFFSET(mm_struct_total_vm)); fprintf(fp, " mm_struct_start_code: %ld\n", @@ -5972,6 +5992,16 @@ fprintf(fp, " irq_cpustat_t___softirq_mask: %ld\n", OFFSET(irq_cpustat_t___softirq_mask)); + fprintf(fp, " files_struct_fdt: %ld\n", + OFFSET(files_struct_fdt)); + fprintf(fp, " fdtable_max_fds: %ld\n", + OFFSET(fdtable_max_fds)); + fprintf(fp, " fdtable_max_fdset: %ld\n", + OFFSET(fdtable_max_fdset)); + fprintf(fp, " fdtable_open_fds: %ld\n", + OFFSET(fdtable_open_fds)); + fprintf(fp, " fdtable_fd: %ld\n", + OFFSET(fdtable_fd)); fprintf(fp, " files_struct_max_fds: %ld\n", OFFSET(files_struct_max_fds)); fprintf(fp, " files_struct_max_fdset: %ld\n", @@ -6217,6 +6247,11 @@ fprintf(fp, " inet_opt_num: %ld\n", OFFSET(inet_opt_num)); + fprintf(fp, " ipv6_pinfo_rcv_saddr: %ld\n", + OFFSET(ipv6_pinfo_rcv_saddr)); + fprintf(fp, " ipv6_pinfo_daddr: %ld\n", + OFFSET(ipv6_pinfo_daddr)); + fprintf(fp, " timer_list_list: %ld\n", OFFSET(timer_list_list)); fprintf(fp, " timer_list_next: %ld\n", @@ -6291,6 +6326,8 @@ OFFSET(zone_struct_size)); fprintf(fp, " zone_struct_memsize: %ld\n", OFFSET(zone_struct_memsize)); + fprintf(fp, " zone_struct_zone_start_pfn: %ld\n", + OFFSET(zone_struct_zone_start_pfn)); fprintf(fp, " zone_struct_zone_start_paddr: %ld\n", OFFSET(zone_struct_zone_start_paddr)); fprintf(fp, " zone_struct_zone_start_mapnr: %ld\n", @@ -6471,10 +6508,25 @@ OFFSET(x8664_pda_irqstackptr)); fprintf(fp, " 
x8664_pda_level4_pgt: %ld\n", OFFSET(x8664_pda_level4_pgt)); + fprintf(fp, " x8664_pda_me: %ld\n", + OFFSET(x8664_pda_me)); fprintf(fp, " tss_struct_ist: %ld\n", OFFSET(tss_struct_ist)); + fprintf(fp, " mem_section_section_mem_map: %ld\n", + OFFSET(mem_section_section_mem_map)); + fprintf(fp, " vcpu_guest_context_user_regs: %ld\n", + OFFSET(vcpu_guest_context_user_regs)); + fprintf(fp, " cpu_user_regs_eip: %ld\n", + OFFSET(cpu_user_regs_eip)); + fprintf(fp, " cpu_user_regs_esp: %ld\n", + OFFSET(cpu_user_regs_esp)); + fprintf(fp, " cpu_user_regs_rip: %ld\n", + OFFSET(cpu_user_regs_rip)); + fprintf(fp, " cpu_user_regs_rsp: %ld\n", + OFFSET(cpu_user_regs_rsp)); + fprintf(fp, "\n size_table:\n"); fprintf(fp, " page: %ld\n", SIZE(page)); @@ -6512,6 +6564,7 @@ fprintf(fp, " fs_struct: %ld\n", SIZE(fs_struct)); fprintf(fp, " files_struct: %ld\n", SIZE(files_struct)); + fprintf(fp, " fdtable: %ld\n", SIZE(fdtable)); fprintf(fp, " file: %ld\n", SIZE(file)); fprintf(fp, " inode: %ld\n", SIZE(inode)); fprintf(fp, " vfsmount: %ld\n", SIZE(vfsmount)); @@ -6546,6 +6599,7 @@ fprintf(fp, " sock: %ld\n", SIZE(sock)); fprintf(fp, " inet_sock: %ld\n", SIZE(inet_sock)); fprintf(fp, " socket: %ld\n", SIZE(socket)); + fprintf(fp, " in6_addr: %ld\n", SIZE(in6_addr)); fprintf(fp, " signal_struct: %ld\n", SIZE(signal_struct)); fprintf(fp, " signal_queue: %ld\n", @@ -6601,6 +6655,8 @@ fprintf(fp, " x8664_pda: %ld\n", SIZE(x8664_pda)); + fprintf(fp, " ppc64_paca: %ld\n", + SIZE(ppc64_paca)); fprintf(fp, " gate_struct: %ld\n", SIZE(gate_struct)); fprintf(fp, " tss_struct: %ld\n", @@ -6609,7 +6665,10 @@ SIZE(task_struct_start_time)); fprintf(fp, " cputime_t: %ld\n", SIZE(cputime_t)); - + fprintf(fp, " mem_section: %ld\n", + SIZE(mem_section)); + fprintf(fp, " pid_link: %ld\n", + SIZE(pid_link)); fprintf(fp, "\n array_table:\n"); /* --- crash/cmdline.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/cmdline.c 2005-11-18 10:49:59.000000000 -0500 @@ -71,14 +71,17 @@ * 4. from a terminal. * 5. 
from a pipe, if stdin is a pipe rather than a terminal. */ - if (pc->flags & RCHOME_IFILE) + if (pc->flags & RCHOME_IFILE) { sprintf(pc->command_line, "< %s/.%src", pc->home, pc->program_name); - else if (pc->flags & RCLOCAL_IFILE) + pc->flags |= INIT_IFILE; + } else if (pc->flags & RCLOCAL_IFILE) { sprintf(pc->command_line, "< .%src", pc->program_name); - else if (pc->flags & CMDLINE_IFILE) + pc->flags |= INIT_IFILE; + } else if (pc->flags & CMDLINE_IFILE) { sprintf(pc->command_line, "< %s", pc->input_file); - else if (pc->flags & TTY) { + pc->flags |= INIT_IFILE; + } else if (pc->flags & TTY) { if (!(pc->readline = readline(pc->prompt))) { args[0] = NULL; fprintf(fp, "\n"); @@ -918,7 +921,7 @@ wait_for_children(ZOMBIES_ONLY); - pc->flags &= ~(RUNTIME_IFILE|_SIGINT_); + pc->flags &= ~(INIT_IFILE|RUNTIME_IFILE|_SIGINT_); pc->sigint_cnt = 0; pc->redirect = 0; pc->pipe_command[0] = NULLCHAR; --- crash/lkcd_common.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/lkcd_common.c 2005-11-10 15:24:16.000000000 -0500 @@ -53,6 +53,8 @@ struct lkcd_environment lkcd_environment = { 0 }; struct lkcd_environment *lkcd = &lkcd_environment; +static int uncompress_errloc; +static int uncompress_recover(unsigned char *, ulong, unsigned char *, ulong); ulonglong fix_lkcd_address(ulonglong addr) @@ -208,6 +210,7 @@ case LKCD_DUMP_V8: case LKCD_DUMP_V9: + case LKCD_DUMP_V10: lkcd->version = LKCD_DUMP_V8; return TRUE; @@ -1164,40 +1167,105 @@ return 1; } +/* Try every single-bit flip of source; returns the bit offset (byte * 8 + bit) of the one flip that decompresses to exactly destlen bytes (dest is then decompressed with that flip applied), or negative if it's not able to correct */ +static int +uncompress_recover(unsigned char *dest, ulong destlen, + unsigned char *source, ulong sourcelen) +{ + int byte, bit; + ulong retlen = destlen; + int good_decomp = 0, good_rv = -1; + + /* Generate all single bit errors */ + if (sourcelen > 16384) { + lkcd_print("uncompress_recover: sourcelen %ld too long\n", + sourcelen); + return(-1); + } + for (byte = 0; byte < sourcelen; byte++) { + for (bit = 0; bit < 8; bit++) { + source[byte] ^= (1 << bit); + retlen = destlen; /* retlen is in/out for uncompress(); pass the full buffer size on every attempt */ 
+ if (uncompress(dest, &retlen, source, sourcelen) == Z_OK && + retlen == destlen) { + good_decomp++; + lkcd_print("good for flipping byte %d bit %d\n", + byte, bit); + good_rv = bit + byte * 8; + } + + /* Put it back */ + source[byte] ^= (1 << bit); + } + } + if (good_decomp == 0) { + lkcd_print("Could not correct gzip errors.\n"); + return -2; + } else if (good_decomp > 1) { + lkcd_print("Too many valid gzip decompressions: %d.\n", good_decomp); + return -3; + } else { + source[good_rv >> 3] ^= 1 << (good_rv % 8); /* good_rv is byte * 8 + bit, so the byte index is good_rv >> 3 */ + retlen = destlen; /* restore the buffer size before the final decompression */ + uncompress(dest, &retlen, source, sourcelen); + source[good_rv >> 3] ^= 1 << (good_rv % 8); /* leave source as it was found */ + return good_rv; + } +} + + /* * Uncompress a gzip'd buffer. + * + * Returns FALSE on error. If set, then + * a non-negative value of uncompress_errloc indicates the location of + * a single-bit error, and the data may be used. */ static int lkcd_uncompress_gzip(unsigned char *dest, ulong destlen, unsigned char *source, ulong sourcelen) { ulong retlen = destlen; + int rc; switch (uncompress(dest, &retlen, source, sourcelen)) { case Z_OK: - if (retlen == destlen) - return TRUE; + if (retlen == destlen) { + rc = TRUE; + break; + } lkcd_print("uncompress: returned length not page size: %ld\n", retlen); - return FALSE; + rc = FALSE; + break; case Z_MEM_ERROR: lkcd_print("uncompress: Z_MEM_ERROR (not enough memory)\n"); - return FALSE; + rc = FALSE; + break; case Z_BUF_ERROR: lkcd_print("uncompress: " "Z_BUF_ERROR (not enough room in output buffer)\n"); - return FALSE; + rc = FALSE; + break; case Z_DATA_ERROR: lkcd_print("uncompress: Z_DATA_ERROR (input data corrupted)\n"); - return FALSE; + rc = FALSE; + break; + default: + rc = FALSE; + break; } - return FALSE; + if (rc == FALSE) { + uncompress_errloc = + uncompress_recover(dest, destlen, source, sourcelen); + } + return rc; } @@ -1252,8 +1320,9 @@ dp_flags = lkcd->get_dp_flags(); dp_address = lkcd->get_dp_address(); - if (dp_flags & LKCD_DUMP_END) + if (dp_flags & LKCD_DUMP_END) { return LKCD_DUMPFILE_END; + } if ((lkcd->flags & LKCD_VALID) && 
(page > lkcd->total_pages)) lkcd->total_pages = page; --- crash/lkcd_v7.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/lkcd_v7.c 2005-11-10 15:21:50.000000000 -0500 @@ -89,7 +89,11 @@ ifd = 0; #ifdef LKCD_INDEX_FILE - lkcd->memory_pages = (dh->dh_memory_size * (getpagesize()/lkcd->page_size)) * 2; + if (dh->dh_memory_end < 0x1000000000LL) { + lkcd->memory_pages = dh->dh_memory_end / lkcd->page_size + 1; + } else { + lkcd->memory_pages = (dh->dh_memory_size * (getpagesize()/lkcd->page_size)) * 2; + } dump_index_size = (lkcd->memory_pages * sizeof(off_t)); lkcd->page_offsets = 0; strcpy(dumpfile_index_name, dumpfile); --- crash/lkcd_v8.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/lkcd_v8.c 2005-12-15 08:59:52.000000000 -0500 @@ -26,6 +26,7 @@ // static dump_header_asm_t dump_header_asm_v8 = { 0 }; static dump_page_t dump_page = { 0 }; static void mclx_cache_page_headers_v8(void); +static off_t lkcd_offset_to_first_page = LKCD_OFFSET_TO_FIRST_PAGE; /* * Verify and initialize the LKCD environment, storing the common data @@ -56,10 +57,13 @@ if (read(lkcd->fd, dh, sizeof(dump_header_t)) != sizeof(dump_header_t)) return FALSE; - if ((dh->dh_version & LKCD_DUMP_VERSION_NUMBER_MASK) == LKCD_DUMP_V9) + if ((dh->dh_version & LKCD_DUMP_VERSION_NUMBER_MASK) == LKCD_DUMP_V9){ if (read(lkcd->fd, &dh_dump_buffer_size, sizeof(dh_dump_buffer_size)) != sizeof(dh_dump_buffer_size)) return FALSE; + lkcd_offset_to_first_page = dh_dump_buffer_size; + } else + lkcd_offset_to_first_page = LKCD_OFFSET_TO_FIRST_PAGE; lkcd->dump_page = dp; lkcd->dump_header = dh; @@ -146,7 +150,7 @@ lkcd->compression = dh->dh_dump_compress; lkcd->page_header_size = sizeof(dump_page_t); - lseek(lkcd->fd, LKCD_OFFSET_TO_FIRST_PAGE, SEEK_SET); + lseek(lkcd->fd, lkcd_offset_to_first_page, SEEK_SET); /* * Read all of the pages and save the page offsets for lkcd_lseek(). @@ -483,7 +487,7 @@ /* * Determine the granularity between offsets. 
*/ - if (lseek(lkcd->fd, page_headers[0] + LKCD_OFFSET_TO_FIRST_PAGE, + if (lseek(lkcd->fd, page_headers[0] + lkcd_offset_to_first_page, SEEK_SET) == -1) return; if (read(lkcd->fd, dp, lkcd->page_header_size) != @@ -491,7 +495,7 @@ return; physaddr1 = (dp->dp_address - lkcd->kvbase) << lkcd->page_shift; - if (lseek(lkcd->fd, page_headers[1] + LKCD_OFFSET_TO_FIRST_PAGE, + if (lseek(lkcd->fd, page_headers[1] + lkcd_offset_to_first_page, SEEK_SET) == -1) return; if (read(lkcd->fd, dp, lkcd->page_header_size) @@ -508,7 +512,7 @@ for (i = 0; i < (MCLX_PAGE_HEADERS-1); i++) { if (!page_headers[i]) break; - lkcd->curhdroffs = page_headers[i] + LKCD_OFFSET_TO_FIRST_PAGE; + lkcd->curhdroffs = page_headers[i] + lkcd_offset_to_first_page; set_mb_benchmark((granularity * (i+1))/lkcd->page_size); } } --- crash/s390_dump.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/s390_dump.c 2006-05-15 17:15:56.000000000 -0400 @@ -16,7 +16,7 @@ * GNU General Public License for more details. */ #include "defs.h" -#include +//#include #include "ibm_common.h" static FILE * s390_file; @@ -69,10 +69,13 @@ return WRITE_ERROR; } +#define S390_PAGE_SHIFT 12 +#define S390_PAGE_SIZE (1UL << S390_PAGE_SHIFT) + uint s390_page_size(void) { - return PAGE_SIZE; + return S390_PAGE_SIZE; } int --- crash/lkcd_x86_trace.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/lkcd_x86_trace.c 2006-05-15 16:28:35.000000000 -0400 @@ -47,11 +47,13 @@ static int setup_trace_rec(kaddr_t, kaddr_t, int, trace_t *); static int valid_ra(kaddr_t); static int valid_ra_function(kaddr_t, char *); +static int eframe_incr(kaddr_t, char *); static int find_trace(kaddr_t, kaddr_t, kaddr_t, kaddr_t, trace_t *, int); static void dump_stack_frame(trace_t *, sframe_t *, FILE *); static void print_trace(trace_t *, int, FILE *); struct pt_regs; static int eframe_type(struct pt_regs *); +char *funcname_display(char *); static void print_eframe(FILE *, struct pt_regs *); static void trace_banner(FILE *); static void 
print_kaddr(kaddr_t, FILE *, int); @@ -505,7 +507,7 @@ { "receive_chars", NULL, COMPILER_VERSION_EQUAL, GCC(2,96,0), 0, 0, 48 }, { "default_idle", NULL, - COMPILER_VERSION_START, GCC(3,3,2), 0, -4, 0 }, + COMPILER_VERSION_START, GCC(2,96,0), 0, -4, 0 }, { NULL, NULL, 0, 0, 0, 0, 0 }, }; @@ -1118,7 +1120,9 @@ } #include +#ifndef REDHAT #include +#endif #define KERNEL_EFRAME 0 #define USER_EFRAME 1 #define KERNEL_EFRAME_SZ 13 /* no ss and esp */ @@ -1153,6 +1157,9 @@ else if (((regs->xcs & 0xffff) == 0x60) && ((regs->xds & 0xffff) == 0x7b)) return KERNEL_EFRAME; + else if (XEN() && ((regs->xcs & 0xffff) == 0x61) && + ((regs->xds & 0xffff) == 0x7b)) + return KERNEL_EFRAME; #endif else if (((regs->xcs & 0xffff) == __USER_CS) && ((regs->xds & 0xffff) == __USER_DS)) @@ -1206,6 +1213,93 @@ } \ } #endif + +/* + * Determine how much to increment the stack pointer to find the + * exception frame associated with a generic "error_code" or "nmi" + * exception. + * + * The incoming addr is that of the call to the generic error_code + * or nmi exception handler function. Until later 2.6 kernels, the next + * instruction had always been an "addl $8,%esp". However, with later + * 2.6 kernels, that esp adjustment is no long valid, and there will be + * an immediate "jmp" instruction. Returns 4 or 12, whichever is appropriate. + * Cache the value the first time, and allow for future changes or additions. 
+ */ + +#define NMI_ADJ (0) +#define ERROR_CODE_ADJ (1) +#define EFRAME_ADJUSTS (ERROR_CODE_ADJ+1) + +static int eframe_adjust[EFRAME_ADJUSTS] = { 0 }; + +static int +eframe_incr(kaddr_t addr, char *funcname) +{ + instr_rec_t irp; + kaddr_t next; + int size, adj, val; + + if (STRNEQ(funcname, "nmi")) { + adj = NMI_ADJ; + val = eframe_adjust[NMI_ADJ]; + } else if (strstr(funcname, "error_code")) { + adj = ERROR_CODE_ADJ; + val = eframe_adjust[ERROR_CODE_ADJ]; + } else { + adj = -1; + val = 0; + error(INFO, + "unexpected exception frame marker: %lx (%s)\n", + addr, funcname); + } + + if (val) { + console("eframe_incr(%lx, %s): eframe_adjust[%d]: %d\n", + addr, funcname, adj, val); + return val; + } + + console("eframe_incr(%lx, %s): TBD:\n", addr, funcname); + + bzero(&irp, sizeof(irp)); + irp.aflag = 1; + irp.dflag = 1; + if (!(size = get_instr_info(addr, &irp))) { + if (CRASHDEBUG(1)) + error(INFO, + "eframe_incr(%lx, %s): get_instr_info(%lx) failed\n", + addr, funcname, addr); + return((THIS_KERNEL_VERSION > LINUX(2,6,9)) ? 4 : 12); + } + console(" addr: %lx size: %d opcode: 0x%x insn: \"%s\"\n", + addr, size, irp.opcode, irp.opcodep->name); + + next = addr + size; + bzero(&irp, sizeof(irp)); + irp.aflag = 1; + irp.dflag = 1; + if (!(size = get_instr_info(next, &irp))) { + if (CRASHDEBUG(1)) + error(INFO, + "eframe_incr(%lx, %s): get_instr_info(%lx) failed\n", + addr, funcname, next); + return((THIS_KERNEL_VERSION > LINUX(2,6,9)) ? 
4 : 12); + } + console(" next: %lx size: %d opcode: 0x%x insn: \"%s\"\n", + next, size, irp.opcode, irp.opcodep->name); + + if (STREQ(irp.opcodep->name, "jmp")) + val = 4; + else + val = 12; + + if (adj >= 0) + eframe_adjust[adj] = val; + + return val; +} + /* * find_trace() * @@ -1253,6 +1347,7 @@ int flag; int interrupted_system_call = FALSE; struct bt_info *bt = trace->bt; + struct pt_regs *pt; #endif sbp = trace->stack[curstkidx].ptr; sbase = trace->stack[curstkidx].addr; @@ -1503,12 +1598,13 @@ return(trace->nframes); #ifdef REDHAT } else if (strstr(func_name, "error_code") + || STREQ(func_name, "nmi_stack_correct") || STREQ(func_name, "nmi")) { #else } else if (strstr(func_name, "error_code")) { #endif /* an exception frame */ - sp = curframe->fp+12; + sp = curframe->fp + eframe_incr(pc, func_name); bp = sp + (KERNEL_EFRAME_SZ-1)*4; asp = (uaddr_t*)((uaddr_t)sbp + (STACK_SIZE - @@ -1572,6 +1668,22 @@ } } + /* + * Check for hypervisor_callback from user-space. + */ + if ((bt->flags & BT_XEN_STOP_THIS_CPU) && bt->tc->mm_struct && + STREQ(kl_funcname(curframe->pc), "hypervisor_callback")) { + pt = (struct pt_regs *)(curframe->asp+1); + if (eframe_type(pt) == USER_EFRAME) { + if (program_context.debug >= 1) /* pc above */ + error(INFO, + "hypervisor_callback from user space\n"); + curframe->asp++; + curframe->flag |= EX_FRAME; + return(trace->nframes); + } + } + /* Make sure our next frame pointer is valid (in the stack). */ if ((bp < sbase) || (bp >= saddr)) { @@ -1684,8 +1796,15 @@ (bt->flags & (BT_HARDIRQ|BT_SOFTIRQ))) return; - print_stack_entry(trace->bt, - trace->bt->flags & BT_BUMP_FRAME_LEVEL ? + if ((frmp->level == 0) && (bt->flags & BT_XEN_STOP_THIS_CPU)) { + print_stack_entry(trace->bt, 0, trace->bt->stkptr, + symbol_value("stop_this_cpu"), + value_symbol(symbol_value("stop_this_cpu")), + frmp, ofp); + } + + print_stack_entry(trace->bt, (trace->bt->flags & + (BT_BUMP_FRAME_LEVEL|BT_XEN_STOP_THIS_CPU)) ? frmp->level + 1 : frmp->level, fp ? 
(ulong)fp : trace->bt->stkptr, (ulong)frmp->pc, frmp->funcname, frmp, ofp); @@ -1708,6 +1827,10 @@ #endif if (frmp->flag & EX_FRAME) { pt = (struct pt_regs *)frmp->asp; + if (CRASHDEBUG(1)) + fprintf(ofp, + " EXCEPTION FRAME: %lx\n", + (unsigned long)frmp->sp); print_eframe(ofp, pt); } #ifdef REDHAT @@ -1789,6 +1912,114 @@ if (kt->flags & RA_SEEK) bt->flags |= BT_SPECULATE; + if (XENDUMP_DUMPFILE() && is_task_active(bt->task) && + STREQ(kl_funcname(bt->instptr), "stop_this_cpu")) { + /* + * bt->instptr of "stop_this_cpu" is not a return + * address -- replace it with the actual return + * address found at the bt->stkptr location. + */ + if (readmem((ulong)bt->stkptr, KVADDR, &eip, + sizeof(ulong), "xendump eip", RETURN_ON_ERROR)) + bt->instptr = eip; + bt->flags |= BT_XEN_STOP_THIS_CPU; + if (CRASHDEBUG(1)) + error(INFO, "replacing stop_this_cpu with %s\n", + kl_funcname(bt->instptr)); + } + + if (XENDUMP_DUMPFILE() && is_idle_thread(bt->task) && + is_task_active(bt->task) && + !(kt->xen_flags & XEN_SUSPEND) && + STREQ(kl_funcname(bt->instptr), "schedule")) { + /* + * This is an invalid (stale) schedule reference + * left in the task->thread. Move down the stack + * until the smp_call_function_interrupt return + * address is found. + */ + saddr = bt->stkptr; + while (readmem(saddr, KVADDR, &eip, + sizeof(ulong), "xendump esp", RETURN_ON_ERROR)) { + if (STREQ(kl_funcname(eip), "smp_call_function_interrupt")) { + bt->instptr = eip; + bt->stkptr = saddr; + bt->flags |= BT_XEN_STOP_THIS_CPU; + if (CRASHDEBUG(1)) + error(INFO, + "switch schedule to smp_call_function_interrupt\n"); + break; + } + saddr -= sizeof(void *); + if (saddr <= bt->stackbase) + break; + } + } + + if (XENDUMP_DUMPFILE() && is_idle_thread(bt->task) && + is_task_active(bt->task) && + (kt->xen_flags & XEN_SUSPEND) && + STREQ(kl_funcname(bt->instptr), "schedule")) { + int framesize = 0; + /* + * This is an invalid (stale) schedule reference + * left in the task->thread. 
Move down the stack + * until the hypercall_page() return address is + * found, and fix up its framesize as we go. + */ + saddr = bt->stacktop; + while (readmem(saddr, KVADDR, &eip, + sizeof(ulong), "xendump esp", RETURN_ON_ERROR)) { + + if (STREQ(kl_funcname(eip), "xen_idle")) + framesize += sizeof(ulong); + else if (framesize) + framesize += sizeof(ulong); + + if (STREQ(kl_funcname(eip), "hypercall_page")) { + int framesize = 24; + bt->instptr = eip; + bt->stkptr = saddr; + if (CRASHDEBUG(1)) + error(INFO, + "switch schedule to hypercall_page (framesize: %d)\n", + framesize); + FRAMESIZE_CACHE_ENTER(eip, &framesize); + break; + } + saddr -= sizeof(void *); + if (saddr <= bt->stackbase) + break; + } + } + + if (XENDUMP_DUMPFILE() && !is_idle_thread(bt->task) && + is_task_active(bt->task) && + STREQ(kl_funcname(bt->instptr), "schedule")) { + /* + * This is an invalid (stale) schedule reference + * left in the task->thread. Move down the stack + * until the smp_call_function_interrupt return + * address is found. + */ + saddr = bt->stacktop; + while (readmem(saddr, KVADDR, &eip, + sizeof(ulong), "xendump esp", RETURN_ON_ERROR)) { + if (STREQ(kl_funcname(eip), "smp_call_function_interrupt")) { + bt->instptr = eip; + bt->stkptr = saddr; + bt->flags |= BT_XEN_STOP_THIS_CPU; + if (CRASHDEBUG(1)) + error(INFO, + "switch schedule to smp_call_function_interrupt\n"); + break; + } + saddr -= sizeof(void *); + if (saddr <= bt->stackbase) + break; + } + } + if (!verify_back_trace(bt) && !recoverable(bt, ofp) && !BT_REFERENCE_CHECK(bt)) error(INFO, "cannot resolve stack trace:\n"); @@ -2192,11 +2423,12 @@ else buf[0] = NULLCHAR; - if ((sp = eframe_label(funcname, eip))) + if ((sp = eframe_label(funcname, eip))) funcname = sp->name; fprintf(ofp, "%s#%d [%8lx] %s%s at %lx\n", - level < 10 ? " " : "", level, esp, funcname, + level < 10 ? " " : "", level, esp, + funcname_display(funcname), strlen(buf) ? 
buf : "", eip); if (bt->flags & BT_LINE_NUMBERS) { @@ -2325,6 +2557,25 @@ } /* + * If it makes sense to display a different function/label name + * in a stack entry, it can be done here. Unlike eframe_label(), + * this routine won't cause the passed-in function name pointer + * to be changed -- this is strictly for display purposes only. + */ +char * +funcname_display(char *funcname) +{ + struct syment *sp; + + if (STREQ(funcname, "nmi_stack_correct") && + (sp = symbol_search("nmi"))) + return sp->name; + + return funcname; +} + + +/* * Cache 2k starting from the passed-in text address. This sits on top * of the instrbuf 256-byte cache, but we don't want to extend its size * because we can run off the end of a module segment -- if this routine --- crash/netdump.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/netdump.c 2006-02-14 14:36:21.000000000 -0500 @@ -22,12 +22,12 @@ physaddr_t phys_end; }; -struct netdump_data { +struct vmcore_data { ulong flags; int ndfd; FILE *ofp; uint header_size; - char *netdump_header; + char *elf_header; uint num_pt_load_segments; struct pt_load_segment *pt_load_segments; Elf32_Ehdr *elf32; @@ -40,11 +40,14 @@ void *nt_prpsinfo; void *nt_taskstruct; ulong task_struct; + uint page_size; ulong switch_stack; + uint num_prstatus_notes; + void *nt_prstatus_percpu[NR_CPUS]; }; -static struct netdump_data netdump_data = { 0 }; -static struct netdump_data *nd = &netdump_data; +static struct vmcore_data vmcore_data = { 0 }; +static struct vmcore_data *nd = &vmcore_data; static void netdump_print(char *, ...); static void dump_Elf32_Ehdr(Elf32_Ehdr *); static void dump_Elf32_Phdr(Elf32_Phdr *, int); @@ -52,19 +55,19 @@ static void dump_Elf64_Ehdr(Elf64_Ehdr *); static void dump_Elf64_Phdr(Elf64_Phdr *, int); static size_t dump_Elf64_Nhdr(Elf64_Off offset, int); -static void get_netdump_regs_x86(struct bt_info *, ulong *, ulong *); -static void get_netdump_regs_x86_64(struct bt_info *, ulong *, ulong *); static void 
get_netdump_regs_ppc64(struct bt_info *, ulong *, ulong *); #define ELFSTORE 1 #define ELFREAD 0 + +#define MIN_PAGE_SIZE (4096) /* - * Determine whether a file is a netdump creation, and if TRUE, - * initialize the netdump_data structure. + * Determine whether a file is a netdump/diskdump/kdump creation, + * and if TRUE, initialize the vmcore_data structure. */ int -is_netdump(char *file, ulong source) +is_netdump(char *file, ulong source_query) { int i; int fd; @@ -77,6 +80,8 @@ size_t size, len, tot; Elf32_Off offset32; Elf64_Off offset64; + ulong tmp_flags; + char *tmp_elf_header; if ((fd = open(file, O_RDWR)) < 0) { if ((fd = open(file, O_RDONLY)) < 0) { @@ -99,11 +104,24 @@ goto bailout; } + tmp_flags = 0; elf32 = (Elf32_Ehdr *)&header[0]; elf64 = (Elf64_Ehdr *)&header[0]; /* - * Verify the ELF header + * Verify the ELF header, and determine the dumpfile format. + * + * For now, kdump vmcores differ from netdump/diskdump like so: + * + * 1. The first kdump PT_LOAD segment is packed just after + * the ELF header, whereas netdump/diskdump page-align + * the first PT_LOAD segment. + * 2. Each kdump PT_LOAD segment has a p_align field of zero, + * whereas netdump/diskdump have their p_align fields set + * to the system page-size. + * + * If either kdump difference is seen, presume kdump -- this + * is obviously subject to change. 
*/ if (STRNEQ(elf32->e_ident, ELFMAG) && (elf32->e_ident[EI_CLASS] == ELFCLASS32) && @@ -120,10 +138,16 @@ default: goto bailout; } - nd->flags |= NETDUMP_ELF32; + load32 = (Elf32_Phdr *) &header[sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)]; size = (size_t)load32->p_offset; + + if ((load32->p_offset & (MIN_PAGE_SIZE-1)) && + (load32->p_align == 0)) + tmp_flags |= KDUMP_ELF32; + else + tmp_flags |= NETDUMP_ELF32; } else if (STRNEQ(elf64->e_ident, ELFMAG) && (elf64->e_ident[EI_CLASS] == ELFCLASS64) && (elf64->e_ident[EI_VERSION] == EV_CURRENT) && @@ -153,35 +177,68 @@ else goto bailout; + case EM_386: + if ((elf64->e_ident[EI_DATA] == ELFDATA2LSB) && + machine_type("X86")) + break; + else + goto bailout; + default: goto bailout; } - nd->flags |= NETDUMP_ELF64; + load64 = (Elf64_Phdr *) &header[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)]; size = (size_t)load64->p_offset; + if ((load64->p_offset & (MIN_PAGE_SIZE-1)) && + (load64->p_align == 0)) + tmp_flags |= KDUMP_ELF64; + else + tmp_flags |= NETDUMP_ELF64; } else goto bailout; - if ((nd->netdump_header = (char *)malloc(size)) == NULL) { - fprintf(stderr, "cannot malloc netdump header buffer\n"); + switch (DUMPFILE_FORMAT(tmp_flags)) + { + case NETDUMP_ELF32: + case NETDUMP_ELF64: + if (source_query & (NETDUMP_LOCAL|NETDUMP_REMOTE)) + break; + else + goto bailout; + + case KDUMP_ELF32: + case KDUMP_ELF64: + if (source_query & KDUMP_LOCAL) + break; + else + goto bailout; + } + + if ((tmp_elf_header = (char *)malloc(size)) == NULL) { + fprintf(stderr, "cannot malloc ELF header buffer\n"); clean_exit(1); } - if (read(fd, nd->netdump_header, size) != size) { + if (read(fd, tmp_elf_header, size) != size) { sprintf(buf, "%s: read", file); perror(buf); + free(tmp_elf_header); goto bailout; } nd->ndfd = fd; - nd->flags |= source; + nd->elf_header = tmp_elf_header; + nd->flags = tmp_flags; + nd->flags |= source_query; - switch (nd->flags & (NETDUMP_ELF32|NETDUMP_ELF64)) + switch (DUMPFILE_FORMAT(nd->flags)) { case NETDUMP_ELF32: + case 
KDUMP_ELF32: nd->header_size = load32->p_offset; - nd->elf32 = (Elf32_Ehdr *)&nd->netdump_header[0]; + nd->elf32 = (Elf32_Ehdr *)&nd->elf_header[0]; nd->num_pt_load_segments = nd->elf32->e_phnum - 1; if ((nd->pt_load_segments = (struct pt_load_segment *) malloc(sizeof(struct pt_load_segment) * @@ -190,9 +247,11 @@ clean_exit(1); } nd->notes32 = (Elf32_Phdr *) - &nd->netdump_header[sizeof(Elf32_Ehdr)]; + &nd->elf_header[sizeof(Elf32_Ehdr)]; nd->load32 = (Elf32_Phdr *) - &nd->netdump_header[sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)]; + &nd->elf_header[sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)]; + if (DUMPFILE_FORMAT(nd->flags) == NETDUMP_ELF32) + nd->page_size = (uint)nd->load32->p_align; dump_Elf32_Ehdr(nd->elf32); dump_Elf32_Phdr(nd->notes32, ELFREAD); for (i = 0; i < nd->num_pt_load_segments; i++) @@ -205,8 +264,9 @@ break; case NETDUMP_ELF64: + case KDUMP_ELF64: nd->header_size = load64->p_offset; - nd->elf64 = (Elf64_Ehdr *)&nd->netdump_header[0]; + nd->elf64 = (Elf64_Ehdr *)&nd->elf_header[0]; nd->num_pt_load_segments = nd->elf64->e_phnum - 1; if ((nd->pt_load_segments = (struct pt_load_segment *) malloc(sizeof(struct pt_load_segment) * @@ -215,9 +275,11 @@ clean_exit(1); } nd->notes64 = (Elf64_Phdr *) - &nd->netdump_header[sizeof(Elf64_Ehdr)]; + &nd->elf_header[sizeof(Elf64_Ehdr)]; nd->load64 = (Elf64_Phdr *) - &nd->netdump_header[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)]; + &nd->elf_header[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)]; + if (DUMPFILE_FORMAT(nd->flags) == NETDUMP_ELF64) + nd->page_size = (uint)nd->load64->p_align; dump_Elf64_Ehdr(nd->elf64); dump_Elf64_Phdr(nd->notes64, ELFREAD); for (i = 0; i < nd->num_pt_load_segments; i++) @@ -230,6 +292,9 @@ break; } + if (CRASHDEBUG(1)) + netdump_memory_dump(fp); + return nd->header_size; bailout: @@ -243,7 +308,7 @@ int netdump_init(char *unused, FILE *fptr) { - if (!NETDUMP_VALID()) + if (!VMCORE_VALID()) return FALSE; nd->ofp = fptr; @@ -263,19 +328,19 @@ /* * The Elf32_Phdr has 32-bit fields for p_paddr, p_filesz and * 
p_memsz, so for now, multiple PT_LOAD segment support is - * restricted to 64-bit machines. Until a "standard" becomes - * available in the future that deals with physical memory - * segments that start at greater then 4GB, or memory segments - * sizes that are greater than 4GB (kexec?), then this feature - * is restricted to 64-bit machines. + * restricted to 64-bit machines for netdump/diskdump vmcores. + * However, kexec/kdump has introduced the optional use of a + * 64-bit ELF header for 32-bit processors. */ - switch (nd->flags & (NETDUMP_ELF32|NETDUMP_ELF64)) + switch (DUMPFILE_FORMAT(nd->flags)) { case NETDUMP_ELF32: offset = (off_t)paddr + (off_t)nd->header_size; break; case NETDUMP_ELF64: + case KDUMP_ELF32: + case KDUMP_ELF64: if (nd->num_pt_load_segments == 1) { offset = (off_t)paddr + (off_t)nd->header_size; break; @@ -302,24 +367,57 @@ if (read(nd->ndfd, bufptr, cnt) != cnt) return READ_ERROR; + return cnt; } /* - * Write to a netdump-created dumpfile. + * Write to a netdump-created dumpfile. Note that cmd_wr() does not + * allow writes to dumpfiles, so you can't get here from there. + * But, if it would ever be helpful, here it is... 
*/ int write_netdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr) { off_t offset; + struct pt_load_segment *pls; + int i; + + switch (DUMPFILE_FORMAT(nd->flags)) + { + case NETDUMP_ELF32: + offset = (off_t)paddr + (off_t)nd->header_size; + break; - offset = (off_t)paddr + (off_t)nd->header_size; + case NETDUMP_ELF64: + case KDUMP_ELF32: + case KDUMP_ELF64: + if (nd->num_pt_load_segments == 1) { + offset = (off_t)paddr + (off_t)nd->header_size; + break; + } - if (lseek(nd->ndfd, offset, SEEK_SET) != offset) + for (i = offset = 0; i < nd->num_pt_load_segments; i++) { + pls = &nd->pt_load_segments[i]; + if ((paddr >= pls->phys_start) && + (paddr < pls->phys_end)) { + offset = (off_t)(paddr - pls->phys_start) + + pls->file_offset; + break; + } + } + + if (!offset) + return READ_ERROR; + + break; + } + + if (lseek(nd->ndfd, offset, SEEK_SET) == -1) return SEEK_ERROR; if (write(nd->ndfd, bufptr, cnt) != cnt) - return WRITE_ERROR; + return READ_ERROR; return cnt; } @@ -330,7 +428,7 @@ FILE * set_netdump_fp(FILE *fp) { - if (!NETDUMP_VALID()) + if (!VMCORE_VALID()) return NULL; nd->ofp = fp; @@ -346,7 +444,7 @@ char buf[BUFSIZE]; va_list ap; - if (!fmt || !strlen(fmt) || !NETDUMP_VALID()) + if (!fmt || !strlen(fmt) || !VMCORE_VALID()) return; va_start(ap, fmt); @@ -362,33 +460,21 @@ uint netdump_page_size(void) { - uint pagesz; - - if (!NETDUMP_VALID()) + if (!VMCORE_VALID()) return 0; - switch (nd->flags & (NETDUMP_ELF32|NETDUMP_ELF64)) - { - case NETDUMP_ELF32: - pagesz = (uint)nd->load32->p_align; - break; - case NETDUMP_ELF64: - pagesz = (uint)nd->load64->p_align; - break; - } - - return pagesz; + return nd->page_size; } int netdump_free_memory(void) { - return (NETDUMP_VALID() ? 0 : 0); + return (VMCORE_VALID() ? 0 : 0); } int netdump_memory_used(void) { - return (NETDUMP_VALID() ? 0 : 0); + return (VMCORE_VALID() ? 
0 : 0); } /* @@ -414,21 +500,57 @@ #ifdef DAEMON return nd->task_struct; #else - int i; + int i, crashing_cpu; size_t len; char *user_regs; ulong ebp, esp, task; - if (!NETDUMP_VALID() || !get_active_set()) - return NO_TASK; + if (!VMCORE_VALID() || !get_active_set()) + goto panic_task_undetermined; - if (nd->task_struct) + if (nd->task_struct) { + if (CRASHDEBUG(1)) + error(INFO, + "get_netdump_panic_task: NT_TASKSTRUCT: %lx\n", + nd->task_struct); return nd->task_struct; + } + + switch (DUMPFILE_FORMAT(nd->flags)) + { + case NETDUMP_ELF32: + case NETDUMP_ELF64: + crashing_cpu = -1; + break; + + case KDUMP_ELF32: + case KDUMP_ELF64: + crashing_cpu = -1; + if (symbol_exists("crashing_cpu")) { + get_symbol_data("crashing_cpu", sizeof(int), &i); + if ((i >= 0) && (i < nd->num_prstatus_notes)) { + crashing_cpu = i; + if (CRASHDEBUG(1)) + error(INFO, + "get_netdump_panic_task: crashing_cpu: %d\n", + crashing_cpu); + } + } + + if ((nd->num_prstatus_notes > 1) && (crashing_cpu == -1)) + goto panic_task_undetermined; + break; + } + + if (nd->elf32 && (nd->elf32->e_machine == EM_386)) { + Elf32_Nhdr *note32; + + if ((nd->num_prstatus_notes > 1) && (crashing_cpu != -1)) + note32 = (Elf32_Nhdr *) + nd->nt_prstatus_percpu[crashing_cpu]; + else + note32 = (Elf32_Nhdr *)nd->nt_prstatus; - if (nd->elf32 && nd->elf32->e_machine == EM_386) { - Elf32_Nhdr *note32 = (Elf32_Nhdr *) - ((char *)nd->elf32 + nd->notes32->p_offset); - len = sizeof(Elf32_Nhdr); len = roundup(len + note32->n_namesz, 4); len = roundup(len + note32->n_descsz, 4); @@ -437,14 +559,15 @@ - SIZE(user_regs_struct) - sizeof(int); ebp = ULONG(user_regs + OFFSET(user_regs_struct_ebp)); esp = ULONG(user_regs + OFFSET(user_regs_struct_esp)); +check_ebp_esp: if (CRASHDEBUG(1)) - fprintf(fp, - "get_netdump_panic_task: esp: %lx ebp: %lx\n", + error(INFO, + "get_netdump_panic_task: NT_PRSTATUS esp: %lx ebp: %lx\n", esp, ebp); if (IS_KVADDR(esp)) { task = stkptr_to_task(esp); if (CRASHDEBUG(1)) - fprintf(fp, + error(INFO, 
"get_netdump_panic_task: esp: %lx -> task: %lx\n", esp, task); for (i = 0; task && (i < NR_CPUS); i++) { @@ -455,7 +578,7 @@ if (IS_KVADDR(ebp)) { task = stkptr_to_task(ebp); if (CRASHDEBUG(1)) - fprintf(fp, + error(INFO, "get_netdump_panic_task: ebp: %lx -> task: %lx\n", ebp, task); for (i = 0; task && (i < NR_CPUS); i++) { @@ -464,25 +587,37 @@ } } } else if (nd->elf64) { - Elf64_Nhdr *note64 = (Elf64_Nhdr *) - ((char *)nd->elf64 + nd->notes64->p_offset); - + Elf64_Nhdr *note64; + + if ((nd->num_prstatus_notes > 1) && (crashing_cpu != -1)) + note64 = (Elf64_Nhdr *) + nd->nt_prstatus_percpu[crashing_cpu]; + else + note64 = (Elf64_Nhdr *)nd->nt_prstatus; + len = sizeof(Elf64_Nhdr); len = roundup(len + note64->n_namesz, 4); user_regs = (char *)((char *)note64 + len + MEMBER_OFFSET("elf_prstatus", "pr_reg")); + + if (nd->elf64->e_machine == EM_386) { + ebp = ULONG(user_regs + OFFSET(user_regs_struct_ebp)); + esp = ULONG(user_regs + OFFSET(user_regs_struct_esp)); + goto check_ebp_esp; + } + if (nd->elf64->e_machine == EM_PPC64) { /* * Get the GPR1 register value. 
*/ esp = *(ulong *)((char *)user_regs + 8); if (CRASHDEBUG(1)) - fprintf(fp, - "get_netdump_panic_task: esp: %lx\n", esp); + error(INFO, + "get_netdump_panic_task: NT_PRSTATUS esp: %lx\n", esp); if (IS_KVADDR(esp)) { task = stkptr_to_task(esp); if (CRASHDEBUG(1)) - fprintf(fp, + error(INFO, "get_netdump_panic_task: esp: %lx -> task: %lx\n", esp, task); for (i = 0; task && (i < NR_CPUS); i++) { @@ -493,8 +628,10 @@ } } +panic_task_undetermined: + if (CRASHDEBUG(1)) - fprintf(fp, "get_netdump_panic_task: returning NO_TASK\n"); + error(INFO, "get_netdump_panic_task: failed\n"); return NO_TASK; #endif @@ -512,7 +649,7 @@ return nd->switch_stack; return 0; #else - if (!NETDUMP_VALID() || !get_active_set()) + if (!VMCORE_VALID() || !get_active_set()) return 0; if (nd->task_struct == task) @@ -525,30 +662,36 @@ int netdump_memory_dump(FILE *fp) { - int i, others; + int i, others, wrap, flen; size_t len, tot; FILE *fpsave; Elf32_Off offset32; Elf32_Off offset64; struct pt_load_segment *pls; - if (!NETDUMP_VALID()) + if (!VMCORE_VALID()) return FALSE; fpsave = nd->ofp; nd->ofp = fp; - netdump_print("netdump_data: \n"); + netdump_print("vmcore_data: \n"); netdump_print(" flags: %lx (", nd->flags); others = 0; if (nd->flags & NETDUMP_LOCAL) netdump_print("%sNETDUMP_LOCAL", others++ ? "|" : ""); + if (nd->flags & KDUMP_LOCAL) + netdump_print("%sKDUMP_LOCAL", others++ ? "|" : ""); if (nd->flags & NETDUMP_REMOTE) netdump_print("%sNETDUMP_REMOTE", others++ ? "|" : ""); if (nd->flags & NETDUMP_ELF32) netdump_print("%sNETDUMP_ELF32", others++ ? "|" : ""); if (nd->flags & NETDUMP_ELF64) netdump_print("%sNETDUMP_ELF64", others++ ? "|" : ""); + if (nd->flags & KDUMP_ELF32) + netdump_print("%sKDUMP_ELF32", others++ ? "|" : ""); + if (nd->flags & KDUMP_ELF64) + netdump_print("%sKDUMP_ELF64", others++ ? "|" : ""); if (nd->flags & PARTIAL_DUMP) netdump_print("%sPARTIAL_DUMP", others++ ? 
"|" : ""); netdump_print(")\n"); @@ -566,7 +709,7 @@ netdump_print(" phys_end: %llx\n", pls->phys_end); } - netdump_print(" netdump_header: %lx\n", nd->netdump_header); + netdump_print(" elf_header: %lx\n", nd->elf_header); netdump_print(" elf32: %lx\n", nd->elf32); netdump_print(" notes32: %lx\n", nd->notes32); netdump_print(" load32: %lx\n", nd->load32); @@ -577,11 +720,28 @@ netdump_print(" nt_prpsinfo: %lx\n", nd->nt_prpsinfo); netdump_print(" nt_taskstruct: %lx\n", nd->nt_taskstruct); netdump_print(" task_struct: %lx\n", nd->task_struct); - netdump_print(" switch_stack: %lx\n\n", nd->switch_stack); + netdump_print(" page_size: %d\n", nd->page_size); + netdump_print(" switch_stack: %lx\n", nd->switch_stack); + netdump_print(" num_prstatus_notes: %d\n", nd->num_prstatus_notes); + netdump_print(" nt_prstatus_percpu: "); + wrap = sizeof(void *) == SIZEOF_32BIT ? 8 : 4; + flen = sizeof(void *) == SIZEOF_32BIT ? 8 : 16; + if (nd->num_prstatus_notes == 1) + netdump_print("%.*lx\n", flen, nd->nt_prstatus_percpu[0]); + else { + for (i = 0; i < nd->num_prstatus_notes; i++) { + if ((i % wrap) == 0) + netdump_print("\n "); + netdump_print("%.*lx ", flen, + nd->nt_prstatus_percpu[i]); + } + } + netdump_print("\n\n"); - switch (nd->flags & (NETDUMP_ELF32|NETDUMP_ELF64)) + switch (DUMPFILE_FORMAT(nd->flags)) { case NETDUMP_ELF32: + case KDUMP_ELF32: dump_Elf32_Ehdr(nd->elf32); dump_Elf32_Phdr(nd->notes32, ELFREAD); for (i = 0; i < nd->num_pt_load_segments; i++) @@ -594,6 +754,7 @@ break; case NETDUMP_ELF64: + case KDUMP_ELF64: dump_Elf64_Ehdr(nd->elf64); dump_Elf64_Phdr(nd->notes64, ELFREAD); for (i = 0; i < nd->num_pt_load_segments; i++) @@ -865,6 +1026,9 @@ netdump_print(" e_machine: %d ", elf->e_machine); switch (elf->e_machine) { + case EM_386: + netdump_print("(EM_386)\n"); + break; case EM_IA_64: netdump_print("(EM_IA_64)\n"); break; @@ -1061,7 +1225,7 @@ */ static size_t -dump_Elf32_Nhdr(Elf32_Off offset, int store_addresses) +dump_Elf32_Nhdr(Elf32_Off offset, int 
store) { int i, lf; Elf32_Nhdr *note; @@ -1085,17 +1249,26 @@ { case NT_PRSTATUS: netdump_print("(NT_PRSTATUS)\n"); - if (store_addresses) - nd->nt_prstatus = (void *)note; + if (store) { + if (!nd->nt_prstatus) + nd->nt_prstatus = (void *)note; + for (i = 0; i < NR_CPUS; i++) { + if (!nd->nt_prstatus_percpu[i]) { + nd->nt_prstatus_percpu[i] = (void *)note; + nd->num_prstatus_notes++; + break; + } + } + } break; case NT_PRPSINFO: netdump_print("(NT_PRPSINFO)\n"); - if (store_addresses) + if (store) nd->nt_prpsinfo = (void *)note; break; case NT_TASKSTRUCT: netdump_print("(NT_TASKSTRUCT)\n"); - if (store_addresses) { + if (store) { nd->nt_taskstruct = (void *)note; nd->task_struct = *((ulong *)(ptr + note->n_namesz)); nd->switch_stack = *((ulong *) @@ -1105,14 +1278,36 @@ case NT_DISKDUMP: netdump_print("(NT_DISKDUMP)\n"); uptr = (ulong *)(ptr + note->n_namesz); - if (*uptr) + if (*uptr && store) nd->flags |= PARTIAL_DUMP; break; +#ifdef NOTDEF + /* + * Note: Based upon the original, abandoned, proposal for + * its contents -- keep around for potential future use. + */ + case NT_KDUMPINFO: + netdump_print("(NT_KDUMPINFO)\n"); + if (store) { + uptr = (note->n_namesz == 5) ? + (ulong *)(ptr + ((note->n_namesz + 3) & ~3)) : + (ulong *)(ptr + note->n_namesz); + nd->page_size = (uint)(1 << *uptr); + uptr++; + nd->task_struct = *uptr; + } + break; +#endif default: netdump_print("(?)\n"); } uptr = (ulong *)(ptr + note->n_namesz); + /* + * kdumps are off-by-1, because their n_namesz is 5 for "CORE". 
+ */ + if ((nd->flags & KDUMP_ELF32) && (note->n_namesz == 5)) + uptr = (ulong *)(ptr + ((note->n_namesz + 3) & ~3)); for (i = lf = 0; i < note->n_descsz/sizeof(ulong); i++) { if (((i%4)==0)) { netdump_print("%s ", @@ -1135,7 +1330,7 @@ static size_t -dump_Elf64_Nhdr(Elf64_Off offset, int store_addresses) +dump_Elf64_Nhdr(Elf64_Off offset, int store) { int i, lf; Elf64_Nhdr *note; @@ -1160,17 +1355,26 @@ { case NT_PRSTATUS: netdump_print("(NT_PRSTATUS)\n"); - if (store_addresses) - nd->nt_prstatus = (void *)note; + if (store) { + if (!nd->nt_prstatus) + nd->nt_prstatus = (void *)note; + for (i = 0; i < NR_CPUS; i++) { + if (!nd->nt_prstatus_percpu[i]) { + nd->nt_prstatus_percpu[i] = (void *)note; + nd->num_prstatus_notes++; + break; + } + } + } break; case NT_PRPSINFO: netdump_print("(NT_PRPSINFO)\n"); - if (store_addresses) + if (store) nd->nt_prpsinfo = (void *)note; break; case NT_TASKSTRUCT: netdump_print("(NT_TASKSTRUCT)\n"); - if (store_addresses) { + if (store) { nd->nt_taskstruct = (void *)note; nd->task_struct = *((ulong *)(ptr + note->n_namesz)); nd->switch_stack = *((ulong *) @@ -1180,16 +1384,49 @@ case NT_DISKDUMP: netdump_print("(NT_DISKDUMP)\n"); iptr = (int *)(ptr + note->n_namesz); - if (*iptr) + if (*iptr && store) nd->flags |= PARTIAL_DUMP; if (note->n_descsz < sizeof(ulonglong)) netdump_print(" %08x", *iptr); break; +#ifdef NOTDEF + /* + * Note: Based upon the original, abandoned, proposal for + * its contents -- keep around for potential future use. + */ + case NT_KDUMPINFO: + netdump_print("(NT_KDUMPINFO)\n"); + if (store) { + uint32_t *u32ptr; + + if (nd->elf64->e_machine == EM_386) { + u32ptr = (note->n_namesz == 5) ? + (uint *)(ptr + ((note->n_namesz + 3) & ~3)) : + (uint *)(ptr + note->n_namesz); + nd->page_size = 1 << *u32ptr; + u32ptr++; + nd->task_struct = *u32ptr; + } else { + uptr = (note->n_namesz == 5) ? 
+ (ulonglong *)(ptr + ((note->n_namesz + 3) & ~3)) : + (ulonglong *)(ptr + note->n_namesz); + nd->page_size = (uint)(1 << *uptr); + uptr++; + nd->task_struct = *uptr; + } + } + break; +#endif default: netdump_print("(?)\n"); } uptr = (ulonglong *)(ptr + note->n_namesz); + /* + * kdumps are off-by-1, because their n_namesz is 5 for "CORE". + */ + if ((nd->flags & KDUMP_ELF64) && (note->n_namesz == 5)) + uptr = (ulonglong *)(ptr + ((note->n_namesz + 3) & ~3)); for (i = lf = 0; i < note->n_descsz/sizeof(ulonglong); i++) { if (((i%2)==0)) { netdump_print("%s ", @@ -1251,12 +1488,12 @@ default: error(FATAL, - "netdump support for ELF machine type %d not available\n", + "support for ELF machine type %d not available\n", e_machine); } } -static void +void get_netdump_regs_x86_64(struct bt_info *bt, ulong *ripp, ulong *rspp) { Elf64_Nhdr *note; @@ -1267,8 +1504,13 @@ if (is_task_active(bt->task)) bt->flags |= BT_DUMPFILE_SEARCH; - if (VALID_STRUCT(user_regs_struct) && (bt->task == tt->panic_task)) { - note = (Elf64_Nhdr *)nd->nt_prstatus; + if ((NETDUMP_DUMPFILE() || KDUMP_DUMPFILE()) && + VALID_STRUCT(user_regs_struct) && (bt->task == tt->panic_task)) { + if (nd->num_prstatus_notes > 1) + note = (Elf64_Nhdr *) + nd->nt_prstatus_percpu[bt->tc->processor]; + else + note = (Elf64_Nhdr *)nd->nt_prstatus; len = sizeof(Elf64_Nhdr); len = roundup(len + note->n_namesz, 4); @@ -1295,7 +1537,7 @@ * the raw stack for some reasonable hooks. */ -static void +void get_netdump_regs_x86(struct bt_info *bt, ulong *eip, ulong *esp) { int i, search, panic; @@ -1320,6 +1562,7 @@ if (STREQ(sym, "netconsole_netdump") || STREQ(sym, "netpoll_start_netdump") || STREQ(sym, "start_disk_dump") || + STREQ(sym, "crash_kexec") || STREQ(sym, "disk_dump")) { *eip = *up; *esp = search ? 
@@ -1354,7 +1597,7 @@ next_sysrq: *eip = *up; *esp = bt->stackbase + ((char *)(up+4) - bt->stackbuf); - machdep->flags |= SYSRQ; + pc->flags |= SYSRQ; for (i++, up++; i < LONGS_PER_STACK; i++, up++) { sym = closest_symbol(*up); if (STREQ(sym, "sysrq_handle_crash")) @@ -1371,7 +1614,15 @@ *esp = search ? bt->stackbase + ((char *)(up+1) - bt->stackbuf) : *(up-1); - machdep->flags |= SYSRQ; + pc->flags |= SYSRQ; + return; + } + + if (STREQ(sym, "crash_nmi_callback")) { + *eip = *up; + *esp = search ? + bt->stackbase + ((char *)(up+1) - bt->stackbuf) : + *(up-1); return; } @@ -1418,7 +1669,7 @@ goto retry; } - console("get_netdump_regs_x86: cannot find anything useful\n"); + console("get_netdump_regs_x86: cannot find anything useful for task: %lx\n", bt->task); machdep->get_stack_frame(bt, eip, esp); } @@ -1429,8 +1680,18 @@ Elf64_Nhdr *note; size_t len; - if (bt->task == tt->panic_task) { - note = (Elf64_Nhdr *)nd->nt_prstatus; + if ((bt->task == tt->panic_task) || + (is_task_active(bt->task) && nd->num_prstatus_notes > 1)) { + /* + * Registers are saved during the dump process for the + * panic task. Whereas in kdump, regs are captured for all + * CPUs if they responded to an IPI. + */ + if (nd->num_prstatus_notes > 1) + note = (Elf64_Nhdr *) + nd->nt_prstatus_percpu[bt->tc->processor]; + else + note = (Elf64_Nhdr *)nd->nt_prstatus; len = sizeof(Elf64_Nhdr); len = roundup(len + note->n_namesz, 4); @@ -1446,3 +1707,78 @@ { return (nd->flags & PARTIAL_DUMP ? TRUE : FALSE); } + + +/* + * kexec/kdump generated vmcore files are similar enough in + * nature to netdump/diskdump such that most vmcore access + * functionality may be borrowed from the equivalent netdump + * function. If not, re-work them here. 
+ */ +int +is_kdump(char *file, ulong source_query) +{ + return is_netdump(file, source_query); +} + +int +kdump_init(char *unused, FILE *fptr) +{ + return netdump_init(unused, fptr); +} + +ulong +get_kdump_panic_task(void) +{ + return get_netdump_panic_task(); +} + +int +read_kdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr) +{ + return read_netdump(fd, bufptr, cnt, addr, paddr); +} + +int +write_kdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr) +{ + return write_netdump(fd, bufptr, cnt, addr, paddr); +} + +void +get_kdump_regs(struct bt_info *bt, ulong *eip, ulong *esp) +{ + get_netdump_regs(bt, eip, esp); +} + +uint +kdump_page_size(void) +{ + uint pagesz; + + if (!VMCORE_VALID()) + return 0; + + if (!(pagesz = nd->page_size)) + pagesz = (uint)getpagesize(); + + return pagesz; +} + +int +kdump_free_memory(void) +{ + return netdump_free_memory(); +} + +int +kdump_memory_used(void) +{ + return netdump_memory_used(); +} + +int +kdump_memory_dump(FILE *fp) +{ + return netdump_memory_dump(fp); +} --- crash/diskdump.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/diskdump.c 2006-03-23 14:29:25.000000000 -0500 @@ -1,16 +1,16 @@ /* * diskdump.c * - * NOTE: The Red Hat diskdump module currently creates - * vmcore dumpfiles that are identical to those made - * by the Red Hat netdump module, and therefore the - * dumpfile is recognized as such. But just in case - * there's ever a divergence, this file is being kept - * in place, along with the DISKDUMP-related #define's - * and their usage throughout the crash sources. + * The diskdump module optionally creates either ELF vmcore + * dumpfiles, or compressed dumpfiles derived from the LKCD format. + * In the case of ELF vmcore files, since they are identical to + * netdump dumpfiles, the facilities in netdump.c are used. For + * compressed dumpfiles, the facilities in this file are used. * * Copyright (C) 2004, 2005 David Anderson * Copyright (C) 2004, 2005 Red Hat, Inc. 
All rights reserved. + * Copyright (C) 2005 FUJITSU LIMITED + * Copyright (C) 2005 NEC Corporation * * This software may be freely redistributed under the terms of the * GNU General Public License. @@ -18,23 +18,230 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Author: David Anderson */ #include "defs.h" #include "diskdump.h" +#define BITMAP_SECT_LEN 4096 + struct diskdump_data { ulong flags; /* DISKDUMP_LOCAL, plus anything else... */ int dfd; /* dumpfile file descriptor */ FILE *ofp; /* fprintf(dd->ofp, "xxx"); */ int machine_type; /* machine type identifier */ + + /* header */ + struct disk_dump_header *header; + struct disk_dump_sub_header *sub_header; + + size_t data_offset; + int block_size; + int block_shift; + char *bitmap; + int bitmap_len; + char *dumpable_bitmap; + int byte, bit; + char *compressed_page; /* copy of compressed page data */ + char *curbufptr; /* ptr to uncompressed page buffer */ + + /* page cache */ + struct page_cache_hdr { /* header for each cached page */ + uint32_t pg_flags; + uint64_t pg_addr; + char *pg_bufptr; + ulong pg_hit_count; + } page_cache_hdr[DISKDUMP_CACHED_PAGES]; + char *page_cache_buf; /* base of cached buffer pages */ + int evict_index; /* next page to evict */ + ulong evictions; /* total evictions done */ + ulong cached_reads; + ulong *valid_pages; }; static struct diskdump_data diskdump_data = { 0 }; static struct diskdump_data *dd = &diskdump_data; +static inline int get_bit(char *map, int byte, int bit) +{ + return map[byte] & (1<<bit); +} + +static inline int page_is_ram(unsigned int nr) +{ + return get_bit(dd->bitmap, nr >> 3, nr & 7); +} + +static inline int page_is_dumpable(unsigned int nr) +{ + return dd->dumpable_bitmap[nr>>3] & (1 << (nr & 7)); +} + +static inline int dump_is_partial(const struct disk_dump_header *header) +{ + return header->bitmap_blocks >= + divideup(divideup(header->max_mapnr, 8), dd->block_size) * 2; +} + +static int
open_dump_file(char *file) +{ + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) { + error(INFO, "diskdump: unable to open dump file %s", file); + return FALSE; + } + dd->dfd = fd; + return TRUE; +} + +static int read_dump_header(void) +{ + struct disk_dump_header *header = NULL; + struct disk_dump_sub_header *sub_header = NULL; + int bitmap_len; + const int block_size = (int)sysconf(_SC_PAGESIZE); + off_t offset; + const off_t failed = (off_t)-1; + ulong pfn; + int i, j, max_sect_len; + + if (block_size < 0) + return FALSE; + + header = malloc(block_size); + + if (lseek(dd->dfd, 0, SEEK_SET) == failed) { + if (CRASHDEBUG(1)) + error(INFO, "diskdump: cannot lseek dump header\n"); + goto err; + } + + if (read(dd->dfd, header, block_size) < block_size) { + if (CRASHDEBUG(1)) + error(INFO, "diskdump: cannot read dump header\n"); + goto err; + } + + /* validate dump header */ + if (memcmp(header->signature, DISK_DUMP_SIGNATURE, + sizeof(header->signature))) { + if (CRASHDEBUG(1)) + error(INFO, "diskdump: dump does not have panic dump header\n"); + goto err; + } + + if (header->block_size != block_size) { + error(INFO, "diskdump: block size in the dump header does not match" + " with system page size\n"); + goto err; + } + dd->block_size = block_size; + dd->block_shift = ffs(block_size) - 1; + + if (sizeof(*header) + sizeof(void *) * header->nr_cpus > block_size || + header->nr_cpus <= 0) { + error(INFO, "diskdump: invalid nr_cpus value: %d\n", header->nr_cpus); + goto err; + } + + /* read sub header */ + offset = (off_t)block_size; + if (lseek(dd->dfd, offset, SEEK_SET) == failed) { + error(INFO, "diskdump: cannot lseek dump sub header\n"); + goto err; + } + sub_header = malloc(block_size); + if (read(dd->dfd, sub_header, block_size) + < block_size) { + error(INFO, "diskdump: cannot read dump sub header\n"); + goto err; + } + + dd->sub_header = sub_header; + + /* read memory bitmap */ + bitmap_len = block_size * header->bitmap_blocks; + dd->bitmap_len = bitmap_len; 
+ + offset = (off_t)block_size * (1 + header->sub_hdr_size); + if (lseek(dd->dfd, offset, SEEK_SET) == failed) { + error(INFO, "diskdump: cannot lseek memory bitmap\n"); + goto err; + } + + dd->bitmap = malloc(bitmap_len); + dd->dumpable_bitmap = calloc(bitmap_len, 1); + if (read(dd->dfd, dd->bitmap, bitmap_len) < bitmap_len) { + error(INFO, "diskdump: cannot read memory bitmap\n"); + goto err; + } + + if (dump_is_partial(header)) + memcpy(dd->dumpable_bitmap, dd->bitmap + bitmap_len/2, + bitmap_len/2); + else + memcpy(dd->dumpable_bitmap, dd->bitmap, bitmap_len); + + dd->data_offset + = (1 + header->sub_hdr_size + header->bitmap_blocks) + * header->block_size; + + dd->header = header; + + if (machine_type("X86")) + dd->machine_type = EM_386; + else if (machine_type("X86_64")) + dd->machine_type = EM_X86_64; + else if (machine_type("IA64")) + dd->machine_type = EM_IA_64; + else if (machine_type("PPC64")) + dd->machine_type = EM_PPC64; + else { + error(INFO, "diskdump: unsupported machine type: %s\n", MACHINE_TYPE); + goto err; + } + + max_sect_len = divideup(header->max_mapnr, BITMAP_SECT_LEN); + + dd->valid_pages = calloc(sizeof(ulong), max_sect_len + 1); + pfn = 0; + for (i = 1; i < max_sect_len + 1; i++) { + dd->valid_pages[i] = dd->valid_pages[i - 1]; + for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++) + if (page_is_dumpable(pfn)) + dd->valid_pages[i]++; + } + + return TRUE; + +err: + free(header); + if (sub_header) + free(sub_header); + if (dd->bitmap) + free(dd->bitmap); + if (dd->dumpable_bitmap) + free(dd->dumpable_bitmap); + return FALSE; +} + +static int +pfn_to_pos(ulong pfn) +{ + int desc_pos, j, valid; + + valid = dd->valid_pages[pfn / BITMAP_SECT_LEN]; + + for (j = round(pfn, BITMAP_SECT_LEN), desc_pos = valid; j <= pfn; j++) + if (page_is_dumpable(j)) + desc_pos++; + + return desc_pos; +} + + /* * Determine whether a file is a diskdump creation, and if TRUE, * initialize the diskdump_data structure based upon the contents @@ -43,6 +250,31 @@ int 
is_diskdump(char *file) { + int sz, i; + + if (!open_dump_file(file) || !read_dump_header()) + return FALSE; + + sz = dd->block_size * (DISKDUMP_CACHED_PAGES); + if ((dd->page_cache_buf = malloc(sz)) == NULL) + return FALSE; + + for (i = 0; i < DISKDUMP_CACHED_PAGES; i++) + dd->page_cache_hdr[i].pg_bufptr = + &dd->page_cache_buf[i * dd->block_size]; + + if ((dd->compressed_page = (char *)malloc(dd->block_size)) == NULL) + goto err; + + dd->flags |= DISKDUMP_LOCAL; + + return TRUE; + +err: + if (dd->page_cache_buf) + free(dd->page_cache_buf); + if (dd->compressed_page) + free(dd->compressed_page); return FALSE; } @@ -53,11 +285,123 @@ int diskdump_init(char *unused, FILE *fptr) { - if (!DISKDUMP_VALID()) - return FALSE; + if (!DISKDUMP_VALID()) + return FALSE; - dd->ofp = fptr; - return TRUE; + dd->ofp = fptr; + return TRUE; +} + +/* + * Check whether paddr is already cached. + */ +static int +page_is_cached(physaddr_t paddr) +{ + int i; + struct page_cache_hdr *pgc; + + for (i = 0; i < DISKDUMP_CACHED_PAGES; i++) { + + pgc = &dd->page_cache_hdr[i]; + + if (!DISKDUMP_VALID_PAGE(pgc->pg_flags)) + continue; + + if (pgc->pg_addr == paddr) { + pgc->pg_hit_count++; + dd->curbufptr = pgc->pg_bufptr; + dd->cached_reads++; + return TRUE; + } + } + return FALSE; +} + +/* + * Cache the page's data. + * + * If an empty page cache location is available, take it. Otherwise, evict + * the entry indexed by evict_index, and then bump evict index. The hit_count + * is only gathered for dump_diskdump_environment(). + * + * If the page is compressed, uncompress it into the selected page cache entry. + * If the page is raw, just copy it into the selected page cache entry. + * If all works OK, update diskdump->curbufptr to point to the page's + * uncompressed data. 
+ */ +static int +cache_page(physaddr_t paddr) +{ + int i, ret; + int found; + ulong pfn; + int desc_pos; + off_t seek_offset; + page_desc_t pd; + const int block_size = dd->block_size; + const off_t failed = (off_t)-1; + ulong retlen; + + for (i = found = 0; i < DISKDUMP_CACHED_PAGES; i++) { + if (DISKDUMP_VALID_PAGE(dd->page_cache_hdr[i].pg_flags)) + continue; + found = TRUE; + break; + } + + if (!found) { + i = dd->evict_index; + dd->page_cache_hdr[i].pg_hit_count = 0; + dd->evict_index = + (dd->evict_index+1) % DISKDUMP_CACHED_PAGES; + dd->evictions++; + } + + dd->page_cache_hdr[i].pg_flags = 0; + dd->page_cache_hdr[i].pg_addr = paddr; + dd->page_cache_hdr[i].pg_hit_count++; + + /* find page descriptor */ + pfn = paddr >> dd->block_shift; + desc_pos = pfn_to_pos(pfn); + seek_offset = dd->data_offset + + (off_t)(desc_pos - 1)*sizeof(page_desc_t); + lseek(dd->dfd, seek_offset, SEEK_SET); + + /* read page descriptor */ + if (read(dd->dfd, &pd, sizeof(pd)) != sizeof(pd)) + return READ_ERROR; + + /* sanity check */ + if (pd.size > block_size) + return READ_ERROR; + + if (lseek(dd->dfd, pd.offset, SEEK_SET) == failed) + return SEEK_ERROR; + + /* read page data */ + if (read(dd->dfd, dd->compressed_page, pd.size) != pd.size) + return READ_ERROR; + + if (pd.flags & DUMP_DH_COMPRESSED) { + retlen = block_size; + ret = uncompress((unsigned char *)dd->page_cache_hdr[i].pg_bufptr, + &retlen, + (unsigned char *)dd->compressed_page, + pd.size); + if ((ret != Z_OK) || (retlen != block_size)) { + error(INFO, "diskdump: uncompress failed: %d\n", ret); + return READ_ERROR; + } + } else + memcpy(dd->page_cache_hdr[i].pg_bufptr, + dd->compressed_page, block_size); + + dd->page_cache_hdr[i].pg_flags |= PAGE_VALID; + dd->curbufptr = dd->page_cache_hdr[i].pg_bufptr; + + return TRUE; } /* @@ -66,7 +410,28 @@ int read_diskdump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr) { - return 0; + int ret; + physaddr_t curpaddr; + ulong pfn, page_offset; + + pfn = paddr >> 
dd->block_shift; + curpaddr = paddr & ~((physaddr_t)(dd->block_size-1)); + page_offset = paddr & ((physaddr_t)(dd->block_size-1)); + + if ((pfn >= dd->header->max_mapnr) || !page_is_ram(pfn)) + return SEEK_ERROR; + if (!page_is_dumpable(pfn)) { + memset(bufptr, 0, cnt); + return cnt; + } + + if (!page_is_cached(curpaddr)) + if ((ret = cache_page(curpaddr)) < 0) + return ret; + + memcpy(bufptr, dd->curbufptr + page_offset, cnt); + + return cnt; } /* @@ -81,7 +446,22 @@ ulong get_diskdump_panic_task(void) { - return NO_TASK; + if (!DISKDUMP_VALID() || !get_active_set()) + return NO_TASK; + + return (ulong)dd->header->tasks[dd->header->current_cpu]; +} + +extern void get_netdump_regs_x86(struct bt_info *, ulong *, ulong *); +extern void get_netdump_regs_x86_64(struct bt_info *, ulong *, ulong *); + +static void +get_diskdump_regs_ppc64(struct bt_info *bt, ulong *eip, ulong *esp) +{ + if (bt->task == tt->panic_task) + bt->machdep = &dd->sub_header->elf_regs; + + machdep->get_stack_frame(bt, eip, esp); } /* @@ -91,12 +471,32 @@ void get_diskdump_regs(struct bt_info *bt, ulong *eip, ulong *esp) { - switch (dd->machine_type) - { - default: - error(FATAL, - "diskdump support for this machine type is not available\n"); - } + switch (dd->machine_type) + { + case EM_386: + return get_netdump_regs_x86(bt, eip, esp); + break; + + case EM_IA_64: + /* For normal backtraces, this information will be obtained + * from the switch_stack structure, which is pointed to by + * the thread.ksp field of the task_struct. But it's still + * needed by the "bt -t" option.
+ */ + machdep->get_stack_frame(bt, eip, esp); + break; + + case EM_PPC64: + return get_diskdump_regs_ppc64(bt, eip, esp); + break; + + case EM_X86_64: + return get_netdump_regs_x86_64(bt, eip, esp); + break; + + default: + error(FATAL, "diskdump: unsupported machine type: %s\n", MACHINE_TYPE); + } } /* @@ -105,7 +505,10 @@ uint diskdump_page_size(void) { - return 0; + if (!DISKDUMP_VALID()) + return 0; + + return dd->header->block_size; } /* --- crash/xendump.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/xendump.c 2006-04-26 16:17:27.000000000 -0400 @@ -0,0 +1,1167 @@ +/* + * xendump.c + * + * Copyright (C) 2006 David Anderson + * Copyright (C) 2006 Red Hat, Inc. All rights reserved. + * + * This software may be freely redistributed under the terms of the + * GNU General Public License. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "defs.h" +#include "xendump.h" + +static struct xendump_data xendump_data = { 0 }; +static struct xendump_data *xd = &xendump_data; + +static int xc_save_verify(char *); +static int xc_core_verify(char *); +static int xc_save_read(void *, int, ulong, physaddr_t); +static int xc_core_read(void *, int, ulong, physaddr_t); + +static void poc_store(ulong, off_t); +static off_t poc_get(ulong, int *); + +static void xen_dump_vmconfig(FILE *); + +static void xc_core_ptm_create(void); +static ulong xc_core_pfn_to_page_index(ulong); + +/* + * Determine whether a file is a xendump creation, and if TRUE, + * initialize the xendump_data structure. 
+ */ +int +is_xendump(char *file) +{ + int verified; + char buf[BUFSIZE]; + + if ((xd->xfd = open(file, O_RDWR)) < 0) { + if ((xd->xfd = open(file, O_RDONLY)) < 0) { + sprintf(buf, "%s: open", file); + perror(buf); + return FALSE; + } + } + + if (read(xd->xfd, buf, BUFSIZE) != BUFSIZE) + return FALSE; + + if (machine_type("X86") || machine_type("X86_64")) + xd->page_size = 4096; + else + xd->page_size = machdep->pagesize; + + verified = xc_save_verify(buf) || xc_core_verify(buf); + + if (!verified) + close(xd->xfd); + + return (verified); +} + +/* + * Verify whether the dump was created by the xc_domain_dumpcore() + * library function in libxc/xc_core.c. + */ +static int +xc_core_verify(char *buf) +{ + struct xc_core_header *xcp; + + xcp = (struct xc_core_header *)buf; + + if (xcp->xch_magic != XC_CORE_MAGIC) + return FALSE; + + if (!xcp->xch_nr_vcpus) { + error(INFO, + "faulty xc_core dump file header: xch_nr_vcpus is 0\n\n"); + + fprintf(stderr, " xch_magic: %x (XC_CORE_MAGIC)\n", xcp->xch_magic); + fprintf(stderr, " xch_nr_vcpus: %d\n", xcp->xch_nr_vcpus); + fprintf(stderr, " xch_nr_pages: %d\n", xcp->xch_nr_pages); + fprintf(stderr, " xch_ctxt_offset: %d\n", xcp->xch_ctxt_offset); + fprintf(stderr, " xch_index_offset: %d\n", xcp->xch_index_offset); + fprintf(stderr, " xch_pages_offset: %d\n\n", xcp->xch_pages_offset); + + clean_exit(1); + } + + BCOPY(xcp, &xd->xc_core.header, + sizeof(struct xc_core_header)); + + xd->flags |= (XENDUMP_LOCAL | XC_CORE); + + if (!xd->page_size) + error(FATAL, + "unknown page size: use -p command line option\n"); + + if (!(xd->page = (char *)malloc(xd->page_size))) + error(FATAL, "cannot malloc page space."); + + if (!(xd->poc = (struct pfn_offset_cache *)calloc + (PFN_TO_OFFSET_CACHE_ENTRIES, + sizeof(struct pfn_offset_cache)))) + error(FATAL, "cannot malloc pfn_offset_cache\n"); + xd->last_pfn = ~(0UL); + + if (CRASHDEBUG(1)) + xendump_memory_dump(stderr); + + return TRUE; +} + +/* + * Do the work for read_xendump() for the 
XC_CORE dumpfile format. + */ +static int +xc_core_read(void *bufptr, int cnt, ulong addr, physaddr_t paddr) +{ + ulong pfn, page_index; + off_t offset; + int redundant; + + if (!(xd->flags & XC_CORE_PTM_INIT)) + xc_core_ptm_create(); + + pfn = (ulong)BTOP(paddr); + + if ((offset = poc_get(pfn, &redundant))) { + if (!redundant) { + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + return SEEK_ERROR; + if (read(xd->xfd, xd->page, xd->page_size) != + xd->page_size) + return READ_ERROR; + } + + BCOPY(xd->page + PAGEOFFSET(paddr), bufptr, cnt); + return cnt; + } + + if ((page_index = xc_core_pfn_to_page_index(pfn)) == + PFN_NOT_FOUND) + return READ_ERROR; + + offset = (off_t)xd->xc_core.header.xch_pages_offset + + ((off_t)(page_index) * (off_t)xd->page_size); + + if (lseek(xd->xfd, offset, SEEK_SET) == -1) + return SEEK_ERROR; + + if (read(xd->xfd, xd->page, xd->page_size) != xd->page_size) + return READ_ERROR; + + poc_store(pfn, offset); + + BCOPY(xd->page + PAGEOFFSET(paddr), bufptr, cnt); + + return cnt; +} + +/* + * Verify whether the dumpfile was created by the "xm save" facility. + * This gets started by the "save" function in XendCheckpoint.py, and + * then by xc_save.c, with the work done in the xc_linux_save() library + * function in libxc/xc_linux_save.c. + */ + +#define MAX_BATCH_SIZE 1024 +/* + * Number of P2M entries in a page. + */ +#define ULPP (xd->page_size/sizeof(unsigned long)) +/* + * Number of P2M entries in the pfn_to_mfn_frame_list. + */ +#define P2M_FL_ENTRIES (((xd->xc_save.nr_pfns)+ULPP-1)/ULPP) +/* + * Size in bytes of the pfn_to_mfn_frame_list. 
+ */ +#define P2M_FL_SIZE ((P2M_FL_ENTRIES)*sizeof(unsigned long)) + +#define XTAB (0xf<<28) /* invalid page */ +#define LTAB_MASK XTAB + +static int +xc_save_verify(char *buf) +{ + int i, batch_count, done_batch, *intptr; + ulong flags, *ulongptr; + ulong batch_index, total_pages_read; + + if (!STRNEQ(buf, XC_SAVE_SIGNATURE)) + return FALSE; + + if (lseek(xd->xfd, strlen(XC_SAVE_SIGNATURE), SEEK_SET) == -1) + return FALSE; + + flags = XC_SAVE; + + if (CRASHDEBUG(1)) { + fprintf(stderr, "\"%s\"\n", buf); + fprintf(stderr, "endian: %d %s\n", __BYTE_ORDER, + __BYTE_ORDER == __BIG_ENDIAN ? "__BIG_ENDIAN" : + (__BYTE_ORDER == __LITTLE_ENDIAN ? + "__LITTLE_ENDIAN" : "???")); + } + + /* + * size of vmconfig data structure (big-endian) + */ + if (read(xd->xfd, buf, sizeof(int)) != sizeof(int)) + return FALSE; + + intptr = (int *)buf; + + if (CRASHDEBUG(1) && BYTE_SWAP_REQUIRED(__BIG_ENDIAN)) { + fprintf(stderr, "byte-swap required for this:\n"); + for (i = 0; i < sizeof(int); i++) + fprintf(stderr, "[%x]", buf[i] & 0xff); + fprintf(stderr, ": %x -> ", *intptr); + } + + xd->xc_save.vmconfig_size = swab32(*intptr); + + if (CRASHDEBUG(1)) + fprintf(stderr, "%x\n", xd->xc_save.vmconfig_size); + + if (!(xd->xc_save.vmconfig_buf = (char *)malloc + (xd->xc_save.vmconfig_size))) + error(FATAL, "cannot malloc xc_save vmconfig space."); + + if (!xd->page_size) + error(FATAL, + "unknown page size: use -p command line option\n"); + + if (!(xd->page = (char *)malloc(xd->page_size))) + error(FATAL, "cannot malloc page space."); + + if (!(xd->poc = (struct pfn_offset_cache *)calloc + (PFN_TO_OFFSET_CACHE_ENTRIES, + sizeof(struct pfn_offset_cache)))) + error(FATAL, "cannot malloc pfn_offset_cache\n"); + xd->last_pfn = ~(0UL); + + if (!(xd->xc_save.region_pfn_type = (ulong *)calloc + (MAX_BATCH_SIZE, sizeof(ulong)))) + error(FATAL, "cannot malloc region_pfn_type\n"); + + if (read(xd->xfd, xd->xc_save.vmconfig_buf, + xd->xc_save.vmconfig_size) != xd->xc_save.vmconfig_size) + goto 
xc_save_bailout; + + /* + * nr_pfns (native byte order) + */ + if (read(xd->xfd, buf, sizeof(ulong)) != sizeof(ulong)) + goto xc_save_bailout; + + ulongptr = (ulong *)buf; + + if (CRASHDEBUG(1)) { + for (i = 0; i < sizeof(ulong); i++) + fprintf(stderr, "[%x]", buf[i] & 0xff); + fprintf(stderr, ": %lx (native)\n", *ulongptr); + } + + xd->xc_save.nr_pfns = *ulongptr; + + /* + * Get a local copy of the live_P2M_frame_list + */ + if (!(xd->xc_save.p2m_frame_list = (unsigned long *)malloc(P2M_FL_SIZE))) + error(FATAL, "Cannot allocate p2m_frame_list array"); + + if (!(xd->xc_save.batch_offsets = (off_t *)calloc((size_t)P2M_FL_ENTRIES, + sizeof(off_t)))) + error(FATAL, "Cannot allocate batch_offsets array"); + + xd->xc_save.batch_count = P2M_FL_ENTRIES; + + if (read(xd->xfd, xd->xc_save.p2m_frame_list, P2M_FL_SIZE) != + P2M_FL_SIZE) + goto xc_save_bailout; + + if (CRASHDEBUG(1)) + fprintf(stderr, "pre-batch file pointer: %lld\n", + (ulonglong)lseek(xd->xfd, 0L, SEEK_CUR)); + + /* + * ... + * int batch_count + * ulong region pfn_type[batch_count] + * page 0 + * page 1 + * ... 
+ * page batch_count-1 + * (repeat) + */ + + total_pages_read = 0; + batch_index = 0; + done_batch = FALSE; + + while (!done_batch) { + + xd->xc_save.batch_offsets[batch_index] = (off_t) + lseek(xd->xfd, 0L, SEEK_CUR); + + if (read(xd->xfd, &batch_count, sizeof(int)) != sizeof(int)) + goto xc_save_bailout; + + if (CRASHDEBUG(1)) + fprintf(stderr, "batch[%ld]: %d ", + batch_index, batch_count); + + batch_index++; + + if (batch_index >= P2M_FL_ENTRIES) { + fprintf(stderr, "more than %ld batches encountered?\n", + P2M_FL_ENTRIES); + goto xc_save_bailout; + } + + switch (batch_count) + { + case 0: + if (CRASHDEBUG(1)) { + fprintf(stderr, + ": Batch work is done: %ld pages read (P2M_FL_ENTRIES: %ld)\n", + total_pages_read, P2M_FL_ENTRIES); + } + done_batch = TRUE; + continue; + + case -1: + if (CRASHDEBUG(1)) + fprintf(stderr, ": Entering page verify mode\n"); + continue; + + default: + if (batch_count > MAX_BATCH_SIZE) { + if (CRASHDEBUG(1)) + fprintf(stderr, + ": Max batch size exceeded. Giving up.\n"); + done_batch = TRUE; + continue; + } + if (CRASHDEBUG(1)) + fprintf(stderr, "\n"); + break; + } + + if (read(xd->xfd, xd->xc_save.region_pfn_type, batch_count * sizeof(ulong)) != + batch_count * sizeof(ulong)) + goto xc_save_bailout; + + for (i = 0; i < batch_count; i++) { + unsigned long pagetype; + unsigned long pfn; + + pfn = xd->xc_save.region_pfn_type[i] & ~LTAB_MASK; + pagetype = xd->xc_save.region_pfn_type[i] & LTAB_MASK; + + if (pagetype == XTAB) + /* a bogus/unmapped page: skip it */ + continue; + + if (pfn > xd->xc_save.nr_pfns) { + if (CRASHDEBUG(1)) + fprintf(stderr, + "batch_count: %d pfn %ld out of range", + batch_count, pfn); + } + + if (lseek(xd->xfd, xd->page_size, SEEK_CUR) == -1) + goto xc_save_bailout; + + total_pages_read++; + } + } + + /* + * Get the list of PFNs that are not in the pseudo-phys map + */ + if (read(xd->xfd, &xd->xc_save.pfns_not, + sizeof(xd->xc_save.pfns_not)) != sizeof(xd->xc_save.pfns_not)) + goto xc_save_bailout; + + if
(CRASHDEBUG(1)) + fprintf(stderr, "PFNs not in pseudo-phys map: %d\n", + xd->xc_save.pfns_not); + + if ((total_pages_read + xd->xc_save.pfns_not) != + xd->xc_save.nr_pfns) + error(WARNING, + "nr_pfns: %ld != (total pages: %ld + pages not saved: %d)\n", + xd->xc_save.nr_pfns, total_pages_read, + xd->xc_save.pfns_not); + + xd->xc_save.pfns_not_offset = lseek(xd->xfd, 0L, SEEK_CUR); + + if (lseek(xd->xfd, sizeof(ulong) * xd->xc_save.pfns_not, SEEK_CUR) == -1) + goto xc_save_bailout; + + xd->xc_save.vcpu_ctxt_offset = lseek(xd->xfd, 0L, SEEK_CUR); + + lseek(xd->xfd, 0, SEEK_END); + lseek(xd->xfd, -((off_t)(xd->page_size)), SEEK_CUR); + + xd->xc_save.shared_info_page_offset = lseek(xd->xfd, 0L, SEEK_CUR); + + xd->flags |= (XENDUMP_LOCAL | flags); + kt->xen_flags |= (CANONICAL_PAGE_TABLES|XEN_SUSPEND); + + if (CRASHDEBUG(1)) + xendump_memory_dump(stderr); + + return TRUE; + +xc_save_bailout: + + error(INFO, + "xc_save_verify: \"LinuxGuestRecord\" file handling/format error\n"); + + if (xd->xc_save.p2m_frame_list) { + free(xd->xc_save.p2m_frame_list); + xd->xc_save.p2m_frame_list = NULL; + } + if (xd->xc_save.batch_offsets) { + free(xd->xc_save.batch_offsets); + xd->xc_save.batch_offsets = NULL; + } + if (xd->xc_save.vmconfig_buf) { + free(xd->xc_save.vmconfig_buf); + xd->xc_save.vmconfig_buf = NULL; + } + if (xd->page) { + free(xd->page); + xd->page = NULL; + } + + return FALSE; +} + +/* + * Do the work for read_xendump() for the XC_SAVE dumpfile format. 
+ */ +static int +xc_save_read(void *bufptr, int cnt, ulong addr, physaddr_t paddr) +{ + int b, i, redundant; + ulong reqpfn; + int batch_count; + off_t file_offset; + + reqpfn = (ulong)BTOP(paddr); + + if (CRASHDEBUG(8)) + fprintf(xd->ofp, + "xc_save_read(bufptr: %lx cnt: %d addr: %lx paddr: %llx (%ld, 0x%lx)\n", + (ulong)bufptr, cnt, addr, (ulonglong)paddr, reqpfn, reqpfn); + + if ((file_offset = poc_get(reqpfn, &redundant))) { + if (!redundant) { + if (lseek(xd->xfd, file_offset, SEEK_SET) == -1) + return SEEK_ERROR; + if (read(xd->xfd, xd->page, xd->page_size) != xd->page_size) + return READ_ERROR; + } else + console("READ %ld (0x%lx) skipped!\n", reqpfn, reqpfn); + + BCOPY(xd->page + PAGEOFFSET(paddr), bufptr, cnt); + return cnt; + } + + /* + * ... + * int batch_count + * ulong region pfn_type[batch_count] + * page 0 + * page 1 + * ... + * page batch_count-1 + * (repeat) + */ + for (b = 0; b < xd->xc_save.batch_count; b++) { + + if (lseek(xd->xfd, xd->xc_save.batch_offsets[b], SEEK_SET) == -1) + return SEEK_ERROR; + + if (CRASHDEBUG(8)) + fprintf(xd->ofp, "check batch[%d]: offset: %llx\n", + b, (ulonglong)xd->xc_save.batch_offsets[b]); + + if (read(xd->xfd, &batch_count, sizeof(int)) != sizeof(int)) + return READ_ERROR; + + switch (batch_count) + { + case 0: + if (CRASHDEBUG(1)) { + fprintf(xd->ofp, + "batch[%d]: has count of zero -- bailing out on pfn %ld\n", + b, reqpfn); + } + return READ_ERROR; + + case -1: + return READ_ERROR; + + default: + if (CRASHDEBUG(8)) + fprintf(xd->ofp, + "batch[%d]: offset: %llx batch count: %d\n", + b, (ulonglong)xd->xc_save.batch_offsets[b], + batch_count); + break; + } + + if (read(xd->xfd, xd->xc_save.region_pfn_type, batch_count * sizeof(ulong)) != + batch_count * sizeof(ulong)) + return READ_ERROR; + + for (i = 0; i < batch_count; i++) { + unsigned long pagetype; + unsigned long pfn; + + pfn = xd->xc_save.region_pfn_type[i] & ~LTAB_MASK; + pagetype = xd->xc_save.region_pfn_type[i] & LTAB_MASK; + + if (pagetype == XTAB) + 
/* a bogus/unmapped page: skip it */ + continue; + + if (pfn > xd->xc_save.nr_pfns) { + if (CRASHDEBUG(1)) + fprintf(stderr, + "batch_count: %d pfn %ld out of range", + batch_count, pfn); + } + + if (pfn == reqpfn) { + file_offset = lseek(xd->xfd, 0, SEEK_CUR); + poc_store(pfn, file_offset); + + if (read(xd->xfd, xd->page, xd->page_size) != + xd->page_size) + return READ_ERROR; + + BCOPY(xd->page + PAGEOFFSET(paddr), bufptr, cnt); + return cnt; + } + + if (lseek(xd->xfd, xd->page_size, SEEK_CUR) == -1) + return SEEK_ERROR; + } + } + + return READ_ERROR; +} + +/* + * Stash a pfn's offset. If they're all in use, put it in the + * least-used slot that's closest to the beginning of the array. + */ +static void +poc_store(ulong pfn, off_t file_offset) +{ + int i; + struct pfn_offset_cache *poc, *plow; + ulong curlow; + + curlow = ~(0UL); + plow = NULL; + poc = xd->poc; + + for (i = 0; i < PFN_TO_OFFSET_CACHE_ENTRIES; i++, poc++) { + if (poc->cnt == 0) { + poc->cnt = 1; + poc->pfn = pfn; + poc->file_offset = file_offset; + xd->last_pfn = pfn; + return; + } + + if (poc->cnt < curlow) { + curlow = poc->cnt; + plow = poc; + } + } + + plow->cnt = 1; + plow->pfn = pfn; + plow->file_offset = file_offset; + xd->last_pfn = pfn; +} + +/* + * Check whether a pfn's offset has been cached. + */ +static off_t +poc_get(ulong pfn, int *redundant) +{ + int i; + struct pfn_offset_cache *poc; + + xd->accesses++; + + if (pfn == xd->last_pfn) { + xd->redundant++; + *redundant = TRUE; + return 1; + } else + *redundant = FALSE; + + poc = xd->poc; + + for (i = 0; i < PFN_TO_OFFSET_CACHE_ENTRIES; i++, poc++) { + if (poc->cnt && (poc->pfn == pfn)) { + poc->cnt++; + xd->cache_hits++; + xd->last_pfn = pfn; + return poc->file_offset; + } + } + + return 0; +} + + +/* + * Perform any post-dumpfile determination stuff here. 
+ */ +int +xendump_init(char *unused, FILE *fptr) +{ + if (!XENDUMP_VALID()) + return FALSE; + + xd->ofp = fptr; + return TRUE; +} + +int +read_xendump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr) +{ + switch (xd->flags & (XC_SAVE|XC_CORE)) + { + case XC_SAVE: + return xc_save_read(bufptr, cnt, addr, paddr); + + case XC_CORE: + return xc_core_read(bufptr, cnt, addr, paddr); + + default: + return READ_ERROR; + } +} + +int +write_xendump(int fd, void *bufptr, int cnt, ulong addr, physaddr_t paddr) +{ + return WRITE_ERROR; +} + +uint +xendump_page_size(void) +{ + if (!XENDUMP_VALID()) + return 0; + + return xd->page_size; +} + +/* + * xendump_free_memory(), and xendump_memory_used() + * are debug only, and typically unnecessary to implement. + */ +int +xendump_free_memory(void) +{ + return 0; +} + +int +xendump_memory_used(void) +{ + return 0; +} + +/* + * This function is dump-type independent, used here to + * dump the xendump_data structure contents. + */ +int +xendump_memory_dump(FILE *fp) +{ + int i, linefeed, used, others; + ulong *ulongptr; + + fprintf(fp, " flags: %lx (", xd->flags); + others = 0; + if (xd->flags & XENDUMP_LOCAL) + fprintf(fp, "%sXENDUMP_LOCAL", others++ ? "|" : ""); + if (xd->flags & XC_SAVE) + fprintf(fp, "%sXC_SAVE", others++ ? "|" : ""); + if (xd->flags & XC_CORE) + fprintf(fp, "%sXC_CORE", others++ ? "|" : ""); + if (xd->flags & XC_CORE_PTM_INIT) + fprintf(fp, "%sXC_CORE_PTM_INIT", others++ ?
"|" : ""); + fprintf(fp, ")\n"); + fprintf(fp, " xfd: %d\n", xd->xfd); + fprintf(fp, " page_size: %d\n", xd->page_size); + fprintf(fp, " ofp: %lx\n", (ulong)xd->ofp); + fprintf(fp, " page: %lx\n", (ulong)xd->page); + fprintf(fp, " panic_pc: %lx\n", xd->panic_pc); + fprintf(fp, " panic_sp: %lx\n", xd->panic_sp); + fprintf(fp, " accesses: %ld\n", (ulong)xd->accesses); + fprintf(fp, " cache_hits: %ld ", (ulong)xd->cache_hits); + if (xd->accesses) + fprintf(fp, "(%ld%%)\n", xd->cache_hits * 100 / xd->accesses); + else + fprintf(fp, "\n"); + fprintf(fp, " last_pfn: %ld\n", xd->last_pfn); + fprintf(fp, " redundant: %ld ", (ulong)xd->redundant); + if (xd->accesses) + fprintf(fp, "(%ld%%)\n", xd->redundant * 100 / xd->accesses); + else + fprintf(fp, "\n"); + for (i = used = 0; i < PFN_TO_OFFSET_CACHE_ENTRIES; i++) + if (xd->poc[i].cnt) + used++; + fprintf(fp, " poc[%d]: %lx %s", PFN_TO_OFFSET_CACHE_ENTRIES, (ulong)xd->poc, + xd->poc ? "" : "(none)"); + for (i = 0; i < PFN_TO_OFFSET_CACHE_ENTRIES; i++) { + if (!xd->poc) + break; + if (!xd->poc[i].cnt) { + if (!i) + fprintf(fp, "(none used)\n"); + break; + } else if (!i) + fprintf(fp, "(%d used)\n", used); + fprintf(fp, " [%d]: pfn: %ld (0x%lx) count: %ld file_offset: %llx\n", + i, + xd->poc[i].pfn, + xd->poc[i].pfn, + xd->poc[i].cnt, + (ulonglong)xd->poc[i].file_offset); + } + if (!xd->poc) + fprintf(fp, "\n"); + + fprintf(fp, " xc_save:\n"); + fprintf(fp, " nr_pfns: %ld (0x%lx)\n", + xd->xc_save.nr_pfns, xd->xc_save.nr_pfns); + fprintf(fp, " vmconfig_size: %d (0x%x)\n", xd->xc_save.vmconfig_size, + xd->xc_save.vmconfig_size); + fprintf(fp, " vmconfig_buf: %lx\n", (ulong)xd->xc_save.vmconfig_buf); + if (xd->flags & XC_SAVE) + xen_dump_vmconfig(fp); + fprintf(fp, " p2m_frame_list: %lx ", (ulong)xd->xc_save.p2m_frame_list); + if (xd->flags & XC_SAVE) { + fprintf(fp, "\n"); + ulongptr = xd->xc_save.p2m_frame_list; + for (i = 0; i < P2M_FL_ENTRIES; i++, ulongptr++) + fprintf(fp, "%ld ", *ulongptr); + fprintf(fp, "\n"); + } else 
+ fprintf(fp, "(none)\n"); + fprintf(fp, " pfns_not: %d\n", xd->xc_save.pfns_not); + fprintf(fp, " pfns_not_offset: %lld\n", + (ulonglong)xd->xc_save.pfns_not_offset); + fprintf(fp, " vcpu_ctxt_offset: %lld\n", + (ulonglong)xd->xc_save.vcpu_ctxt_offset); + fprintf(fp, " shared_info_page_offset: %lld\n", + (ulonglong)xd->xc_save.shared_info_page_offset); + fprintf(fp, " region_pfn_type: %lx\n", (ulong)xd->xc_save.region_pfn_type); + fprintf(fp, " batch_count: %ld\n", (ulong)xd->xc_save.batch_count); + fprintf(fp, " batch_offsets: %lx %s\n", + (ulong)xd->xc_save.batch_offsets, + xd->xc_save.batch_offsets ? "" : "(none)"); + for (i = linefeed = 0; i < xd->xc_save.batch_count; i++) { + fprintf(fp, "[%d]: %llx ", i, + (ulonglong)xd->xc_save.batch_offsets[i]); + if (((i+1)%4) == 0) { + fprintf(fp, "\n"); + linefeed = FALSE; + } else + linefeed = TRUE; + } + if (linefeed) + fprintf(fp, "\n"); + + fprintf(fp, " xc_core:\n"); + fprintf(fp, " header:\n"); + fprintf(fp, " xch_magic: %x (%s)\n", + xd->xc_core.header.xch_magic, + xd->xc_core.header.xch_magic == XC_CORE_MAGIC ? + "XC_CORE_MAGIC" : "unknown"); + fprintf(fp, " xch_nr_vcpus: %d\n", + xd->xc_core.header.xch_nr_vcpus); + fprintf(fp, " xch_nr_pages: %d (0x%x)\n", + xd->xc_core.header.xch_nr_pages, + xd->xc_core.header.xch_nr_pages); + fprintf(fp, " xch_ctxt_offset: %d (0x%x)\n", + xd->xc_core.header.xch_ctxt_offset, + xd->xc_core.header.xch_ctxt_offset); + fprintf(fp, " xch_index_offset: %d (0x%x)\n", + xd->xc_core.header.xch_index_offset, + xd->xc_core.header.xch_index_offset); + fprintf(fp, " xch_pages_offset: %d (0x%x)\n", + xd->xc_core.header.xch_pages_offset, + xd->xc_core.header.xch_pages_offset); + + fprintf(fp, " ptm_frames: %d\n", + xd->xc_core.ptm_frames); + fprintf(fp, " ptm_frame_index_list:\n"); + for (i = 0; i < xd->xc_core.ptm_frames; i++) { + fprintf(fp, "%ld ", + xd->xc_core.ptm_frame_index_list[i]); + } + fprintf(fp, xd->xc_core.ptm_frames ? 
"\n\n" : "\n"); + + return 0; +} + +static void +xen_dump_vmconfig(FILE *fp) +{ + int i, opens, closes; + char *p; + + opens = closes = 0; + p = xd->xc_save.vmconfig_buf; + for (i = 0; i < xd->xc_save.vmconfig_size; i++, p++) { + if (ascii(*p)) + fprintf(fp, "%c", *p); + else + fprintf(fp, "<%x>", *p); + + if (*p == '(') + opens++; + else if (*p == ')') + closes++; + } + fprintf(fp, "\n"); + + if (opens != closes) + error(WARNING, "invalid vmconfig contents?\n"); +} + +/* + * Looking at the active set, try to determine who panicked, + * or who was the "suspend" kernel thread. + */ +ulong get_xendump_panic_task(void) +{ + int i; + ulong task; + struct task_context *tc; + + switch (xd->flags & (XC_CORE|XC_SAVE)) + { + case XC_CORE: + if (machdep->xendump_panic_task) + return (machdep->xendump_panic_task((void *)xd)); + break; + + case XC_SAVE: + for (i = 0; i < NR_CPUS; i++) { + if (!(task = tt->active_set[i])) + continue; + tc = task_to_context(task); + if (is_kernel_thread(task) && + STREQ(tc->comm, "suspend")) + return tc->task; + } + break; + } + + return NO_TASK; +} + +/* + * Figure out the back trace hooks. + */ +void get_xendump_regs(struct bt_info *bt, ulong *pc, ulong *sp) +{ + int i; + ulong *up; + + if ((tt->panic_task == bt->task) && + (xd->panic_pc && xd->panic_sp)) { + *pc = xd->panic_pc; + *sp = xd->panic_sp; + return; + } + + switch (xd->flags & (XC_CORE|XC_SAVE)) + { + case XC_CORE: + if (machdep->get_xendump_regs) + return (machdep->get_xendump_regs(xd, bt, pc, sp)); + break; + + case XC_SAVE: + if (tt->panic_task != bt->task) + break; + + for (i = 0, up = (ulong *)bt->stackbuf; + i < LONGS_PER_STACK; i++, up++) { + if (is_kernel_text(*up) && + (STREQ(closest_symbol(*up), + "__do_suspend"))) { + *pc = *up; + *sp = tt->flags & THREAD_INFO ? 
+ bt->tc->thread_info + + (i * sizeof(long)) : + bt->task + + (i * sizeof(long)); + xd->panic_pc = *pc; + xd->panic_sp = *sp; + return; + } + } + } + + machdep->get_stack_frame(bt, pc, sp); +} + +/* + * Farm out most of the work to the proper architecture. + */ +static void +xc_core_ptm_create(void) +{ + if (!machdep->xendump_ptm_create) + error(FATAL, + "xen xc_core dumpfiles not supported on this architecture"); + + if (!machdep->xendump_ptm_create((void *)xd)) + error(FATAL, + "cannot create xen pfn-to-mfn mapping\n"); + + xd->flags |= XC_CORE_PTM_INIT; + + if (CRASHDEBUG(1)) + xendump_memory_dump(xd->ofp); +} + +/* + * Find the page index containing the mfn, and read the + * machine page into the buffer. + */ +char * +xc_core_mfn_to_page(ulong mfn, char *pgbuf) +{ + int i, b, idx, done; + ulong tmp[MAX_BATCH_SIZE]; + off_t offset; + + if (lseek(xd->xfd, (off_t)xd->xc_core.header.xch_index_offset, + SEEK_SET) == -1) { + error(INFO, "cannot lseek to page index\n"); + return NULL; + } + + for (b = 0, idx = -1, done = FALSE; + !done && (b < xd->xc_core.header.xch_nr_pages); + b += MAX_BATCH_SIZE) { + + if (read(xd->xfd, tmp, sizeof(ulong) * MAX_BATCH_SIZE) != + (MAX_BATCH_SIZE * sizeof(ulong))) { + error(INFO, "cannot read index page %d\n", b); + return NULL; + } + + for (i = 0; i < MAX_BATCH_SIZE; i++) { + if ((b+i) >= xd->xc_core.header.xch_nr_pages) { + done = TRUE; + break; + } + if (tmp[i] == mfn) { + idx = i+b; + if (CRASHDEBUG(2)) + fprintf(xd->ofp, + "page: found mfn 0x%lx (%ld) at index %d\n", + mfn, mfn, idx); + done = TRUE; + } + } + } + + if (idx == -1) { + error(INFO, "cannot find mfn %ld (0x%lx) in page index\n", + mfn, mfn); + return NULL; + } + + if (lseek(xd->xfd, (off_t)xd->xc_core.header.xch_pages_offset, + SEEK_SET) == -1) { + error(INFO, "cannot lseek to xch_pages_offset\n"); + return NULL; + } + + offset = (off_t)(idx) * (off_t)xd->page_size; + + if (lseek(xd->xfd, offset, SEEK_CUR) == -1) { + error(INFO, "cannot lseek to mfn-specified 
page\n"); + return NULL; + } + + if (read(xd->xfd, pgbuf, xd->page_size) != xd->page_size) { + error(INFO, "cannot read mfn-specified page\n"); + return NULL; + } + + return pgbuf; +} + + +/* + * Find and return the page index containing the mfn. + */ +int +xc_core_mfn_to_page_index(ulong mfn) +{ + int i, b; + ulong tmp[MAX_BATCH_SIZE]; + + if (lseek(xd->xfd, (off_t)xd->xc_core.header.xch_index_offset, + SEEK_SET) == -1) { + error(INFO, "cannot lseek to page index\n"); + return MFN_NOT_FOUND; + } + + for (b = 0; b < xd->xc_core.header.xch_nr_pages; b += MAX_BATCH_SIZE) { + + if (read(xd->xfd, tmp, sizeof(ulong) * MAX_BATCH_SIZE) != + (MAX_BATCH_SIZE * sizeof(ulong))) { + error(INFO, "cannot read index page %d\n", b); + return MFN_NOT_FOUND; + } + + for (i = 0; i < MAX_BATCH_SIZE; i++) { + if ((b+i) >= xd->xc_core.header.xch_nr_pages) + break; + + if (tmp[i] == mfn) { + if (CRASHDEBUG(2)) + fprintf(xd->ofp, + "index: batch: %d found mfn %ld (0x%lx) at index %d\n", + b/MAX_BATCH_SIZE, mfn, mfn, i+b); + return (i+b); + } + } + } + + return MFN_NOT_FOUND; +} + +/* + * Given a normal kernel pfn, determine the page index in the dumpfile. + * + * - First determine which of the pages making up the + * phys_to_machine_mapping[] array would contain the pfn. + * - From the phys_to_machine_mapping page, determine the mfn. + * - Find the mfn in the dumpfile page index. 
+ */ + +#define PFNS_PER_PAGE (xd->page_size/sizeof(unsigned long)) + +static ulong +xc_core_pfn_to_page_index(ulong pfn) +{ + ulong idx, ptm_idx, mfn_idx; + ulong *up, mfn; + off_t offset; + + idx = pfn/PFNS_PER_PAGE; + + if (idx >= xd->xc_core.ptm_frames) { + error(INFO, "pfn: %lx is too large for dumpfile\n", + pfn); + return PFN_NOT_FOUND; + } + + ptm_idx = xd->xc_core.ptm_frame_index_list[idx]; + + if (lseek(xd->xfd, (off_t)xd->xc_core.header.xch_pages_offset, + SEEK_SET) == -1) { + error(INFO, "cannot lseek to xch_pages_offset\n"); + return PFN_NOT_FOUND; + } + + offset = (off_t)(ptm_idx) * (off_t)xd->page_size; + + if (lseek(xd->xfd, offset, SEEK_CUR) == -1) { + error(INFO, "cannot lseek to pfn-specified page\n"); + return PFN_NOT_FOUND; + } + + if (read(xd->xfd, xd->page, xd->page_size) != xd->page_size) { + error(INFO, "cannot read pfn-specified page\n"); + return PFN_NOT_FOUND; + } + + up = (ulong *)xd->page; + up += (pfn%PFNS_PER_PAGE); + + mfn = *up; + + if ((mfn_idx = xc_core_mfn_to_page_index(mfn)) == MFN_NOT_FOUND) { + error(INFO, "cannot find mfn in page index\n"); + return PFN_NOT_FOUND; + } + + return mfn_idx; +} + +/* + * Store the panic task's stack hooks from where it was found + * in get_active_set_panic_task(). 
+ */
+void
+xendump_panic_hook(char *stack)
+{
+        int i, err, argc, xen_event;
+        char *arglist[MAXARGS];
+        char buf[BUFSIZE];
+        ulong value, *sp;
+
+        /*
+         * Parse a private copy of the line.  The copy is bounded so that
+         * an over-long line is truncated instead of overrunning buf[].
+         */
+        snprintf(buf, sizeof(buf), "%s", stack);
+
+        argc = parse_line(buf, arglist);
+
+        /* Without a leading "address:" token there is nothing to walk. */
+        if (argc < 1)
+                return;
+
+        /* Token 0 is the stack address of the first value on the line. */
+        if ((value = htol(strip_ending_char(arglist[0], ':'),
+            RETURN_ON_ERROR, &err)) == BADADDR)
+                return;
+
+        /*
+         * Each following token describes the next ulong on the stack, so
+         * sp advances in step with the token index.  An "xen_panic_event"
+         * entry always supplies the hooks; a plain "panic" entry is used
+         * only while no panic_sp has been recorded yet.
+         */
+        for (sp = (ulong *)value, i = 1; i < argc; i++, sp++) {
+                xen_event = strstr(arglist[i], "xen_panic_event") != NULL;
+
+                if (!xen_event &&
+                    (xd->panic_sp || !strstr(arglist[i], "panic")))
+                        continue;
+
+                if (!readmem((ulong)sp, KVADDR, &value, sizeof(ulong),
+                    xen_event ? "xen_panic_event address" : "panic address",
+                    RETURN_ON_ERROR))
+                        return;
+
+                xd->panic_sp = (ulong)sp;
+                xd->panic_pc = value;
+        }
+}
--- crash/unwind.c.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/unwind.c 2005-11-23 09:37:52.000000000 -0500 @@ -1397,9 +1397,22 @@ req = &request; if (get_symbol_type("unw", "tables", req) == TYPE_CODE_UNDEF) { - error(WARNING, "cannot determine unw.tables offset\n"); - machdep->flags |= UNW_OUT_OF_SYNC; - } else { + /* + * KLUDGE ALERT: + * If unw.tables cannot be ascertained by gdb, try unw.save_order, + * given that it is the field just after unw.tables. + */ + if (get_symbol_type("unw", "save_order", req) == TYPE_CODE_UNDEF) { + error(WARNING, "cannot determine unw.tables offset\n"); + machdep->flags |= UNW_OUT_OF_SYNC; + } else + req->member_offset -= BITS_PER_BYTE * sizeof(void *); + + if (CRASHDEBUG(1)) + error(WARNING, "using unw.save_order to determine unw.tables\n"); + } + + if (!(machdep->flags & UNW_OUT_OF_SYNC)) { machdep->machspec->unw_tables_offset = req->member_offset/BITS_PER_BYTE; --- crash/defs.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/defs.h 2006-05-01 11:49:13.000000000 -0400 @@ -1,8 +1,8 @@ /* defs.h - core analysis suite * * Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. 
- * Copyright (C) 2002, 2003, 2004, 2005 David Anderson - * Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2002, 2003, 2004, 2005, 2006 David Anderson + * Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. All rights reserved. * Copyright (C) 2002 Silicon Graphics, Inc. * * This program is free software; you can redistribute it and/or modify @@ -59,7 +59,7 @@ #define NR_CPUS (32) #endif #ifdef X86_64 -#define NR_CPUS (32) +#define NR_CPUS (256) #endif #ifdef ALPHA #define NR_CPUS (64) @@ -106,6 +106,8 @@ typedef uint64_t physaddr_t; +#define PADDR_NOT_AVAILABLE (0x1ULL) + typedef unsigned long long int ulonglong; struct number_option { ulong num; @@ -155,8 +157,8 @@ #define UNLINK_MODULES (0x1000000000ULL) #define S390D (0x2000000000ULL) #define REM_S390D (0x4000000000ULL) -#define PC_UNUSED_1 (0x8000000000ULL) -#define PC_UNUSED_2 (0x10000000000ULL) +#define SYSRQ (0x8000000000ULL) +#define KDUMP (0x10000000000ULL) #define NETDUMP (0x20000000000ULL) #define REM_NETDUMP (0x40000000000ULL) #define SYSMAP (0x80000000000ULL) @@ -169,11 +171,13 @@ #define VERSION_QUERY (0x4000000000000ULL) #define READNOW (0x8000000000000ULL) #define NOCRASHRC (0x10000000000000ULL) +#define INIT_IFILE (0x20000000000000ULL) +#define XENDUMP (0x40000000000000ULL) #define ACTIVE() (pc->flags & LIVE_SYSTEM) #define DUMPFILE() (!(pc->flags & LIVE_SYSTEM)) -#define MEMORY_SOURCES (NETDUMP|MCLXCD|LKCD|DEVMEM|S390D|MEMMOD|DISKDUMP) -#define DUMPFILE_TYPES (DISKDUMP|NETDUMP|MCLXCD|LKCD|S390D) +#define MEMORY_SOURCES (NETDUMP|KDUMP|MCLXCD|LKCD|DEVMEM|S390D|MEMMOD|DISKDUMP|XENDUMP) +#define DUMPFILE_TYPES (DISKDUMP|NETDUMP|KDUMP|MCLXCD|LKCD|S390D|XENDUMP) #define REMOTE() (pc->flags & REMOTE_DAEMON) #define REMOTE_ACTIVE() (pc->flags & REM_LIVE_SYSTEM) #define REMOTE_DUMPFILE() \ @@ -182,17 +186,29 @@ #define LKCD_DUMPFILE() (pc->flags & (LKCD|REM_LKCD)) #define NETDUMP_DUMPFILE() (pc->flags & (NETDUMP|REM_NETDUMP)) #define DISKDUMP_DUMPFILE() 
(pc->flags & DISKDUMP) +#define KDUMP_DUMPFILE() (pc->flags & KDUMP) +#define XENDUMP_DUMPFILE() (pc->flags & XENDUMP) +#define SYSRQ_TASK(X) ((pc->flags & SYSRQ) && is_task_active(X)) #define NETDUMP_LOCAL (0x1) /* netdump_data flags */ #define NETDUMP_REMOTE (0x2) -#define NETDUMP_VALID() (nd->flags & (NETDUMP_LOCAL|NETDUMP_REMOTE)) +#define VMCORE_VALID() (nd->flags & (NETDUMP_LOCAL|NETDUMP_REMOTE|KDUMP_LOCAL)) #define NETDUMP_ELF32 (0x4) #define NETDUMP_ELF64 (0x8) #define PARTIAL_DUMP (0x10) /* netdump or diskdump */ +#define KDUMP_ELF32 (0x20) +#define KDUMP_ELF64 (0x40) +#define KDUMP_LOCAL (0x80) + +#define DUMPFILE_FORMAT(flags) ((flags) & \ + (NETDUMP_ELF32|NETDUMP_ELF64|KDUMP_ELF32|KDUMP_ELF64)) #define DISKDUMP_LOCAL (0x1) #define DISKDUMP_VALID() (dd->flags & DISKDUMP_LOCAL) +#define XENDUMP_LOCAL (0x1) +#define XENDUMP_VALID() (xd->flags & XENDUMP_LOCAL) + #define CRASHDEBUG(x) (pc->debug >= (x)) #define CRASHDEBUG_SUSPEND(X) { pc->debug_save = pc->debug; pc->debug = X; } @@ -407,9 +423,19 @@ #define KALLSYMS_V2 (0x2000) #define TVEC_BASES_V2 (0x4000) #define GCC_3_3_3 (0x8000) +#define USE_OLD_BT (0x10000) +#define ARCH_XEN (0x20000) #define GCC_VERSION_DEPRECATED (GCC_3_2|GCC_3_2_3|GCC_2_96|GCC_3_3_2|GCC_3_3_3) +#define XEN() (kt->flags & ARCH_XEN) + +#define XEN_MACHINE_TO_MFN(m) ((ulong)(m) >> PAGESHIFT()) +#define XEN_PFN_TO_PSEUDO(p) ((ulong)(p) << PAGESHIFT()) +#define XEN_PFNS_PER_PAGE (PAGESIZE()/sizeof(ulong)) +#define XEN_MFN_NOT_FOUND (~0UL) +#define XEN_FOREIGN_FRAME (1UL << (BITS()-1)) + struct kernel_table { /* kernel data */ ulong flags; ulong stext; @@ -433,8 +459,28 @@ long __rq_idx[NR_CPUS]; long __cpu_idx[NR_CPUS]; long __per_cpu_offset[NR_CPUS]; - long cpu_flags[NR_CPUS]; + ulong cpu_flags[NR_CPUS]; #define NMI 0x1 + ulong xen_flags; +#define WRITABLE_PAGE_TABLES (0x1) +#define SHADOW_PAGE_TABLES (0x2) +#define CANONICAL_PAGE_TABLES (0x4) +#define XEN_SUSPEND (0x8) + char *machine_to_pseudo; + ulong phys_to_machine_mapping; + 
ulong ptm_table_size; +#define PTM_MAPPING_CACHE (512) + struct ptm_mapping_cache { + ulong mapping; + ulong mfn; + } ptm_mapping_cache[PTM_MAPPING_CACHE]; +#define PTM_MAPPING_TO_PAGE_INDEX(c) \ + (((kt->ptm_mapping_cache[c].mapping - kt->phys_to_machine_mapping)/PAGESIZE()) \ + * XEN_PFNS_PER_PAGE) + ulong last_mapping_read; + ulong ptm_cache_index; + ulong ptm_pages_searched; + ulong ptm_cache_hits; }; /* @@ -602,6 +648,7 @@ (void *)(&bt->stackbuf[(ulong)STACK_OFFSET_TYPE(OFF)]), (size_t)(SZ)) struct machine_specific; /* uniquely defined below each machine's area */ +struct xendump_data; struct machdep_table { ulong flags; @@ -653,6 +700,13 @@ int ptrs_per_pgd; char *cmdline_arg; struct machine_specific *machspec; + ulong section_size_bits; + ulong max_physmem_bits; + ulong sections_per_root; + int (*xendump_ptm_create)(struct xendump_data *); + ulong (*xendump_panic_task)(struct xendump_data *); + void (*get_xendump_regs)(struct xendump_data *, struct bt_info *, ulong *, ulong *); + void (*clear_machdep_cache)(void); }; /* @@ -660,13 +714,11 @@ * as defined in their processor-specific files below. (see KSYMS_START defs). 
*/ #define HWRESET (0x80000000) -#define SYSRQ (0x40000000) -#define OMIT_FRAME_PTR (0x20000000) -#define FRAMESIZE_DEBUG (0x10000000) -#define MACHDEP_BT_TEXT (0x8000000) -#define DEVMEMRD (0x4000000) -#define INIT (0x2000000) -#define SYSRQ_TASK(X) ((machdep->flags & SYSRQ) && is_task_active(X)) +#define OMIT_FRAME_PTR (0x40000000) +#define FRAMESIZE_DEBUG (0x20000000) +#define MACHDEP_BT_TEXT (0x10000000) +#define DEVMEMRD (0x8000000) +#define INIT (0x4000000) extern struct machdep_table *machdep; @@ -737,6 +789,7 @@ #define FOREACH_c_FLAG (0x40000) #define FOREACH_f_FLAG (0x80000) #define FOREACH_o_FLAG (0x100000) +#define FOREACH_T_FLAG (0x200000) struct foreach_data { ulong flags; @@ -875,6 +928,7 @@ long mm_struct_mmap; long mm_struct_pgd; long mm_struct_rss; + long mm_struct_anon_rss; long mm_struct_total_vm; long mm_struct_start_code; long vm_area_struct_vm_mm; @@ -970,6 +1024,11 @@ long hw_interrupt_type_set_affinity; long irq_cpustat_t___softirq_active; long irq_cpustat_t___softirq_mask; + long fdtable_max_fds; + long fdtable_max_fdset; + long fdtable_open_fds; + long fdtable_fd; + long files_struct_fdt; long files_struct_max_fds; long files_struct_max_fdset; long files_struct_open_fds; @@ -1088,6 +1147,8 @@ long inet_opt_dport; long inet_opt_sport; long inet_opt_num; + long ipv6_pinfo_rcv_saddr; + long ipv6_pinfo_daddr; long timer_list_list; long timer_list_next; long timer_list_entry; @@ -1123,6 +1184,7 @@ long zone_struct_name; long zone_struct_size; long zone_struct_memsize; + long zone_struct_zone_start_pfn; long zone_struct_zone_start_paddr; long zone_struct_zone_start_mapnr; long zone_struct_zone_mem_map; @@ -1210,7 +1272,14 @@ long x8664_pda_irqstackptr; long x8664_pda_level4_pgt; long x8664_pda_cpunumber; + long x8664_pda_me; long tss_struct_ist; + long mem_section_section_mem_map; + long vcpu_guest_context_user_regs; + long cpu_user_regs_eip; + long cpu_user_regs_esp; + long cpu_user_regs_rip; + long cpu_user_regs_rsp; }; struct size_table { /* 
stash of commonly-used sizes */ @@ -1239,6 +1308,7 @@ long umode_t; long dentry; long files_struct; + long fdtable; long fs_struct; long file; long inode; @@ -1292,15 +1362,19 @@ long address_space; long char_device_struct; long inet_sock; + long in6_addr; long socket; long spinlock_t; long radix_tree_root; long radix_tree_node; long x8664_pda; + long ppc64_paca; long gate_struct; long tss_struct; long task_struct_start_time; long cputime_t; + long mem_section; + long pid_link; }; struct array_table { @@ -1389,6 +1463,7 @@ #define ULONGLONG(ADDR) *((ulonglong *)((char *)(ADDR))) #define ULONG_PTR(ADDR) *((ulong **)((char *)(ADDR))) #define USHORT(ADDR) *((ushort *)((char *)(ADDR))) +#define SHORT(ADDR) *((short *)((char *)(ADDR))) #define VOID_PTR(ADDR) *((void **)((char *)(ADDR))) struct node_table { @@ -1420,6 +1495,7 @@ ulong kmem_max_limit; ulong kmem_max_cpus; ulong kmem_cache_count; + ulong kmem_cache_len_nodes; ulong PG_reserved; ulong PG_slab; int kmem_cache_namelen; @@ -1441,17 +1517,29 @@ ulong cached_vma_hits[VMA_CACHE]; int vma_cache_index; ulong vma_cache_fills; + void *mem_sec; + int ZONE_HIGHMEM; }; -#define NODES (0x1) -#define ZONES (0x2) -#define PERCPU_KMALLOC_V1 (0x4) -#define COMMON_VADDR (0x8) -#define KMEM_CACHE_INIT (0x10) -#define V_MEM_MAP (0x20) -#define PERCPU_KMALLOC_V2 (0x40) -#define KMEM_CACHE_UNAVAIL (0x80) -#define DISCONTIGMEM (0x100) +#define NODES (0x1) +#define ZONES (0x2) +#define PERCPU_KMALLOC_V1 (0x4) +#define COMMON_VADDR (0x8) +#define KMEM_CACHE_INIT (0x10) +#define V_MEM_MAP (0x20) +#define PERCPU_KMALLOC_V2 (0x40) +#define KMEM_CACHE_UNAVAIL (0x80) +#define FLATMEM (0x100) +#define DISCONTIGMEM (0x200) +#define SPARSEMEM (0x400) +#define SPARSEMEM_EX (0x800) +#define PERCPU_KMALLOC_V2_NODES (0x1000) +#define KMEM_CACHE_DELAY (0x2000) + +#define IS_FLATMEM() (vt->flags & FLATMEM) +#define IS_DISCONTIGMEM() (vt->flags & DISCONTIGMEM) +#define IS_SPARSEMEM() (vt->flags & SPARSEMEM) +#define IS_SPARSEMEM_EX() (vt->flags & 
SPARSEMEM_EX) #define COMMON_VADDR_SPACE() (vt->flags & COMMON_VADDR) #define PADDR_PRLEN (vt->paddr_prlen) @@ -1680,6 +1768,33 @@ #define VIRTPAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask) #define PHYSPAGEBASE(X) (((physaddr_t)(X)) & (physaddr_t)machdep->pagemask) +/* + * Sparse memory stuff + * These must follow the definitions in the kernel mmzone.h + */ +#define SECTION_SIZE_BITS() (machdep->section_size_bits) +#define MAX_PHYSMEM_BITS() (machdep->max_physmem_bits) +#define SECTIONS_SHIFT() (MAX_PHYSMEM_BITS() - SECTION_SIZE_BITS()) +#define PA_SECTION_SHIFT() (SECTION_SIZE_BITS()) +#define PFN_SECTION_SHIFT() (SECTION_SIZE_BITS() - PAGESHIFT()) +#define NR_MEM_SECTIONS() (1UL << SECTIONS_SHIFT()) +#define PAGES_PER_SECTION() (1UL << PFN_SECTION_SHIFT()) +#define PAGE_SECTION_MASK() (~(PAGES_PER_SECTION()-1)) + +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT()) +#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT()) + +#define SECTIONS_PER_ROOT() (machdep->sections_per_root) + +/* CONFIG_SPARSEMEM_EXTREME */ +#define _SECTIONS_PER_ROOT_EXTREME() (PAGESIZE() / SIZE(mem_section)) +/* !CONFIG_SPARSEMEM_EXTREME */ +#define _SECTIONS_PER_ROOT() (1) + +#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT()) +#define NR_SECTION_ROOTS() (NR_MEM_SECTIONS() / SECTIONS_PER_ROOT()) +#define SECTION_ROOT_MASK() (SECTIONS_PER_ROOT() - 1) + /* * Machine specific stuff */ @@ -1724,22 +1839,51 @@ #define TIF_SIGPENDING (2) +// CONFIG_X86_PAE +#define _SECTION_SIZE_BITS_PAE 30 +#define _MAX_PHYSMEM_BITS_PAE 36 + +// !CONFIG_X86_PAE +#define _SECTION_SIZE_BITS 26 +#define _MAX_PHYSMEM_BITS 32 + #endif /* X86 */ #ifdef X86_64 #define _64BIT_ #define MACHINE_TYPE "X86_64" -#define USERSPACE_TOP 0x0000008000000000 -#define __START_KERNEL_map 0xffffffff80000000 -#define PAGE_OFFSET 0x0000010000000000 +#define USERSPACE_TOP (machdep->machspec->userspace_top) +#define PAGE_OFFSET (machdep->machspec->page_offset) +#define VMALLOC_START 
(machdep->machspec->vmalloc_start_addr) +#define VMALLOC_END (machdep->machspec->vmalloc_end) +#define MODULES_VADDR (machdep->machspec->modules_vaddr) +#define MODULES_END (machdep->machspec->modules_end) -#define VMALLOC_START 0xffffff0000000000 -#define VMALLOC_END 0xffffff7fffffffff -#define MODULES_VADDR 0xffffffffa0000000 -#define MODULES_END 0xffffffffafffffff +#define __START_KERNEL_map 0xffffffff80000000 #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define USERSPACE_TOP_ORIG 0x0000008000000000 +#define PAGE_OFFSET_ORIG 0x0000010000000000 +#define VMALLOC_START_ADDR_ORIG 0xffffff0000000000 +#define VMALLOC_END_ORIG 0xffffff7fffffffff +#define MODULES_VADDR_ORIG 0xffffffffa0000000 +#define MODULES_END_ORIG 0xffffffffafffffff + +#define USERSPACE_TOP_2_6_11 0x0000800000000000 +#define PAGE_OFFSET_2_6_11 0xffff810000000000 +#define VMALLOC_START_ADDR_2_6_11 0xffffc20000000000 +#define VMALLOC_END_2_6_11 0xffffe1ffffffffff +#define MODULES_VADDR_2_6_11 0xffffffff88000000 +#define MODULES_END_2_6_11 0xfffffffffff00000 + +#define USERSPACE_TOP_XEN 0x0000800000000000 +#define PAGE_OFFSET_XEN 0xffff880000000000 +#define VMALLOC_START_ADDR_XEN 0xffffc20000000000 +#define VMALLOC_END_XEN 0xffffe1ffffffffff +#define MODULES_VADDR_XEN 0xffffffff88000000 +#define MODULES_END_XEN 0xfffffffffff00000 + #define PTOV(X) ((unsigned long)(X)+(machdep->kvbase)) #define VTOP(X) x86_64_VTOP((ulong)(X)) #define IS_VMALLOC_ADDR(X) x86_64_IS_VMALLOC_ADDR((ulong)(X)) @@ -1757,12 +1901,25 @@ #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define IS_LAST_PML4_READ(pml4) ((ulong)(pml4) == machdep->machspec->last_pml4_read) + #define FILL_PML4() { \ if (!(pc->flags & RUNTIME) || ACTIVE()) \ - readmem(vt->kernel_pgd[0], KVADDR, machdep->machspec->pml4, \ + if (!IS_LAST_PML4_READ(vt->kernel_pgd[0])) \ + readmem(vt->kernel_pgd[0], KVADDR, machdep->machspec->pml4, \ PAGESIZE(), 
"init_level4_pgt", FAULT_ON_ERROR); \ + machdep->machspec->last_pml4_read = (ulong)(vt->kernel_pgd[0]); \ } +#define IS_LAST_UPML_READ(pml) ((ulong)(pml) == machdep->machspec->last_upml_read) + +#define FILL_UPML(PML, TYPE, SIZE) \ + if (!IS_LAST_UPML_READ(PML)) { \ + readmem((ulonglong)((ulong)(PML)), TYPE, machdep->machspec->upml, \ + SIZE, "pml page", FAULT_ON_ERROR); \ + machdep->machspec->last_upml_read = (ulong)(PML); \ + } + /* * PHYSICAL_PAGE_MASK changed (enlarged) between 2.4 and 2.6, so * for safety, use the 2.6 values to generate it. @@ -1796,6 +1953,15 @@ #define PAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask) +#define _CPU_PDA_READ(CPU, BUFFER) \ + ((STRNEQ("_cpu_pda", closest_symbol((symbol_value("_cpu_pda") + \ + ((CPU) * sizeof(unsigned long)))))) && \ + (readmem(symbol_value("_cpu_pda") + ((CPU) * sizeof(void *)), \ + KVADDR, &cpu_pda_addr, sizeof(unsigned long), \ + "_cpu_pda addr", FAULT_ON_ERROR)) && \ + (readmem(cpu_pda_addr, KVADDR, (BUFFER), SIZE(x8664_pda), \ + "cpu_pda entry", FAULT_ON_ERROR))) + #define CPU_PDA_READ(CPU, BUFFER) \ (STRNEQ("cpu_pda", closest_symbol((symbol_value("cpu_pda") + \ ((CPU) * SIZE(x8664_pda))))) && \ @@ -1806,6 +1972,9 @@ #define VALID_LEVEL4_PGT_ADDR(X) \ (((X) == VIRTPAGEBASE(X)) && IS_KVADDR(X) && !IS_VMALLOC_ADDR(X)) +#define _SECTION_SIZE_BITS 27 +#define _MAX_PHYSMEM_BITS 40 + #endif /* X86_64 */ #ifdef ALPHA @@ -1884,6 +2053,9 @@ #define TIF_SIGPENDING (2) +#define _SECTION_SIZE_BITS 24 +#define _MAX_PHYSMEM_BITS 44 + #endif /* PPC */ #ifdef IA64 @@ -2067,6 +2239,32 @@ #define PGD_OFFSET(vaddr) ((vaddr >> PGDIR_SHIFT) & 0x7ff) #define PMD_OFFSET(vaddr) ((vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +/* 4-level page table support */ + +/* 4K pagesize */ +#define PTE_INDEX_SIZE_L4_4K 9 +#define PMD_INDEX_SIZE_L4_4K 7 +#define PUD_INDEX_SIZE_L4_4K 7 +#define PGD_INDEX_SIZE_L4_4K 9 +#define PTE_SHIFT_L4_4K 17 +#define PMD_MASKED_BITS_4K 0 + +/* 64K pagesize */ +#define PTE_INDEX_SIZE_L4_64K 12 +#define 
PMD_INDEX_SIZE_L4_64K 12 +#define PUD_INDEX_SIZE_L4_64K 0 +#define PGD_INDEX_SIZE_L4_64K 4 +#define PTE_SHIFT_L4_64K 32 +#define PMD_MASKED_BITS_64K 0x1ff + +#define L4_OFFSET(vaddr) ((vaddr >> (machdep->machspec->l4_shift)) & 0x1ff) + +#define PGD_OFFSET_L4(vaddr) \ + ((vaddr >> (machdep->machspec->l3_shift)) & (machdep->machspec->ptrs_per_l3 - 1)) + +#define PMD_OFFSET_L4(vaddr) \ + ((vaddr >> (machdep->machspec->l2_shift)) & (machdep->machspec->ptrs_per_l2 - 1)) + #define _PAGE_PRESENT 0x001UL /* software: pte contains a translation */ #define _PAGE_USER 0x002UL /* matches one of the PP bits */ #define _PAGE_RW 0x004UL /* software: user write access allowed */ @@ -2087,6 +2285,9 @@ #define STACK_FRAME_OVERHEAD 112 #define EXCP_FRAME_MARKER 0x7265677368657265 +#define _SECTION_SIZE_BITS 24 +#define _MAX_PHYSMEM_BITS 44 + #endif /* PPC64 */ #ifdef S390 @@ -2685,6 +2886,8 @@ int machine_type(char *); void command_not_supported(void); void option_not_supported(int); +void please_wait(char *); +void please_wait_done(void); /* @@ -2721,6 +2924,7 @@ struct syment *next_symbol(char *, struct syment *); struct syment *prev_symbol(char *, struct syment *); void get_symbol_data(char *, long, void *); +int try_get_symbol_data(char *, long, void *); char *value_to_symstr(ulong, char *, ulong); char *value_symbol(ulong); ulong symbol_value(char *); @@ -2807,6 +3011,7 @@ char *swap_location(ulonglong, char *); void clear_swap_info_cache(void); uint memory_page_size(void); +void force_page_size(char *); ulong first_vmalloc_address(void); int l1_cache_size(void); int dumpfile_memory(int); @@ -3005,6 +3210,8 @@ */ void register_extension(struct command_table_entry *); void dump_extension_table(int); +void load_extension(char *); +void unload_extension(char *); /* * kernel.c @@ -3020,7 +3227,8 @@ void generic_dump_irq(int); int generic_dis_filter(ulong, char *); void display_sys_stats(void); -void dump_kernel_table(void); +char *get_uptime(char *, ulonglong *); +void 
dump_kernel_table(int); void dump_bt_info(struct bt_info *); void dump_log(int); void set_cpu(int); @@ -3069,6 +3277,8 @@ #define BT_DUMPFILE_SEARCH (0x800000000ULL) #define BT_EFRAME_SEARCH2 (0x1000000000ULL) #define BT_START (0x2000000000ULL) +#define BT_TEXT_SYMBOLS_ALL (0x4000000000ULL) +#define BT_XEN_STOP_THIS_CPU (0x8000000000ULL) #define BT_REF_HEXVAL (0x1) #define BT_REF_SYMBOL (0x2) @@ -3101,6 +3311,8 @@ #define TYPE_S390D (REMOTE_VERBOSE << 6) #define TYPE_NETDUMP (REMOTE_VERBOSE << 7) +ulong xen_machine_to_pseudo(ulong); + /* * dev.c */ @@ -3194,7 +3406,16 @@ #define NMI_STACK 2 /* ebase[] offset to NMI exception stack */ struct machine_specific { + ulong userspace_top; + ulong page_offset; + ulong vmalloc_start_addr; + ulong vmalloc_end; + ulong modules_vaddr; + ulong modules_end; char *pml4; + char *upml; + ulong last_upml_read; + ulong last_pml4_read; char *irqstack; struct x86_64_pt_regs_offsets pto; struct x86_64_stkinfo stkinfo; @@ -3202,6 +3423,11 @@ #define KSYMS_START (0x1) #define PT_REGS_INIT (0x2) +#define VM_ORIG (0x4) +#define VM_2_6_11 (0x8) +#define VM_XEN (0x10) +#define NO_TSS (0x20) +#define SCHED_TEXT (0x40) #define _2MB_PAGE_MASK (~((MEGABYTES(2))-1)) #endif @@ -3240,13 +3466,43 @@ ulong hwintrstack[NR_CPUS]; char *hwstackbuf; uint hwstacksize; -}; + char *level4; + ulong last_level4_read; + + uint l4_index_size; + uint l3_index_size; + uint l2_index_size; + uint l1_index_size; + + uint ptrs_per_l3; + uint ptrs_per_l2; + uint ptrs_per_l1; + + uint l4_shift; + uint l3_shift; + uint l2_shift; + uint l1_shift; + + uint pte_shift; + uint l2_masked_bits; +}; + +#define IS_LAST_L4_READ(l4) ((ulong)(l4) == machdep->machspec->last_level4_read) + +#define FILL_L4(L4, TYPE, SIZE) \ + if (!IS_LAST_L4_READ(L4)) { \ + readmem((ulonglong)((ulong)(L4)), TYPE, machdep->machspec->level4, \ + SIZE, "level4 page", FAULT_ON_ERROR); \ + machdep->machspec->last_level4_read = (ulong)(L4); \ + } void ppc64_init(int); void ppc64_dump_machdep_table(ulong); 
#define display_idt_table() \ error(FATAL, "-d option is not applicable to PowerPC architecture\n") #define KSYMS_START (0x1) +#define VM_ORIG (0x2) +#define VM_4_LEVEL (0x4) #endif /* @@ -3396,10 +3652,23 @@ int netdump_init(char *, FILE *); ulong get_netdump_panic_task(void); ulong get_netdump_switch_stack(ulong); -int netdump_memory_dump(FILE *); FILE *set_netdump_fp(FILE *); +int netdump_memory_dump(FILE *); void get_netdump_regs(struct bt_info *, ulong *, ulong *); int is_partial_netdump(void); +void get_netdump_regs_x86(struct bt_info *, ulong *, ulong *); +void get_netdump_regs_x86_64(struct bt_info *, ulong *, ulong *); + +int read_kdump(int, void *, int, ulong, physaddr_t); +int write_kdump(int, void *, int, ulong, physaddr_t); +int is_kdump(char *, ulong); +int kdump_init(char *, FILE *); +ulong get_kdump_panic_task(void); +uint kdump_page_size(void); +int kdump_free_memory(void); +int kdump_memory_used(void); +int kdump_memory_dump(FILE *); +void get_kdump_regs(struct bt_info *, ulong *, ulong *); /* * diskdump.c @@ -3418,6 +3687,23 @@ void get_diskdump_regs(struct bt_info *, ulong *, ulong *); /* + * xendump.c + */ +int is_xendump(char *); +int read_xendump(int, void *, int, ulong, physaddr_t); +int write_xendump(int, void *, int, ulong, physaddr_t); +uint xendump_page_size(void); +int xendump_free_memory(void); +int xendump_memory_used(void); +int xendump_init(char *, FILE *); +int xendump_memory_dump(FILE *); +ulong get_xendump_panic_task(void); +void get_xendump_regs(struct bt_info *, ulong *, ulong *); +char *xc_core_mfn_to_page(ulong, char *); +int xc_core_mfn_to_page_index(ulong); +void xendump_panic_hook(char *); + +/* * net.c */ void net_init(void); @@ -3560,6 +3846,7 @@ #define LKCD_DUMP_V7 (0x7) /* DUMP_VERSION_NUMBER */ #define LKCD_DUMP_V8 (0x8) /* DUMP_VERSION_NUMBER */ #define LKCD_DUMP_V9 (0x9) /* DUMP_VERSION_NUMBER */ +#define LKCD_DUMP_V10 (0xa) /* DUMP_VERSION_NUMBER */ #define LKCD_DUMP_VERSION_NUMBER_MASK (0xf) #define LKCD_DUMP_RAW 
(0x1) /* DUMP_[DH_]RAW */ --- crash/vas_crash.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/vas_crash.h 2006-05-15 16:50:28.000000000 -0400 @@ -19,7 +19,7 @@ */ #include -#include +//#include void save_core(void); --- crash/netdump.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/netdump.h 2005-11-04 17:37:53.000000000 -0500 @@ -24,3 +24,14 @@ #define NT_TASKSTRUCT 4 #define NT_DISKDUMP 0x70000001 + +#ifdef NOTDEF +/* + * Note: Based upon the original, abandoned, proposal for + * its contents -- keep around for potential future use. + */ +#ifndef NT_KDUMPINFO +#define NT_KDUMPINFO 7 +#endif + +#endif /* NOTDEF */ --- crash/diskdump.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/diskdump.h 2005-11-30 15:15:00.000000000 -0500 @@ -3,6 +3,8 @@ * * Copyright (C) 2004, 2005 David Anderson * Copyright (C) 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005 FUJITSU LIMITED + * Copyright (C) 2005 NEC Corporation * * This software may be freely redistributed under the terms of the * GNU General Public License. @@ -10,7 +12,59 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Author: David Anderson */ +#include + +#define divideup(x, y) (((x) + ((y) - 1)) / (y)) +#define round(x, y) (((x) / (y)) * (y)) + +#define DUMP_PARTITION_SIGNATURE "diskdump" +#define SIG_LEN (sizeof(DUMP_PARTITION_SIGNATURE) - 1) +#define DISK_DUMP_SIGNATURE "DISKDUMP" + +#define DUMP_HEADER_COMPLETED 0 +#define DUMP_HEADER_INCOMPLETED 1 +#define DUMP_HEADER_COMPRESSED 8 + +struct disk_dump_header { + char signature[SIG_LEN]; /* = "DISKDUMP" */ + int header_version; /* Dump header version */ + struct new_utsname utsname; /* copy of system_utsname */ + struct timeval timestamp; /* Time stamp */ + unsigned int status; /* Above flags */ + int block_size; /* Size of a block in byte */ + int sub_hdr_size; /* Size of arch dependent + header in blocks */ + unsigned int bitmap_blocks; /* Size of Memory bitmap in + block */ + unsigned int max_mapnr; /* = max_mapnr */ + unsigned int total_ram_blocks;/* Number of blocks should be + written */ + unsigned int device_blocks; /* Number of total blocks in + * the dump device */ + unsigned int written_blocks; /* Number of written blocks */ + unsigned int current_cpu; /* CPU# which handles dump */ + int nr_cpus; /* Number of CPUs */ + struct task_struct *tasks[0]; +}; + +struct disk_dump_sub_header { + long elf_regs; +}; + +/* page flags */ +#define DUMP_DH_COMPRESSED 0x1 /* page is compressed */ + +/* descriptor of each page for vmcore */ +typedef struct page_desc { + off_t offset; /* the offset of the page data*/ + unsigned int size; /* the size of this dump page */ + unsigned int flags; /* flags */ + unsigned long long page_flags; /* page flags */ +} page_desc_t; + +#define DISKDUMP_CACHED_PAGES (16) +#define PAGE_VALID (0x1) /* flags */ +#define DISKDUMP_VALID_PAGE(flags) ((flags) & PAGE_VALID) + --- crash/xendump.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/xendump.h 2006-04-26 15:58:59.000000000 -0400 @@ -0,0 +1,97 @@ +/* + * xendump.h + * + * Copyright (C) 2006 David Anderson + * Copyright (C) 2006 Red Hat, 
Inc. All rights reserved. + * + * This software may be freely redistributed under the terms of the + * GNU General Public License. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include
+
+#define XC_SAVE_SIGNATURE "LinuxGuestRecord"
+#define XC_CORE_MAGIC 0xF00FEBED
+
+/*
+ * From xenctrl.h, but probably not on most host machines.
+ *
+ * Header of an xc_core dumpfile.  The xch_*_offset members are used as
+ * absolute file offsets (lseek with SEEK_SET) by the xc_core readers.
+ */
+typedef struct xc_core_header {
+        unsigned int xch_magic;         /* presumably XC_CORE_MAGIC -- confirm in is_xendump() */
+        unsigned int xch_nr_vcpus;
+        unsigned int xch_nr_pages;      /* number of entries searched in the page index */
+        unsigned int xch_ctxt_offset;
+        unsigned int xch_index_offset;  /* file offset of the mfn page index */
+        unsigned int xch_pages_offset;  /* file offset of the first saved page */
+} xc_core_header_t;
+
+/*
+ * One pfn-to-file-offset cache entry; see xendump_data.poc and
+ * PFN_TO_OFFSET_CACHE_ENTRIES.
+ */
+struct pfn_offset_cache {
+        off_t file_offset;
+        ulong pfn;
+        ulong cnt;
+};
+
+#define PFN_TO_OFFSET_CACHE_ENTRIES (1024)
+
+/*
+ * State for the xendump dumpfile currently being read.
+ */
+struct xendump_data {
+        ulong flags; /* XENDUMP_LOCAL, plus anything else... */
+        int xfd;                        /* descriptor used for lseek()/read() of the dumpfile */
+        int pc_next;
+        uint page_size;                 /* bytes transferred per page read */
+        FILE *ofp;                      /* stream that receives debug output */
+        char *page;                     /* page_size buffer; receives phys_to_machine pages */
+        ulong accesses;
+        ulong cache_hits;
+        ulong redundant;
+        ulong last_pfn;
+        struct pfn_offset_cache *poc;
+
+        /* xc_core format data */
+        struct xc_core_data {
+                int ptm_frames;         /* upper bound for pfn/PFNS_PER_PAGE */
+                ulong *ptm_frame_index_list; /* dumpfile page index of each ptm frame */
+                struct xc_core_header header;
+        } xc_core;
+
+        /* xc_save format data */
+        struct xc_save_data {
+                ulong nr_pfns;
+                int vmconfig_size;
+                char *vmconfig_buf;
+                ulong *p2m_frame_list;
+                uint pfns_not;
+                off_t pfns_not_offset;
+                off_t vcpu_ctxt_offset;
+                off_t shared_info_page_offset;
+                off_t *batch_offsets;
+                ulong batch_count;
+                ulong *region_pfn_type;
+        } xc_save;
+
+        /* stack hooks recorded by xendump_panic_hook() */
+        ulong panic_pc;
+        ulong panic_sp;
+};
+
+/* Additional xendump_data.flags bits, above XENDUMP_LOCAL. */
+#define XC_SAVE (XENDUMP_LOCAL << 1)
+#define XC_CORE (XENDUMP_LOCAL << 2)
+#define XC_CORE_PTM_INIT (XENDUMP_LOCAL << 3)
+
+/* Little-endian for X86, X86_64 and IA64; big-endian for anything else. */
+#define MACHINE_BYTE_ORDER() \
+        (machine_type("X86") || \
+         machine_type("X86_64") || \
+         machine_type("IA64") ? 
__LITTLE_ENDIAN : __BIG_ENDIAN)
+
+/*
+ * True when data of the given byte order must be swapped for the
+ * machine type being analyzed.  The argument is parenthesized so that
+ * an expression argument is compared as a whole.
+ */
+#define BYTE_SWAP_REQUIRED(endian) ((endian) != MACHINE_BYTE_ORDER())
+
+/*
+ * Return x with its four bytes in reverse order.
+ */
+static inline uint32_t
+swab32(uint32_t x)
+{
+        return (((x & 0x000000ffU) << 24) |
+                ((x & 0x0000ff00U) << 8) |
+                ((x & 0x00ff0000U) >> 8) |
+                ((x & 0xff000000U) >> 24));
+}
+
+/* Failure values returned by the mfn/pfn page-index lookups. */
+#define MFN_NOT_FOUND (-1)
+#define PFN_NOT_FOUND (-1)
--- crash/lkcd_dump_v5.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/lkcd_dump_v5.h 2006-03-22 08:45:35.000000000 -0500 @@ -35,7 +35,7 @@ #ifndef _DUMP_H #define _DUMP_H -#include +//#include /* define TRUE and FALSE for use in our dump modules */ #ifndef FALSE --- crash/lkcd_dump_v7.h.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/lkcd_dump_v7.h 2006-03-22 08:45:57.000000000 -0500 @@ -35,7 +35,7 @@ #ifndef _DUMP_H #define _DUMP_H -#include +//#include /* define TRUE and FALSE for use in our dump modules */ #ifndef FALSE --- crash/Makefile.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/Makefile 2006-05-15 17:21:18.000000000 -0400 @@ -3,8 +3,8 @@ # Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. # www.missioncriticallinux.com, info@missioncriticallinux.com # -# Copyright (C) 2002, 2003, 2004, 2005 David Anderson -# Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. All rights reserved. +# Copyright (C) 2002, 2003, 2004, 2005, 2006 David Anderson +# Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. All rights reserved. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -39,6 +39,8 @@ GDB_FILES= GDB_OFILES= +GDB_PATCH_FILES=gdb-6.1.patch + # # Default installation directory # @@ -62,7 +64,7 @@ GENERIC_HFILES=defs.h MCORE_HFILES=va_server.h vas_crash.h -REDHAT_HFILES=netdump.h diskdump.h +REDHAT_HFILES=netdump.h diskdump.h xendump.h LKCD_DUMP_HFILES=lkcd_vmdump_v1.h lkcd_vmdump_v2_v3.h lkcd_dump_v5.h \ lkcd_dump_v7.h lkcd_dump_v8.h lkcd_fix_mem.h LKCD_TRACE_HFILES=lkcd_x86_trace.h @@ -75,7 +77,7 @@ extensions.c remote.c va_server.c va_server_v1.c symbols.c cmdline.c \ lkcd_common.c lkcd_v1.c lkcd_v2_v3.c lkcd_v5.c lkcd_v7.c lkcd_v8.c\ lkcd_fix_mem.c s390_dump.c lkcd_x86_trace.c \ - netdump.c diskdump.c unwind.c unwind_decoder.c + netdump.c diskdump.c xendump.c unwind.c unwind_decoder.c SOURCE_FILES=${CFILES} ${GENERIC_HFILES} ${MCORE_HFILES} \ ${REDHAT_CFILES} ${REDHAT_HFILES} ${UNWIND_HFILES} \ @@ -86,9 +88,18 @@ alpha.o x86.o ppc.o ia64.o s390.o s390x.o ppc64.o x86_64.o \ extensions.o remote.o va_server.o va_server_v1.o symbols.o cmdline.o \ lkcd_common.o lkcd_v1.o lkcd_v2_v3.o lkcd_v5.o lkcd_v7.o lkcd_v8.o \ - lkcd_fix_mem.o s390_dump.o netdump.o diskdump.o \ + lkcd_fix_mem.o s390_dump.o netdump.o diskdump.o xendump.o \ lkcd_x86_trace.o unwind_v1.o unwind_v2.o unwind_v3.o +# These are the current set of crash extensions sources. They are not built +# by default unless the third command line of the "all:" stanza is uncommented. +# Alternatively, they can be built by entering "make extensions" from this +# directory. 
+ +EXTENSIONS=extensions +EXTENSION_SOURCE_FILES=${EXTENSIONS}/Makefile ${EXTENSIONS}/echo.c ${EXTENSIONS}/dminfo.c +EXTENSION_OBJECT_FILES=echo.so dminfo.so + DAEMON_OBJECT_FILES=remote_daemon.o va_server.o va_server_v1.o \ lkcd_common.o lkcd_v1.o lkcd_v2_v3.o lkcd_v5.o lkcd_v7.o lkcd_v8.o \ s390_dump.o netdump_daemon.o @@ -150,10 +161,11 @@ ${GDB}/gdb/main.c ${GDB}/gdb/symtab.c ${GDB}/gdb/target.c \ ${GDB}/gdb/symfile.c ${GDB}/gdb/elfread.c \ ${GDB}/gdb/ui-file.c ${GDB}/gdb/utils.c ${GDB}/gdb/dwarf2read.c \ - ${GDB}/include/obstack.h + ${GDB}/include/obstack.h ${GDB}/gdb/ppc-linux-tdep.c GDB_6.1_OFILES=${GDB}/gdb/main.o ${GDB}/gdb/symtab.o \ ${GDB}/gdb/target.o ${GDB}/gdb/symfile.o ${GDB}/gdb/elfread.o \ - ${GDB}/gdb/ui-file.o ${GDB}/gdb/utils.o ${GDB}/gdb/dwarf2read.o + ${GDB}/gdb/ui-file.o ${GDB}/gdb/utils.o ${GDB}/gdb/dwarf2read.o \ + ${GDB}/gdb/ppc-linux-tdep.o # # GDB_FLAGS is passed up from the gdb Makefile. @@ -175,7 +187,8 @@ CFLAGS=-g -D${TARGET} ${TARGET_CFLAGS} -TAR_FILES=${SOURCE_FILES} Makefile COPYING README .rh_rpm_package crash.8 +TAR_FILES=${SOURCE_FILES} Makefile COPYING README .rh_rpm_package crash.8 \ + ${EXTENSION_SOURCE_FILES} CSCOPE_FILES=${SOURCE_FILES} READLINE_DIRECTORY=./${GDB}/readline @@ -184,9 +197,13 @@ REDHATFLAGS=-DREDHAT +# To build the extensions library by default, uncomment the third command +# line below. Otherwise they can be built by entering "make extensions". + all: make_configure @./configure -p "RPMPKG=${RPMPKG}" -b @make --no-print-directory gdb_merge +# @make --no-print-directory extensions gdb_merge: force @if [ ! 
-f ${GDB}/README ]; then \ @@ -206,6 +223,11 @@ @for FILE in ${GDB_FILES}; do\ echo $$FILE >> gdb.files; done @tar --exclude-from gdb.files -xvzmf ${GDB}.tar.gz + @make --no-print-directory gdb_patch + +gdb_patch: + if [ -f ${GDB}.patch ] && [ -s ${GDB}.patch ]; then \ + patch -p0 < ${GDB}.patch; fi library: make_build_data ${OBJECT_FILES} ar -rs ${PROGRAM}lib.a ${OBJECT_FILES} @@ -318,7 +340,7 @@ remote_daemon.o: ${GENERIC_HFILES} remote.c cc -c ${CFLAGS} -DDAEMON remote.c -o remote_daemon.o ${WARNING_OPTIONS} ${WARNING_ERROR} -x86.o: ${GENERIC_HFILES} x86.c +x86.o: ${GENERIC_HFILES} ${REDHAT_HFILES} x86.c cc -c ${CFLAGS} -DMCLX x86.c ${WARNING_OPTIONS} ${WARNING_ERROR} alpha.o: ${GENERIC_HFILES} alpha.c @@ -353,6 +375,9 @@ diskdump.o: ${GENERIC_HFILES} ${REDHAT_HFILES} diskdump.c cc -c ${CFLAGS} diskdump.c ${WARNING_OPTIONS} ${WARNING_ERROR} +xendump.o: ${GENERIC_HFILES} ${REDHAT_HFILES} xendump.c + cc -c ${CFLAGS} xendump.c ${WARNING_OPTIONS} ${WARNING_ERROR} + extensions.o: ${GENERIC_HFILES} extensions.c cc -c ${CFLAGS} extensions.c ${WARNING_OPTIONS} ${WARNING_ERROR} @@ -393,13 +418,13 @@ gdb_files: make_configure @./configure -q -b - @echo ${GDB_FILES} + @echo ${GDB_FILES} ${GDB_PATCH_FILES} show_files: @if [ -f ${PROGRAM} ]; then \ ./${PROGRAM} --no_crashrc -h README > README; fi - @echo ${SOURCE_FILES} Makefile ${GDB_FILES} COPYING README \ - .rh_rpm_package crash.8 + @echo ${SOURCE_FILES} Makefile ${GDB_FILES} ${GDB_PATCH_FILES} COPYING README \ + .rh_rpm_package crash.8 ${EXTENSION_SOURCE_FILES} ctags: ctags ${SOURCE_FILES} @@ -411,7 +436,7 @@ do_tar: @if [ -f ${PROGRAM} ]; then \ ./${PROGRAM} --no_crashrc -h README > README; fi - tar cvzf ${PROGRAM}.tar.gz ${TAR_FILES} ${GDB_FILES} + tar cvzf ${PROGRAM}.tar.gz ${TAR_FILES} ${GDB_FILES} ${GDB_PATCH_FILES} @echo; ls -l ${PROGRAM}.tar.gz # To create a base tar file for Red Hat RPM packaging, pass the base RPM @@ -446,8 +471,8 @@ @rm -f ${PROGRAM}-${RELEASE}.tar.gz @rm -f ${PROGRAM}-${RELEASE}.src.rpm @chown 
root ./RELDIR/${PROGRAM}-${RELEASE} - @tar cf - ${SOURCE_FILES} Makefile ${GDB_FILES} COPYING \ - .rh_rpm_package crash.8 | (cd ./RELDIR/${PROGRAM}-${RELEASE}; tar xf -) + @tar cf - ${SOURCE_FILES} Makefile ${GDB_FILES} ${GDB_PATCH_FILES} COPYING \ + .rh_rpm_package crash.8 ${EXTENSION_SOURCE_FILES} | (cd ./RELDIR/${PROGRAM}-${RELEASE}; tar xf -) @cp ${GDB}.tar.gz ./RELDIR/${PROGRAM}-${RELEASE} @./${PROGRAM} --no_crashrc -h README > ./RELDIR/${PROGRAM}-${RELEASE}/README @(cd ./RELDIR; find . -exec chown root {} ";") @@ -488,3 +513,10 @@ dis: objdump --disassemble --line-numbers ${PROGRAM} > ${PROGRAM}.dis + +extensions: make_configure + @./configure -q -b + @make --no-print-directory do_extensions + +do_extensions: + @(cd extensions; make -i OBJECTS="$(EXTENSION_OBJECT_FILES)" TARGET=$(TARGET)) --- crash/gdb-6.1.patch.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/gdb-6.1.patch 2006-01-03 13:36:25.000000000 -0500 @@ -0,0 +1,11 @@ +--- gdb-6.1/bfd/coff-alpha.c.orig ++++ gdb-6.1/bfd/coff-alpha.c +@@ -1455,7 +1455,7 @@ alpha_relocate_section (output_bfd, info + amt = sizeof (struct ecoff_section_tdata); + lita_sec_data = ((struct ecoff_section_tdata *) + bfd_zalloc (input_bfd, amt)); +- ecoff_section_data (input_bfd, lita_sec) = lita_sec_data; ++ lita_sec->used_by_bfd = lita_sec_data; + } + + if (lita_sec_data->gp != 0) --- crash/README.orig 2006-05-15 17:21:18.000000000 -0400 +++ crash/README 2006-05-15 17:21:16.000000000 -0400 @@ -69,7 +69,7 @@ After the kernel is re-compiled, the uncompressed "vmlinux" kernel that is created in the top-level kernel build directory must be saved. - To build this utility, simply uncompress the tar file, enter the crash-4.0 + To build this utility, simply uncompress the tar file, enter the crash-4.0-2.26 subdirectory, and type "make". The initial build will take several minutes because the gdb module must be configured and and built. 
Alternatively, the crash source RPM file may be installed and built, and the resultant crash @@ -89,10 +89,12 @@ $ crash - crash 4.0 - Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. - Copyright (C) 2004, 2005 IBM Corporation - Copyright (C) 1999-2005 Hewlett-Packard Co + crash 4.0-2.26 + Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. + Copyright (C) 2004, 2005, 2006 IBM Corporation + Copyright (C) 1999-2006 Hewlett-Packard Co + Copyright (C) 2005 Fujitsu Limited + Copyright (C) 2005 NEC Corporation Copyright (C) 1999, 2002 Silicon Graphics, Inc. Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. This program is free software, covered by the GNU General Public License, @@ -111,7 +113,7 @@ KERNEL: /boot/vmlinux DUMPFILE: /dev/mem CPUS: 1 - DATE: Wed Jul 13 13:26:00 2005 + DATE: Mon May 15 17:21:15 2006 UPTIME: 10 days, 22:55:18 LOAD AVERAGE: 0.08, 0.03, 0.01 TASKS: 42 @@ -139,7 +141,7 @@ exit log rd task extend mach repeat timer - crash version: 4.0 gdb version: 6.1 + crash version: 4.0-2.26 gdb version: 6.1 For help on any command above, enter "help ". For help on input options, enter "help input". For help on output options, enter "help output". @@ -152,10 +154,12 @@ $ crash vmlinux vmcore - crash 4.0 - Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. - Copyright (C) 2004, 2005 IBM Corporation - Copyright (C) 1999-2005 Hewlett-Packard Co + crash 4.0-2.26 + Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. + Copyright (C) 2004, 2005, 2006 IBM Corporation + Copyright (C) 1999-2006 Hewlett-Packard Co + Copyright (C) 2005 Fujitsu Limited + Copyright (C) 2005 NEC Corporation Copyright (C) 1999, 2002 Silicon Graphics, Inc. Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. This program is free software, covered by the GNU General Public License, @@ -196,10 +200,12 @@ $ crash vmlinux.17 lcore.cr.17 - crash 4.0 - Copyright (C) 2002, 2003, 2004, 2005 Red Hat, Inc. 
- Copyright (C) 2004, 2005 IBM Corporation - Copyright (C) 1999-2005 Hewlett-Packard Co + crash 4.0-2.26 + Copyright (C) 2002, 2003, 2004, 2005, 2006 Red Hat, Inc. + Copyright (C) 2004, 2005, 2006 IBM Corporation + Copyright (C) 1999-2006 Hewlett-Packard Co + Copyright (C) 2005 Fujitsu Limited + Copyright (C) 2005 NEC Corporation Copyright (C) 1999, 2002 Silicon Graphics, Inc. Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. This program is free software, covered by the GNU General Public License,