diff --git a/.cvsignore b/.cvsignore index 8492747..b752456 100644 --- a/.cvsignore +++ b/.cvsignore @@ -4,3 +4,4 @@ makedumpfile-1.1.1.tar.gz kexec-tools-po.tar.gz makedumpfile-1.1.5.tar.gz kexec-tools-testing-20070330.tar.bz2 +makedumpfile-1.2.6.tar.gz diff --git a/98-kexec.rules b/98-kexec.rules new file mode 100644 index 0000000..8b47671 --- /dev/null +++ b/98-kexec.rules @@ -0,0 +1,4 @@ +SUBSYSTEM=="cpu", ACTION=="online", PROGRAM="/etc/init.d/kdump restart" +SUBSYSTEM=="cpu", ACTION=="offline", PROGRAM="/etc/init.d/kdump restart" +SUBSYSTEM=="memory", ACTION=="add", PROGRAM="/etc/init.d/kdump restart" +SUBSYSTEM=="memory", ACTION=="remove", PROGRAM="/etc/init.d/kdump restart" diff --git a/kdump.conf b/kdump.conf index 92d462f..1e454f8 100644 --- a/kdump.conf +++ b/kdump.conf @@ -1,11 +1,14 @@ # Configures where to put the kdump /proc/vmcore files # # This file contains a series of commands to perform (in order) when a -# kernel crash has happened and the kdump kernel has been loaded +# kernel crash has happened and the kdump kernel has been loaded. Directives in +# this file are only applicable to the kdump initramfs, and have no effect if +# the root filesystem is mounted and the normal init scripts are processed # # Currently only one dump target and path may be configured at once # if the configured dump target fails, the default action will be preformed -# the default action may be configured with the default directive below +# the default action may be configured with the default directive below. If the +# configured dump target succedes # # Basics commands supported are: # raw - Will dd /proc/vmcore into . @@ -27,7 +30,7 @@ # If unset, will default to /var/crash. # # core_collector makedumpfile -# This directive allows you to use the dump filtering +# - This directive allows you to use the dump filtering # program makedumpfile to retrieve your core, which on # some arches can drastically reduce core file size. 
# See /sbin/makedumpfile --help for a list of options. @@ -36,19 +39,55 @@ # a config file appropriate for the running kernel. # # link_delay -# Some network cards take a long time to initialize, and +# - Some network cards take a long time to initialize, and # some spanning tree enabled networks do not transmit # user traffic for long periods after a link state # changes. This optional parameter defines a wait # period after a link is activated in which the # initramfs will wait before attempting to transmit # user data. -# default -# Action to preform instead of mounting root fs and +# +# kdump_post +# - This directive allows you to run a specified +# executable just after the memory dump process +# terminates. The exit status from the dump process +# is fed to the kdump_post executable, which can be +# used to trigger different actions for success or +# failure. +# +# kdump_pre +# - works just like the kdump_post directive, but instead +# of running after the dump process, runs immediately +# before. Exit status of this binary is interpreted +# as follows: +# 0 - continue with dump process as usual +# non 0 - reboot/halt the system +# +# extra_bins +# - This directive allows you to specify additional +# binaries or shell scripts you'd like to include in +# your kdump initrd. Generally only useful in +# conjunction with a kdump_post binary or script that +# relies on other binaries or scripts. +# +# extra_modules +# - This directive allows you to specify extra kernel +# modules that you want to be loaded in the kdump +# initrd, typically used to set up access to +# non-boot-path dump targets that might otherwise +# not be accessible in the kdump environment. Multiple +# modules can be listed, separated by a space, and any +# dependent modules will automatically be included. 
+# +# default +# - Action to preform instead of mounting root fs and # running init process # reboot: If the default action is reboot simply reboot # the system and loose the core that you are # trying to retrieve. +# halt: If the default action is halt, then simply +# halt the system after attempting to capture +# a vmcore, regardless of success or failure. # shell: If the default action is shell, then drop to # an msh session inside the initramfs from # where you can try to record the core manually. @@ -65,5 +104,8 @@ #path /var/crash #core_collector makedumpfile -c #link_delay 60 +#kdump_post /var/crash/scripts/kdump-post.sh +#extra_bins /usr/bin/lftp +#extra_modules gfs2 #default shell diff --git a/kdump.sysconfig b/kdump.sysconfig index 8c9f596..0acce5f 100644 --- a/kdump.sysconfig +++ b/kdump.sysconfig @@ -13,7 +13,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1" +KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1 reset_devices" # Any additional kexec arguments required. In most situations, this should # be left empty diff --git a/kdump.sysconfig.i386 b/kdump.sysconfig.i386 index 34b5f16..2ca127a 100644 --- a/kdump.sysconfig.i386 +++ b/kdump.sysconfig.i386 @@ -13,7 +13,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1" +KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1 reset_devices" # Any additional kexec arguments required. 
In most situations, this should # be left empty diff --git a/kdump.sysconfig.ia64 b/kdump.sysconfig.ia64 index c73cf6e..83e8ad0 100644 --- a/kdump.sysconfig.ia64 +++ b/kdump.sysconfig.ia64 @@ -13,7 +13,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1" +KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1 reset_devices" # Any additional kexec arguments required. In most situations, this should # be left empty diff --git a/kdump.sysconfig.ppc64 b/kdump.sysconfig.ppc64 index 21db4a5..af9034a 100644 --- a/kdump.sysconfig.ppc64 +++ b/kdump.sysconfig.ppc64 @@ -13,7 +13,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1 noirqdistrib" +KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1 noirqdistrib reset_devices" # Any additional kexec arguments required. In most situations, this should # be left empty diff --git a/kdump.sysconfig.x86_64 b/kdump.sysconfig.x86_64 index 34b5f16..2ca127a 100644 --- a/kdump.sysconfig.x86_64 +++ b/kdump.sysconfig.x86_64 @@ -13,7 +13,7 @@ KDUMP_COMMANDLINE="" # This variable lets us append arguments to the current kdump commandline # As taken from either KDUMP_COMMANDLINE above, or from /proc/cmdline -KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1" +KDUMP_COMMANDLINE_APPEND="irqpoll maxcpus=1 reset_devices" # Any additional kexec arguments required. In most situations, this should # be left empty diff --git a/kexec-kdump-howto.txt b/kexec-kdump-howto.txt index 06e45d6..418d9d6 100644 --- a/kexec-kdump-howto.txt +++ b/kexec-kdump-howto.txt @@ -2,33 +2,51 @@ Kexec/Kdump HOWTO Introduction -Kexec and kdump are new features in the 2.6 mainstream kernel. These features are included in Red Hat Enterprise Linux 5. 
The purpose of these features is to ensure faster boot up and creation of reliable kernel vmcores for diagnostic purposes. +Kexec and kdump are new features in the 2.6 mainstream kernel. These features +are included in Red Hat Enterprise Linux 5. The purpose of these features +is to ensure faster boot up and creation of reliable kernel vmcores for +diagnostic purposes. Overview Kexec -Kexec is a fastboot mechanism which allows booting a Linux kernel from the context of already running kernel without going through BIOS. BIOS can be very time consuming especially on the big servers with lots of peripherals. This can save a lot of time for developers who end up booting a machine numerous times. +Kexec is a fastboot mechanism which allows booting a Linux kernel from the +context of already running kernel without going through BIOS. BIOS can be very +time consuming especially on the big servers with lots of peripherals. This can +save a lot of time for developers who end up booting a machine numerous times. Kdump -Kdump is a new kernel crash dumping mechanism and is very reliable because the crash dump is captured from the context of a freshly booted kernel and not from the context of the crashed kernel. Kdump uses kexec to boot into a second kernel whenever system crashes. This second kernel, often called a capture kernel, boots with very little memory and captures the dump image. +Kdump is a new kernel crash dumping mechanism and is very reliable because +the crash dump is captured from the context of a freshly booted kernel and +not from the context of the crashed kernel. Kdump uses kexec to boot into +a second kernel whenever system crashes. This second kernel, often called +a capture kernel, boots with very little memory and captures the dump image. -The first kernel reserves a section of memory that the second kernel uses to boot. 
Kexec enables booting the capture kernel without going through BIOS hence contents of first kernel's memory are preserved, which is essentially the kernel crash dump. +The first kernel reserves a section of memory that the second kernel uses +to boot. Kexec enables booting the capture kernel without going through BIOS +hence contents of first kernel's memory are preserved, which is essentially +the kernel crash dump. -Kdump is supported on the i686, x86_64, ia64 and ppc64 platforms. The standard kernel and capture kernel are one in the same on i686, x86_64 and ia64, while ppc64 requires a separate capture kernel (provided by the kernel-kdump package) at this time. +Kdump is supported on the i686, x86_64, ia64 and ppc64 platforms. The +standard kernel and capture kernel are one in the same on i686, x86_64 +and ia64, while ppc64 requires a separate capture kernel (provided by the +kernel-kdump package) at this time. -If you're reading this document, you should already have kexec-tools installed. If not, you install it via the following command: +If you're reading this document, you should already have kexec-tools +installed. If not, you install it via the following command: # yum install kexec-tools Now load a kernel with kexec: - # kver=`uname -r` - # kexec -l /boot/vmlinuz-$kver --initrd=/boot/initrd-$kver.img \ + # kver=`uname -r` # kexec -l /boot/vmlinuz-$kver + --initrd=/boot/initrd-$kver.img \ --command-line="`cat /proc/cmdline`" -NOTE: The above will boot you back into the kernel you're currently running, if you want to load a different kernel, substitute it in place of `uname -r`. +NOTE: The above will boot you back into the kernel you're currently running, +if you want to load a different kernel, substitute it in place of `uname -r`. 
Now reboot your system, taking note that it should bypass the BIOS: @@ -37,7 +55,8 @@ Now reboot your system, taking note that it should bypass the BIOS: How to configure kdump: -Again, we assume if you're reading this document, you should already have kexec-tools installed. If not, you install it via the following command: +Again, we assume if you're reading this document, you should already have +kexec-tools installed. If not, you install it via the following command: # yum install kexec-tools @@ -45,39 +64,46 @@ If you're on ppc64, you'll first need to install the kernel-kdump package: # yum install kernel-kdump -To be able to do much of anything interesting in the way of debug analysis, you'll also need to install the kernel-debuginfo package, of the same arch as your running kernel, and the crash utility: +To be able to do much of anything interesting in the way of debug analysis, +you'll also need to install the kernel-debuginfo package, of the same arch +as your running kernel, and the crash utility: # yum --enablerepo=\*debuginfo install kernel-debuginfo.$(uname -m) crash -Next up, we need to modify some boot parameters to reserve a chunk of memory for the capture kernel. For i686 and x86_64, edit /etc/grub.conf, and append "crashkernel=128M@16M" to the end of your kernel line. Similarly, append the same to the append line in /etc/yaboot.conf for ppc64, followed by a /sbin/ybin to load the new configuration (not needed for grub). On ia64, edit /etc/elilo.conf, adding "crashkernel=256M@256M" to the append line for your kernel. Note that the X@Y values are such that X = the amount of memory to reserve for the capture kernel and Y = the offset into memory at which that reservation should start. +Next up, we need to modify some boot parameters to reserve a chunk of memory +for the capture kernel. For i686 and x86_64, edit /etc/grub.conf, and append +"crashkernel=128M@16M" to the end of your kernel line. 
Similarly, append +the same to the append line in /etc/yaboot.conf for ppc64, followed by a +/sbin/ybin to load the new configuration (not needed for grub). On ia64, +edit /etc/elilo.conf, adding "crashkernel=256M@256M" to the append line for +your kernel. Note that the X@Y values are such that X = the amount of memory +to reserve for the capture kernel and Y = the offset into memory at which +that reservation should start. Examples: - # cat /etc/grub.conf # grub.conf generated by anaconda # # Note that you do not have to rerun grub after making changes to this file # NOTICE: You have a /boot partition. This means that # all kernel and initrd paths are relative to /boot/, eg. # root (hd0,0) -# kernel /vmlinuz-version ro root=/dev/VolGroup00/root +# kernel /vmlinuz-version ro root=/dev/VolGroup00/LogVol00 # initrd /initrd-version.img #boot=/dev/hda default=0 timeout=5 splashimage=(hd0,0)/grub/splash.xpm.gz hiddenmenu -title Red Hat Enterprise Linux (2.6.17-1.2621.el5) +title Red Hat Enterprise Linux (2.6.18-8.el5) root (hd0,0) - kernel /vmlinuz-2.6.17-1.2621.el5 ro root=/dev/VolGroup00/root crashkernel=128M@16M - initrd /initrd-2.6.17-1.2621.el5.img + kernel /vmlinuz-2.6.18-8.el5 ro root=/dev/VolGroup00/LogVol00 + initrd /initrd-2.6.18-8.el5.img - - # cat /etc/yaboot.conf +# cat /etc/yaboot.conf # yaboot.conf generated by anaconda -boot=/dev/sda1 +boot=/dev/sda1 init-message=Welcome to Red Hat Enterprise Linux!\nHit for boot options - partition=2 timeout=80 install=/usr/lib/yaboot/yaboot @@ -89,28 +115,32 @@ nonvram fstype=raw image=/vmlinuz-2.6.17-1.2621.el5 - label=linux - read-only - initrd=/initrd-2.6.17-1.2621.el5.img - append="root=LABEL=/ crashkernel=128M@16M" + label=linux read-only + initrd=/initrd-2.6.17-1.2621.el5.img + append="root=LABEL=/ crashkernel=128M@16M" - # cat /etc/elilo.conf +# cat /etc/elilo.conf prompt timeout=20 default=2.6.17-1.2621.el5 relocatable image=vmlinuz-2.6.17-1.2621.el5 - label=2.6.17-1.2621.el5 - 
initrd=initrd-2.6.17-1.2621.el5.img - read-only - append="-- root=LABEL=/ crashkernel=256M@256M" + label=2.6.17-1.2621.el5 + initrd=initrd-2.6.17-1.2621.el5.img read-only + append="-- root=LABEL=/ crashkernel=256M@256M" -After making said changes, reboot your system, so that the X MB of memory starting Y MB into your memory is left untouched by the normal system, reserved for the capture kernel. Take note that the output of 'free -m' will show X MB less memory than without this parameter, which is expected. You may be able to get by with less than 128M, but testing with only 64M has proven unreliable of late. On ia64, as much as 512M may be required. +After making said changes, reboot your system, so that the X MB of memory +starting Y MB into your memory is left untouched by the normal system, +reserved for the capture kernel. Take note that the output of 'free -m' will +show X MB less memory than without this parameter, which is expected. You +may be able to get by with less than 128M, but testing with only 64M has +proven unreliable of late. On ia64, as much as 512M may be required. -Now that you've got that reserved memory region set up, you want to turn on the kdump init script: +Now that you've got that reserved memory region set up, you want to turn on +the kdump init script: # chkconfig kdump on @@ -118,29 +148,331 @@ Then, start up kdump as well: # service kdump start -This should load your kernel-kdump image via kexec, leaving the system ready to capture a vmcore upon crashing. To test this out, you can force-crash your system by echo'ing a c into /proc/sysrq-trigger: +This should load your kernel-kdump image via kexec, leaving the system ready +to capture a vmcore upon crashing. To test this out, you can force-crash +your system by echo'ing a c into /proc/sysrq-trigger: # echo c > /proc/sysrq-trigger -You should see some panic output, followed by the system restarting into the kdump kernel. 
When the boot process gets to the point where it starts the kdump service, your vmcore should be copied out to disk (by default, in /var/crash//vmcore), then the system rebooted back into your normal kernel. +You should see some panic output, followed by the system restarting into +the kdump kernel. When the boot process gets to the point where it starts +the kdump service, your vmcore should be copied out to disk (by default, +in /var/crash//vmcore), then the system rebooted back into +your normal kernel. -Once back to your normal kernel, you can use the previously installed crash kernel in conjunction with the previously installed kernel-debuginfo to perform postmortem analysis: +Once back to your normal kernel, you can use the previously installed crash +kernel in conjunction with the previously installed kernel-debuginfo to +perform postmortem analysis: - # crash /usr/lib/debug/lib/modules/2.6.17-1.2621.el5/vmlinux /var/crash/2006-08-23-15:34/vmcore + # crash /usr/lib/debug/lib/modules/2.6.17-1.2621.el5/vmlinux + /var/crash/2006-08-23-15:34/vmcore crash> bt and so on... +Dump Triggering methods: + +This section talks about the various ways, other than a Kernel Panic, in which +Kdump can be triggered. The following methods assume that Kdump is configured +on your system, with the scripts enabled as described in the section above. + +1) AltSysRq C + +Kdump can be triggered with the combination of the 'Alt','SysRq' and 'C' +keyboard keys. Please refer to the following link for more details: + +http://kbase.redhat.com/faq/FAQ_43_5559.shtm + +In addition, on PowerPC boxes, Kdump can also be triggered via Hardware +Management Console(HMC) using 'Ctrl', 'O' and 'C' keyboard keys. + +2) NMI_WATCHDOG + +In case a machine has a hard hang, it is quite possible that it does not +respond to keyboard interrupts. As a result 'Alt-SysRq' keys will not help +trigger a dump. In such scenarios Nmi Watchdog feature can prove to be useful. 
+The following link has more details on configuring Nmi watchdog option. + +http://kbase.redhat.com/faq/FAQ_85_9129.shtm + +Once this feature has been enabled in the kernel, any lockups will result in an +OOPs message to be generated, followed by Kdump being triggered. + +Please refrain from simultaneously enabling 'nmi_watchdog' and setting +/proc/sys/kernel/unknown_nmi_panic, as this would result in a Kernel Panic +from legitimate NMIs generated by the nmi_watchdog. + +3) Kernel OOPs + +If we want to generate a dump everytime the Kernel OOPses, we can achieve this +by setting the 'Panic On OOPs' option as follows: + + # echo 1 > /proc/sys/kernel/panic_on_oops + +This is enabled by default on RHEL5. + +4) NMI(Non maskable interrupt) button + +In cases where the system is in a hung state, and is not accepting keyboard +interrupts, using NMI button for triggering Kdump can be very useful. NMI +button is present on most of the newer x86 and x86_64 machines. Please refer +to the User guides/manuals to locate the button, though in most occasions it +is not very well documented. In most cases it is hidden behind a small hole +on the front or back panel of the machine. You could use a toothpick or some +other non-conducting probe to press the button. + +For example, on the IBM X series 366 machine, the NMI button is located behind +a small hole on the bottom center of the rear panel. + +To enable this method of dump triggering using NMI button, you will need to set +the 'unknown_nmi_panic' option as follows: + + # echo 1 > /proc/sys/kernel/unknown_nmi_panic + +When enabling unknown_nmi_panic please be careful not to enable Nmi Watchdog +feature, else the system will panic. + +5) PowerPC specific methods: + +On IBM PowerPC machines, issuing a soft reset invokes the XMON debugger(if +XMON is configured). 
To configure XMON one needs to compile the kernel with +the CONFIG_XMON and CONFIG_XMON_DEFAULT options, or by compiling with +CONFIG_XMON and booting the kernel with xmon=on option. + +Following are the ways to remotely issue a soft reset on PowerPC boxes, which +would drop you to XMON. Pressing a 'X' (capital alphabet X) followed by an +'Enter' here will trigger the dump. + +5.1) HMC + +Hardware Management Console(HMC) available on Power4 and Power5 machines allows +partitions to be reset remotely. This is especially useful in hang situations +where the system is not accepting any keyboard inputs. + +Once you have HMC configured, the following steps will enable you to trigger +Kdump via a soft reset: + +On Power4 + Using GUI + + * In the right pane, right click on the partition you wish to dump. + * Select "Operating System->Reset". + * Select "Soft Reset". + * Select "Yes". + + Using HMC Commandline + + # reset_partition -m -p -t soft + +On Power5 + Using GUI + + * In the right pane, right click on the partition you wish to dump. + * Select "Restart Partition". + * Select "Dump". + * Select "OK". + + Using HMC Commandline + + # chsysstate -m -n -o dumprestart -r lpar + +5.2) Blade Management Console for Blade Center + +To initiate a dump operation, go to Power/Restart option under "Blade Tasks" in +the Blade Management Console. Select the corresponding blade for which you want +to initiate the dump and then click "Restart blade with NMI". This issues a +system reset and invokes xmon debugger. + + +Advanced Setups: + +In addition to being able to capture a vmcore to your system's local file +system, kdump can be configured to capture a vmcore to a number of other +locations, including a raw disk partition, a dedicated file system, an NFS +mounted file system, or a remote system via ssh/scp.
Additional options +exist for specifying the relative path under which the dump is captured, +what to do if the capture fails, and for compressing and filtering the dump +(so as to produce smaller, more manageable, vmcore files). + +In theory, dumping to a location other than the local file system should be +safer than kdump's default setup, as it's possible the default setup will try +dumping to a file system that has become corrupted. The raw disk partition and +dedicated file system options allow you to still dump to the local system, +but without having to remount your possibly corrupted file system(s), +thereby decreasing the chance a vmcore won't be captured. Dumping to an +NFS server or remote system via ssh/scp also has this advantage, as well +as allowing for the centralization of vmcore files, should you have several +systems from which you'd like to obtain vmcore files. Of course, note that +these configurations could present problems if your network is unreliable. + +Advanced setups are configured via modifications to /etc/kdump.conf, +which out of the box, is fairly well documented itself. Any alterations to +/etc/kdump.conf should be followed by a restart of the kdump service, so +the changes can be incorporated in the kdump initrd. Restarting the kdump +service is as simple as '/sbin/service kdump restart'. + + +Note that kdump.conf is used as a configuration mechanism for capturing dump +files from the initramfs (in the interests of safety), the root file system is +mounted, and the init process is started, only as a last resort if the +initramfs fails to capture the vmcore. As such, configuration made in +/etc/kdump.conf is only applicable to capture recorded in the initramfs. If +for any reason the init process is started on the root file system, only a +simple copying of the vmcore from /proc/vmcore to /var/crash/$DATE/vmcore will +be performed.
+ +Raw partition + +Raw partition dumping requires that a disk partition in the system, at least +as large as the amount of memory in the system, be left unformatted. Assuming +/dev/sda5 is left unformatted, kdump.conf can be configured with 'raw +/dev/sda5', and the vmcore file will be copied via dd directly onto partition +/dev/sda5. Restart the kdump service via '/sbin/service kdump restart' +to commit this change to your kdump initrd. + +Dedicated file system + +Similar to raw partition dumping, you can format a partition with the file +system of your choice, leaving it unmounted during normal operation. Again, +it should be at least as large as the amount of memory in the system. Assuming +/dev/sda3 has been formatted ext3, specify 'ext3 /dev/sda3' in kdump.conf, +and a vmcore file will be copied onto the file system after it has been +mounted. Dumping to a dedicated partition has the advantage that you can dump +multiple vmcores to the file system, space permitting, without overwriting +previous ones, as would be the case in a raw partition setup. Restart the +kdump service via '/sbin/service kdump restart' to commit this change to +your kdump initrd. Note that for local file systems ext3 and ext2 are +supported as dumpable targets. Kdump will not prevent you from specifying +other filesystems, and they will most likely work, but their operation +cannot be guaranteed. for instance specifying a vfat filesystem or msdos +filesystem will result in a successful load of the kdump service, but during +crash recovery, the dump will fail if the system has more than 2GB of memory +(since vfat and msdos filesystems do not support more than 2GB files). +Be careful of your filesystem selection when using this target. + +NFS mount + +Dumping over NFS requires an NFS server configured to export a file system +with full read/write access for the root user. 
All operations done within +the kdump initial ramdisk are done as root, and to write out a vmcore file, +we obviously must be able to write to the NFS mount. Configuring an NFS +server is outside the scope of this document, but either the no_root_squash +or anonuid options on the NFS server side are likely of interest to permit +the kdump initrd operations to write to the NFS mount as root. + +Assuming you're exporting /dump on the machine nfs-server.example.com, +once the mount is properly configured, specify it in kdump.conf, via 'net +nfs-server.example.com:/dump'. The server portion can be specified either +by host name or IP address. Following a system crash, the kdump initrd will +mount the NFS mount and copy out the vmcore to your NFS server. Restart the +kdump service via '/sbin/service kdump restart' to commit this change to +your kdump initrd. + +Remote system via ssh/scp + +Dumping over ssh/scp requires setting up passwordless ssh keys for every +machine you wish to have dump via this method. First up, configure kdump.conf +for ssh/scp dumping, adding a config line of 'net user@server', where 'user' +can be any user on the target system you choose, and 'server' is the host +name or IP address of the target system. Using a dedicated, restricted user +account on the target system is recommended, as there will be keyless ssh +access to this account. + +Once kdump.conf is appropriately configured, issue the command '/sbin/service +kdump propagate' to automatically set up the ssh host keys and transmit +the necessary bits to the target server. You'll have to type in 'yes' +to accept the host key for your target server if this is the first time +you've connected to it, and then input the target system user's password +to send over the necessary ssh key file. Restart the kdump service via +'/sbin/service kdump restart' to commit this change to your kdump initrd.
+ +Path + +By default, local file system vmcore files are written to /var/crash/%DATE +on the local system, ssh/scp dumps to /var/crash/%HOST-%DATE on the target +system, dedicated file system partition dumps to ./var/crash/%DATE, and +NFS dumps to ./var/crash/%HOST-%DATE, the latter two both relative to +their respective mount points within the kdump initrd (usually /mnt). The +'/var/crash' portion of the path can be overridden using kdump.conf's 'path' +variable, should you wish to write the vmcore out to a different location. For +example, 'path /data/coredumps' would lead to vmcore files being written to +/data/coredumps/%DATE if you were dumping to your local file system. Note +that the path option is ignored if your kdump configuration results in the +core being saved from the initscripts in the root filesystem. + +Kdump Post-Capture Executable + +It is possible to specify a custom script or binary you wish to run following +an attempt to capture a vmcore. The executable is passed an exit code from +the capture process, which can be used to trigger different actions from +within your post-capture executable. + +Extra Binaries + +If you have specific binaries or scripts you want to have made available +within your kdump initrd, you can specify them by their full path, and they +will be included in your kdump initrd, along with all dependent libraries. +This may be particularly useful for those running post-capture scripts that +rely on other binaries. + +Extra Modules + +By default, only the bare minimum of kernel modules will be included in your +kdump initrd. Should you wish to capture your vmcore files to a non-boot-path +storage device, such as an iscsi target disk or clustered file system, you may +need to manually specify additional kernel modules to load into your kdump +initrd.
+ +Default action + +By default, if a configured dump method fails, the kdump initrd falls back +to trying to dump to the local file system (i.e., into the file system(s) +you would have mounted under normal system operation). The system always +reboots following an attempted dump to your local file system, regardless +of success or failure. + +However, for any of the advanced methods, if the dump fails, you can configure +the kdump initrd to skip trying to dump to the local file system, instead +immediately rebooting ('default reboot'), halting the system ('default halt') +or dropping you to a shell within the initrd ('default shell'), from which you +could try to capture the vmcore manually. Again, if the 'default' parameter is +unset, a local file system dump will be attempted, then the system will reboot. + +Compression and filtering + +The 'core_collector' parameter in kdump.conf allows you to specify a custom +dump capture method. The most common alternate method is makedumpfile, which +is a dump filtering and compression utility provided with kexec-tools. On +some architectures, it can drastically reduce the size of your vmcore files, +which becomes very useful on systems with large amounts of memory. + +A typical setup is 'core_collector makedumpfile -c', but check the output of +'/sbin/makedumpfile --help' for a list of all available options (-i and -g +don't need to be specified, they're automatically taken care of). Note that +use of makedumpfile requires that the kernel-debuginfo package corresponding +with your running kernel be installed. + +Also note that makedumpfile is only used from the initramfs. Saving a +core from the initscript in the root filesystem is considered a last ditch +effort, only used when the initramfs has failed to save the core properly. +As such only the cp utility is used in the initscripts. The implication +here is that in order to use makedumpfile as your core collector, you must +specify a dump target in /etc/kdump.conf.
+ Caveats: -Console frame-buffers and X are not properly supported. If you typically run with something along the lines of "vga=791" in your kernel config line or have X running, console video will be garbled when a kernel is booted via kexec. Note that the kdump kernel should still be able to create a dump, and when the system reboots, video should be restored to normal. +Console frame-buffers and X are not properly supported. If you typically run +with something along the lines of "vga=791" in your kernel config line or +have X running, console video will be garbled when a kernel is booted via +kexec. Note that the kdump kernel should still be able to create a dump, +and when the system reboots, video should be restored to normal. -Notes on configuration: +Notes on RHEL5 configuration: -The kexec-utils package contains two extra configuration files: +The RHEL5 kexec-utils package contains two extra configuration files: /etc/sysconfig/kdump This file allows you to specify an alternate kernel to boot in the @@ -151,15 +483,15 @@ See documentation in the template kdump sysconfig file for exact usage /etc/kdump.conf This file allows you to configure how kdump will record your core -file. Unlike the stock version of kdump, this version of kdump attempts +file. Unlike the stock version of kdump, the RHEL5 version of kdump attempts to record your vmcore file from the initramfs, so as to still function properly in the event that your root file system is corrupted and unmountable. This file is interrogated on kdump service start and is used to populate the initramfs for the kdump kernel with the appropriate data and utilities to copy your core file to the desired location. See documentation in /etc/kdump.conf for available config directives and targets. Note especially the ifc option. 
-kdump will attempt to determine which network interface to use when dumping to a -remote server, but due to the possibility of interface renaming, or alternate +kdump will attempt to determine which network interface to use when dumping to +a remote server, but due to the possibility of interface renaming, or alternate module load strategies, the interface name may change in the kdump kernel. This option is used to override that guess, so that the appropriate interface will be activated in the kdump kernel. diff --git a/kexec-tools-1.102pre-ia64-altix_vmcore.patch b/kexec-tools-1.102pre-ia64-altix_vmcore.patch new file mode 100644 index 0000000..53979c2 --- /dev/null +++ b/kexec-tools-1.102pre-ia64-altix_vmcore.patch @@ -0,0 +1,27 @@ +diff -up kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c.orig kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c +--- kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c.orig 2008-04-08 14:36:59.000000000 -0400 ++++ kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c 2008-04-08 14:41:43.000000000 -0400 +@@ -224,9 +224,22 @@ int load_crashdump_segments(struct kexec + void *tmp; + if (info->kexec_flags & KEXEC_ON_CRASH ) { + if (get_crash_memory_ranges(&mem_range, &nr_ranges) == 0) { ++ int i; + + info->kern_paddr_start = kernel_code_start; +- info->kern_vaddr_start = LOAD_OFFSET; ++ for (i=0; i < nr_ranges; i++) { ++ unsigned long long mstart = crash_memory_range[i].start; ++ unsigned long long mend = crash_memory_range[i].end; ++ if (!mstart && !mend) ++ continue; ++ if (kernel_code_start >= mstart && ++ kernel_code_start < mend) { ++ info->kern_vaddr_start = mstart + ++ LOAD_OFFSET; ++ break; ++ } ++ } ++ + info->kern_size = kernel_code_end - kernel_code_start + 1; + if (crash_create_elf64_headers(info, &elf_info, + crash_memory_range, diff --git a/kexec-tools-1.102pre-ia64-efi_ususable_map.patch b/kexec-tools-1.102pre-ia64-efi_ususable_map.patch new file mode 100644 index 
0000000..f343da7 --- /dev/null +++ b/kexec-tools-1.102pre-ia64-efi_ususable_map.patch @@ -0,0 +1,48 @@ +diff -up kexec-tools-testing-20070330/purgatory/arch/ia64/purgatory-ia64.c.orig kexec-tools-testing-20070330/purgatory/arch/ia64/purgatory-ia64.c +--- kexec-tools-testing-20070330/purgatory/arch/ia64/purgatory-ia64.c.orig 2007-03-30 00:34:36.000000000 -0400 ++++ kexec-tools-testing-20070330/purgatory/arch/ia64/purgatory-ia64.c 2008-03-26 08:05:11.000000000 -0400 +@@ -123,6 +123,7 @@ typedef struct { + struct loaded_segment { + unsigned long start; + unsigned long end; ++ unsigned long reserved; + }; + + struct kexec_boot_params { +@@ -201,7 +202,8 @@ patch_efi_memmap(struct kexec_boot_param + } + dst_md->phys_addr = seg->start; + dst_md->num_pages = mid_pages; +- dst_md->type = EFI_LOADER_DATA; ++ dst_md->type = seg->reserved ? ++ EFI_UNUSABLE_MEMORY:EFI_LOADER_DATA; + if (!end_pages) + break; + dest += boot_param->efi_memdesc_size; +diff -up kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c.orig kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c +--- kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c.orig 2007-03-30 00:34:36.000000000 -0400 ++++ kexec-tools-testing-20070330/kexec/arch/ia64/crashdump-ia64.c 2008-03-26 08:05:11.000000000 -0400 +@@ -53,6 +53,7 @@ static unsigned long kernel_code_end; + struct loaded_segment { + unsigned long start; + unsigned long end; ++ unsigned long reserved; + }; + + #define MAX_LOAD_SEGMENTS 128 +@@ -87,6 +88,7 @@ static void add_loaded_segments_info(str + start&~(ELF_PAGE_SIZE-1); + loaded_segments[loaded_segments_num].end = + (end + ELF_PAGE_SIZE - 1)&~(ELF_PAGE_SIZE - 1); ++ loaded_segments[loaded_segments_num].reserved = 0; + loaded_segments_num++; + } + } +@@ -239,6 +241,7 @@ int load_crashdump_segments(struct kexec + loaded_segments[loaded_segments_num].start = elfcorehdr; + loaded_segments[loaded_segments_num].end = elfcorehdr + + sz; ++ loaded_segments[loaded_segments_num].reserved = 
1; + loaded_segments_num++; + cmdline_add_elfcorehdr(cmdline, elfcorehdr); + } diff --git a/kexec-tools-1.102pre-ppc64-rtas.patch b/kexec-tools-1.102pre-ppc64-rtas.patch new file mode 100644 index 0000000..048ac79 --- /dev/null +++ b/kexec-tools-1.102pre-ppc64-rtas.patch @@ -0,0 +1,29 @@ +diff -up kexec-tools-testing-20070330/kexec/arch/ppc64/crashdump-ppc64.c.orig kexec-tools-testing-20070330/kexec/arch/ppc64/crashdump-ppc64.c +--- kexec-tools-testing-20070330/kexec/arch/ppc64/crashdump-ppc64.c.orig 2008-05-19 15:25:25.000000000 -0400 ++++ kexec-tools-testing-20070330/kexec/arch/ppc64/crashdump-ppc64.c 2008-05-19 15:33:22.000000000 -0400 +@@ -107,6 +107,7 @@ static int get_crash_memory_ranges(struc + struct dirent *dentry, *mentry; + int i, n, crash_rng_len = 0; + unsigned long long start, end, cstart, cend; ++ int page_size = getpagesize(); + + crash_max_memory_ranges = max_memory_ranges + 6; + crash_rng_len = sizeof(struct memory_range) * crash_max_memory_ranges; +@@ -219,6 +220,17 @@ static int get_crash_memory_ranges(struc + if (cend > crash_base + crash_size) + cend = crash_base + crash_size; + crash_memory_range[memory_ranges].start = cstart; ++ /* ++ * The rtas section created here is formed by reading rtas-base ++ * and rtas-size from /proc/device-tree/rtas. Unfortunately ++ * rtas-size is not required to be a multiple of PAGE_SIZE ++ * The remainder of the page it ends on is just garbage, and is ++ * safe to read, its just not accounted in rtas-size. Since ++ * we're creating an elf section here though, lets round it up ++ * to the next page size boundary though, so makedumpfile can ++ * read it safely without going south on us. 
++ */ ++ cend = (cend + page_size - 1) & (~(page_size - 1)); + crash_memory_range[memory_ranges++].end = cend; + } + /* diff --git a/kexec-tools-1.102pre-x86-phys_base.patch b/kexec-tools-1.102pre-x86-phys_base.patch new file mode 100644 index 0000000..dcb8bcd --- /dev/null +++ b/kexec-tools-1.102pre-x86-phys_base.patch @@ -0,0 +1,20 @@ +diff -up kexec-tools-testing-20070330/makedumpfile/x86_64.c.orig kexec-tools-testing-20070330/makedumpfile/x86_64.c +--- kexec-tools-testing-20070330/makedumpfile/x86_64.c.orig 2008-04-10 11:59:09.000000000 -0400 ++++ kexec-tools-testing-20070330/makedumpfile/x86_64.c 2008-04-10 11:59:52.000000000 -0400 +@@ -71,12 +71,11 @@ vaddr_to_offset_x86_64(unsigned long vad + struct pt_load_segment *pls; + + /* +- * Check the relocatable kernel. ++ * Note this computation of phys_base works for RHEL5 ++ * because we always keep phys_base at 0x200000. This ++ * doesn't work in general + */ +- if (SYMBOL(phys_base) != NOT_FOUND_SYMBOL) +- phys_base = info->phys_base; +- else +- phys_base = 0; ++ phys_base = info->phys_base; + + if (vaddr >= __START_KERNEL_map) + paddr = vaddr - __START_KERNEL_map + phys_base; diff --git a/kexec-tools.spec b/kexec-tools.spec index 98ff7ea..9622a03 100644 --- a/kexec-tools.spec +++ b/kexec-tools.spec @@ -1,6 +1,6 @@ Name: kexec-tools Version: 1.102pre -Release: 10%{?dist} +Release: 11%{?dist} License: GPL Group: Applications/System Summary: The kexec/kdump userspace component. 
@@ -13,11 +13,12 @@ Source5: kdump.sysconfig.ppc64 Source6: kdump.sysconfig.ia64 Source7: mkdumprd Source8: kdump.conf -Source9: makedumpfile-1.1.5.tar.gz +Source9: makedumpfile-1.2.6.tar.gz Source10: kexec-kdump-howto.txt Source11: firstboot_kdump.py Source12: mkdumprd.8 Source13: kexec-tools-po.tar.gz +Source14: 98-kexec.rules BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) Requires(pre): coreutils chkconfig sed zlib Requires: busybox >= 1.2.0 @@ -27,29 +28,32 @@ BuildRequires: pkgconfig intltool gettext Obsoletes: diskdumputils netdump %endif + +#START INSERT + # # Patches 0 through 100 are meant for x86 kexec-tools enablement # Patch1: kexec-tools-1.102pre-elf-core-type.patch Patch2: kexec-tools-1.102pre-bzimage-options.patch +Patch3: kexec-tools-1.102pre-cmdline-length.patch # # Patches 101 through 200 are meant for x86_64 kexec-tools enablement # -Patch101: kexec-tools-1.102pre-disable-kdump-x8664.patch -Patch102: kexec-tools-1.102pre-x86_64-exactmap.patch # # Patches 201 through 300 are meant for ia64 kexec-tools enablement # +Patch201: kexec-tools-1.102pre-ia64-efi_ususable_map.patch +Patch202: kexec-tools-1.102pre-ia64-altix_vmcore.patch # # Patches 301 through 400 are meant for ppc64 kexec-tools enablement # Patch301: kexec-tools-1.102pre-ppc64_rmo_top.patch -Patch302: kexec-tools-1.102pre-ppc64-buffer-overflow.patch -Patch303: kexec-tools-1.102pre-ppc-boots-ppc64.patch -Patch304: kexec-tools-1.102pre-ppc64-devtree.patch +Patch302: kexec-tools-1.102pre-ppc64-rtas.patch + # # Patches 401 through 500 are meant for s390 kexec-tools enablement # @@ -64,11 +68,9 @@ Patch501: kexec-tools-1.102pre-ppc-fixup.patch # Patch601: kexec-tools-1.102pre-elf-format.patch Patch602: kexec-tools-1.102pre-x86-add_buffer_retry.patch -Patch603: kexec-tools-1.102pre-makedumpfile-xen-syms.patch -Patch604: kexec-tools-1.102pre-disable-kexec-test.patch -Patch605: kexec-tools-1.102pre-vmcoreinfo.patch -Patch606: 
kexec-tools-1.102pre-makedumpfile-makefile.patch -Patch607: kexec-tools-1.102pre-cmdline-length.patch +Patch603: kexec-tools-1.102pre-disable-kexec-test.patch +Patch604: kexec-tools-1.102pre-vmcoreinfo.patch +Patch605: kexec-tools-1.102pre-x86-phys_base.patch %description kexec-tools provides /sbin/kexec binary that facilitates a new @@ -79,14 +81,16 @@ component of the kernel's kexec feature. %prep %setup -q -n %{name}-testing-20070330 -rm -f ../kexec-tools-1.101.spec + %patch1 -p1 %patch2 -p1 +%patch3 -p1 + +%patch201 -p1 +%patch202 -p1 %patch301 -p1 %patch302 -p1 -%patch303 -p1 -%patch304 -p1 %patch501 -p1 @@ -96,19 +100,25 @@ tar -z -x -v -f %{SOURCE9} %patch601 -p1 %patch602 -p1 -%patch603 -p1 +%patch603 -p1 %patch604 -p1 %patch605 -p1 -%patch606 -p1 -%patch607 -p1 - tar -z -x -v -f %{SOURCE13} %ifarch ppc -%define archdef ARCH=ppc64 +%define archdef ARCH=ppc %endif %build +%ifarch ia64 +# ia64 gcc seems to have a problem adding -fexception -fstack-protect and +# -param ssp-protect-size, like the %configure macro does +# while that shouldn't be a problem, and it still builds fine, it results in +# the kdump kernel hanging on kexec boot. I don't yet know why, but since those +# options aren't critical, I'm just overrideing them here for ia64 +export CFLAGS="-O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2" +%endif + %configure \ %ifarch ppc64 --host=powerpc64-redhat-linux-gnu \ @@ -117,21 +127,22 @@ tar -z -x -v -f %{SOURCE13} --sbindir=/sbin rm -f kexec-tools.spec.in cp %{SOURCE10} . 
-make %{?archdef} -%ifarch %{ix86} x86_64 ia64 ppc64 ppc +make +%ifarch %{ix86} x86_64 ia64 ppc64 make -C makedumpfile %endif make -C kexec-tools-po %install rm -rf $RPM_BUILD_ROOT -make install %{?archdef} DESTDIR=$RPM_BUILD_ROOT +make install DESTDIR=$RPM_BUILD_ROOT mkdir -p -m755 $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d mkdir -p -m755 $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig mkdir -p -m755 $RPM_BUILD_ROOT%{_localstatedir}/crash mkdir -p -m755 $RPM_BUILD_ROOT%{_mandir}/man8/ mkdir -p -m755 $RPM_BUILD_ROOT%{_docdir} mkdir -p -m755 $RPM_BUILD_ROOT%{_datadir}/kdump +mkdir -p -m755 $RPM_BUILD_ROOT%{_sysconfdir}/udev/rules.d install -m 755 %{SOURCE1} $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d/kdump if [ -f $RPM_SOURCE_DIR/kdump.sysconfig.%{_target_cpu} ]; then install -m 644 $RPM_SOURCE_DIR/kdump.sysconfig.%{_target_cpu} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/kdump @@ -143,9 +154,10 @@ install -m 644 %{SOURCE8} $RPM_BUILD_ROOT%{_sysconfdir}/kdump.conf install -m 644 kexec/kexec.8 $RPM_BUILD_ROOT%{_mandir}/man8/kexec.8 install -m 755 %{SOURCE11} $RPM_BUILD_ROOT%{_datadir}/kdump/firstboot_kdump.py install -m 644 %{SOURCE12} $RPM_BUILD_ROOT%{_mandir}/man8/mkdumprd.8 -%ifarch %{ix86} x86_64 ia64 ppc64 ppc +install -m 644 %{SOURCE14} $RPM_BUILD_ROOT%{_sysconfdir}/udev/rules.d/98-kexec.rules + +%ifarch %{ix86} x86_64 ia64 ppc64 install -m 755 makedumpfile/makedumpfile $RPM_BUILD_ROOT/sbin/makedumpfile -install -m 755 makedumpfile/makedumpfile-R.pl $RPM_BUILD_ROOT/sbin/makedumpfile-reasm %endif make -C kexec-tools-po install DESTDIR=$RPM_BUILD_ROOT %find_lang %{name} @@ -153,22 +165,29 @@ make -C kexec-tools-po install DESTDIR=$RPM_BUILD_ROOT %clean rm -rf $RPM_BUILD_ROOT - %post touch /etc/kdump.conf /sbin/chkconfig --add kdump -#This portion of the script is temporary. Its only here -#to fix up broken boxes that require special settings -#in /etc/sysconfig/kdump. It will be removed when -#These systems are fixed. +# This portion of the script is temporary. 
Its only here +# to fix up broken boxes that require special settings +# in /etc/sysconfig/kdump. It will be removed when +# These systems are fixed. -#This is for HP zx1 machines -#They require machvec=dig on the kernel command line if [ -d /proc/bus/mckinley ] then + # This is for HP zx1 machines + # They require machvec=dig on the kernel command line sed -e's/\(^KDUMP_COMMANDLINE_APPEND.*\)\("$\)/\1 machvec=dig"/' \ /etc/sysconfig/kdump > /etc/sysconfig/kdump.new mv /etc/sysconfig/kdump.new /etc/sysconfig/kdump +elif [ -d /proc/sgi_sn ] +then + # This is for SGI SN boxes + # They require the --noio option to kexec + # since they don't support legacy io + sed -e's/\(^KEXEC_ARGS.*\)\("$\)/\1 --noio"/' \ + /etc/sysconfig/kdump > /etc/sysconfig/kdump.new + mv /etc/sysconfig/kdump.new /etc/sysconfig/kdump fi @@ -186,26 +205,42 @@ fi exit 0 %triggerin -- firstboot +# we enable kdump everywhere except for paravirtualized xen domains; check here +if [ -f /proc/xen/capabilities ]; then + if [ -z `grep control_d /proc/xen/capabilities` ]; then + exit 0 + fi +fi if [ ! -e %{_datadir}/firstboot/modules/firstboot_kdump.py ] then ln -s %{_datadir}/kdump/firstboot_kdump.py %{_datadir}/firstboot/modules/firstboot_kdump.py fi +%triggerin -- kernel-kdump +touch %{_sysconfdir}/kdump.conf + %triggerun -- firstboot rm -f %{_datadir}/firstboot/modules/firstboot_kdump.py -%triggerpostun -- kernel +%triggerpostun -- kernel kernel-xen kernel-debug kernel-PAE kernel-kdump # List out the initrds here, strip out version nubmers # and search for corresponding kernel installs, if a kernel # is not found, remove the corresponding kdump initrd #start by getting a list of all the kdump initrds -for i in /boot/initrd*kdump.img +MY_ARCH=`uname -m` +if [ "$MY_ARCH" == "ia64" ] +then + IMGDIR=/boot/efi/efi/redhat +else + IMGDIR=/boot +fi + +for i in `ls $IMGDIR/initrd*kdump.img 2>/dev/null` do - [ -e "$i" ] || continue - KDVER="${i##*initrd-}" ; KDVER="${KDVER%%kdump*}" - if [ ! 
-e /boot/vmlinuz-$KDVER ] + KDVER=`echo $i | sed -e's/^.*initrd-//' -e's/kdump.*$//'` + if [ ! -e $IMGDIR/vmlinuz-$KDVER ] then # We have found an initrd with no corresponding kernel # so we should be able to remove it @@ -213,7 +248,6 @@ do fi done - %files -f %{name}.lang %defattr(-,root,root,-) /sbin/* @@ -221,6 +255,7 @@ done %config(noreplace,missingok) %{_sysconfdir}/sysconfig/kdump %config(noreplace,missingok) %{_sysconfdir}/kdump.conf %config %{_sysconfdir}/rc.d/init.d/kdump +%config %{_sysconfdir}/udev/rules.d/* %dir %{_localstatedir}/crash %{_mandir}/man8/* %doc News @@ -228,7 +263,12 @@ done %doc TODO %doc kexec-kdump-howto.txt + %changelog +* Thu Jun 05 2008 Neil Horman - 1.102pre-11 +- Update to latest makedumpfile from upstream +- Mass import of RHEL fixes missing in rawhide + * Thu Apr 24 2008 Neil Horman - 1.102pre-10 - Fix mkdumprd to properly pull in libs for lvm/mdadm (bz 443878) diff --git a/mkdumprd b/mkdumprd index 78e8cf7..215100f 100644 --- a/mkdumprd +++ b/mkdumprd @@ -73,6 +73,8 @@ bin="" KDUMP_POST="" extra_kdump_mods="" +TMPDISKLIST=`mktemp /tmp/disklist.XXXXXX` + vecho() { NONL="" @@ -101,7 +103,7 @@ usage () { fi $cmd "usage: $cmdname [--version] [--help] [-v] [-d] [-f] [--preload ]" - $cmd " [--image-version] [--with-module]" + $cmd " [--image-version]" $cmd " [--builtin=] [--omit-dmraid]" $cmd " [--fstab=] [--nocompress] " $cmd "" @@ -381,11 +383,13 @@ handlelvordev() { *) vg_list="$vg_list $vg" for device in `vgdisplay -v $vg 2>/dev/null | sed -n 's/PV Name//p'`; do + echo $device | sed -e's/\/dev\///' -e's/[0-9]\+//' >> $TMPDISKLIST findstoragedriver ${device##/dev/} done ;; esac else + echo $1 | sed -e's/\/dev\///' -e's/[0-9]\+//' >> $TMPDISKLIST findstoragedriver ${1##/dev/} fi } @@ -942,6 +946,7 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then echo "Cannot use the core_collector option on this arch" rm -rf $MNTIMAGE rm -rf $IMAGE + rm -f $TMPDISKLIST exit 1 fi ;; @@ -1019,6 +1024,7 @@ if [ -n "$CORE_COLLECTOR" -a ! 
-e /sys/kernel/vmcoreinfo ]; then echo "please install it and restart the kdump service" rm -rf $MNTIMAGE rm -rf $IMAGE + rm -f $TMPDISKLIST exit 1 fi XEN_OPTS="" @@ -1034,6 +1040,7 @@ if [ -n "$CORE_COLLECTOR" -a ! -e /sys/kernel/vmcoreinfo ]; then echo "please install it and restart the kdump service" rm -rf $MNTIMAGE rm -rf $IMAGE + rm -f $TMPDISKLIST exit 1 fi @@ -1046,6 +1053,7 @@ if [ -n "$CORE_COLLECTOR" -a ! -e /sys/kernel/vmcoreinfo ]; then echo "could not generate makedumpfile configuration. aborting" rm -rf $MNTIMAGE rm -rf $IMAGE + rm -f $TMPDISKLIST exit 1; fi fi @@ -1087,6 +1095,9 @@ then done fi +#this provides us with a list of disks that we need to make sure we have available before we capture our core +mv $TMPDISKLIST $MNTIMAGE/etc/critical_disks + #THIS IS WHERE WE GENERATE OUR ADDITINONAL UTILITIES #Busybox doesn't have a /bin/sh applet, #so we build a reasonable faximilie here @@ -1314,6 +1325,102 @@ do done EOF +cat >> $SCRIPTDIR/monitor_dd_progress << EOF +#!/bin/msh +SRC_FILE_SIZE=\`ls -l /proc/vmcore | awk '{print \$5}'\` +BLOCK_SIZE=\$1 +SRC_FILE_MB=\`dc \$SRC_FILE_SIZE 1048576 / p\` + +while true +do + DD_PID=\`ps | grep -v awk | awk '/dd/ {print \$1}'\` + if [ -n "\$DD_PID" ] + then + break + fi +done + +while true +do + sleep 5 + if [ ! -d /proc/\$DD_PID ] + then + break + fi + + kill -SIGUSR1 \$DD_PID + CURRENT_SIZE=\`tail -n 1 /tmp/dd_progress_file | sed "s/[^0-9].*//g"\` + CURRENT_MB=\`dc \$CURRENT_SIZE \$BLOCK_SIZE \* 1048576 / p\` + echo -n -e "Copied \$CURRENT_MB MB / \$SRC_FILE_MB MB\\\r" +done + +rm -f /tmp/dd_progress_file +EOF + +cat >> $SCRIPTDIR/monitor_scp_progress << EOF +#!/bin/msh +SRC_FILE_SIZE=\`ls -l /proc/vmcore | awk '{print \$5}'\` +LOCATION=\$1 +REMOTE_FILE=\$2 + +SRC_FILE_MB=\`dc \$SRC_FILE_SIZE 1048576 / p\` + +while true +do + SCP_PID=\`ps | grep -v awk | awk '/scp/ {print \$1}'\` + if [ -n "\$SCP_PID" ] + then + break + fi +done + +while true +do + sleep 5 + if [ ! 
-d /proc/\$SCP_PID ] + then + break + fi + + SSH_OUTPUT=\`ssh -q -o BatchMode=yes -o StrictHostKeyChecking=no \$LOCATION ls -l \$REMOTE_FILE\` + REMOTE_SIZE=\`echo \$SSH_OUTPUT | awk '{print \$5}'\` + REMOTE_SIZE_MB=\`dc \$REMOTE_SIZE 1048576 / p\` + echo -n -e "Copied \$REMOTE_SIZE_MB MB / \$SRC_FILE_MB MB\\\r" +done + +EOF + +cat >> $SCRIPTDIR/monitor_cp_progress <> $MNTIMAGE/init << EOF +echo "Waiting for required block device discovery" +for i in \`cat /etc/critical_disks\` +do + echo -n Waiting for \$i... + while [ ! -d /sys/block/\$i ] + do + sleep 1 + done + echo Found +done +EOF + # HACK: module loading + device creation isn't necessarily synchronous... # this will make sure that we have all of our devices before trying # things like RAID or LVM @@ -1527,18 +1649,18 @@ if [ -n "$vg_list" ]; then emit "echo Activating logical volumes" emit "lvm vgchange -a y --ignorelockingfailure" emit "DM_NUM=0" - emit "for i in \`lvm lvs | awk '{ if (NR > 1) print \$1}'\`" + emit "lvm lvs --noheadings -o lv_name,vg_name | while read i" emit "do" - emit " VGRP=\`lvm lvs | grep \$i | cut -d\" \" -f4\`" + emit " LV=\`echo \$i | awk '{ print \$1 }'\`" + emit " VGRP=\`echo \$i | awk '{ print \$2 }'\`" emit " mkdir -p /dev/\$VGRP" - emit " if [ ! -e /dev/\$VGRP/\$i ]" + emit " if [ ! -e /dev/\$VGRP/\$LV ]" emit " then" - emit " ln -s /dev/mapper/\$VGRP-\$i /dev/\$VGRP/\$i" - emit " ln -s /dev/mapper/\$VGRP-\$i /dev/dm-\$DM_NUM" + emit " ln -s /dev/mapper/\$VGRP-\$LV /dev/\$VGRP/\$LV" emit " DM_NUM=\`echo \$DM_NUM 1 + p | dc\`" emit " if [ -z \"\$noresume\" ]" emit " then" - emit " /sbin/dmsetup resume /dev/mapper/\$VGRP-\$i" + emit " /sbin/dmsetup resume /dev/mapper/\$VGRP-\$LV" emit " fi" emit " fi" emit "done" @@ -1599,27 +1721,28 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then fi #setup raw case - if [ -n "$KDUMP_PRE_INTERNAL" ] + if [ -n "$KDUMP_PRE" ] then - emit "$KDUMP_PRE_INTERNAL" - emit "if [ $? -ne 0 ]" + emit "$KDUMP_PRE" + emit "if [ \$? 
-ne 0 ]" emit "then" emit " echo kdump_pre script exited with non-zero status" emit " $FINAL_ACTION" emit "fi" fi emit "echo Saving to partition $config_val" + emit "monitor_dd_progress 512 &" if [ -z "$CORE_COLLECTOR" ] then - emit "dd if=/proc/vmcore of=$config_val" + emit "dd if=/proc/vmcore of=$config_val bs=512 >> /tmp/dd_progress_file 2>&1" else CORE_COLLECTOR=`echo $CORE_COLLECTOR | sed -e's/\(^makedumpfile\)\(.*$\)/\1 -F \2/'` - emit "$CORE_COLLECTOR /proc/vmcore | dd of=$config_val" + emit "$CORE_COLLECTOR /proc/vmcore | dd of=$config_val bs=512 >> /tmp/dd_progress_file 2>&1" fi emit "exitcode=\$?" emit "if [ \$exitcode == 0 ]" emit "then" - emit " echo \"Saving core complete\"" + emit " echo -e \"\\\033[0JSaving core complete\"" emit "fi" if [ -x "$KDUMP_POST" ]; then emit "$KDUMP_POST \$exitcode" @@ -1705,10 +1828,10 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then fi #setup nfs case - if [ -n "$KDUMP_PRE_INTERNAL" ] + if [ -n "$KDUMP_PRE" ] then - emit "$KDUMP_PRE_INTERNAL" - emit "if [ $? -ne 0 ]" + emit "$KDUMP_PRE" + emit "if [ \$? -ne 0 ]" emit "then" emit " echo kdump_pre script exited with non-zero status" emit " $FINAL_ACTION" @@ -1722,12 +1845,13 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then emit " mkdir -p /mnt/$SAVE_PATH/$lhost-\$DATE" emit " VMCORE=/mnt/$SAVE_PATH/$lhost-\$DATE/vmcore" emit " export VMCORE" + emit " monitor_cp_progress \$VMCORE-incomplete &" emit " $CORE_COLLECTOR /proc/vmcore \$VMCORE-incomplete >/dev/null" emit " exitcode=\$?" emit " if [ \$exitcode == 0 ]" emit " then" emit " mv \$VMCORE-incomplete \$VMCORE" - emit " echo \"Saving core complete\"" + emit " echo -e \"\\\033[0JSaving core complete\"" emit " fi" if [ -x "$KDUMP_POST" ]; then emit " $KDUMP_POST \$exitcode" @@ -1767,10 +1891,10 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then ssh -q $s_opts $rlocation rmdir $tdir #setup ssh case, quick check to see if setup already - if [ -n "$KDUMP_PRE_INTERNAL" ] + if [ -n "$KDUMP_PRE" ] then - emit "$KDUMP_PRE_INTERNAL" - emit "if [ $? 
-ne 0 ]" emit "then" emit " echo kdump_pre script exited with non-zero status" emit " $FINAL_ACTION" @@ -1788,6 +1912,7 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then emit "ssh -q -o BatchMode=yes -o StrictHostKeyChecking=no $rlocation mkdir $SAVE_PATH/$lhost-\$DATE" emit "VMCORE=$SAVE_PATH/$lhost-\$DATE/vmcore" emit "export VMCORE" + emit "monitor_scp_progress $rlocation $SAVE_PATH/$lhost-\$DATE/vmcore-incomplete &" if [ -z "$CORE_COLLECTOR" ] then emit "scp -q -o BatchMode=yes -o StrictHostKeyChecking=no /proc/vmcore $rlocation:\$VMCORE-incomplete" @@ -1803,7 +1928,7 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then else emit " ssh -q -o BatchMode=yes -o StrictHostKeyChecking=no $rlocation mv \$VMCORE-incomplete \$VMCORE.flat" fi - emit " echo \"Saving core complete\"" + emit " echo -e \"\\\033[0JSaving core complete\"" emit "fi" if [ -x "$KDUMP_POST" ]; then emit "$KDUMP_POST \$exitcode" @@ -1813,6 +1938,8 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then ;; core_collector) ;; + kdump_pre) + ;; kdump_post) ;; extra_bins) @@ -1849,10 +1976,10 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then #setup filesystem case - if [ -n "$KDUMP_PRE_INTERNAL" ] + if [ -n "$KDUMP_PRE" ] then - emit "$KDUMP_PRE_INTERNAL" - emit "if [ $? -ne 0 ]" + emit "$KDUMP_PRE" + emit "if [ \$? -ne 0 ]" emit "then" emit " echo kdump_pre script exited with non-zero status" emit " $FINAL_ACTION" @@ -1879,12 +2006,15 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then emit " mkdir -p /mnt/$SAVE_PATH/127.0.0.1-\$DATE" emit " VMCORE=/mnt/$SAVE_PATH/127.0.0.1-\$DATE/vmcore" emit " export VMCORE" + if [ "$CORE_COLLECTOR" == "cp" ]; then + emit " monitor_cp_progress \$VMCORE-incomplete &" + fi emit " $CORE_COLLECTOR /proc/vmcore \$VMCORE-incomplete >/dev/null" emit " exitcode=\$?" 
emit " if [ \$exitcode == 0 ]" emit " then" emit " mv \$VMCORE-incomplete \$VMCORE" - emit " echo \"Saving core complete\"" + emit " echo -e \"\\\033[0JSaving core complete\"" emit " fi" if [ -x "$KDUMP_POST" ]; then emit " $KDUMP_POST \$exitcode" @@ -1914,7 +2044,7 @@ if [ -n "$KDUMP_CONFIG_FILE" ]; then #find the shared libraries. this snippet taken from kboot TEMPLDCFG=`mktemp` - for lib in `ls /etc/ld.so.conf.d/* | grep -v kernelcap` + for lib in `ls /etc/ld.so.conf.d/* 2>/dev/null | grep -v kernelcap` do echo "include " $lib >> $TEMPLDCFG done @@ -1995,6 +2125,6 @@ if [ -n "$compress" ]; then else cp -a $IMAGE $target || rc=1 fi -rm -rf $MNTIMAGE $IMAGE +rm -rf $MNTIMAGE $IMAGE $TMPDISKLIST if [ -n "$MNTPOINT" ]; then rm -rf $MNTPOINT ; fi exit $rc diff --git a/mkdumprd.8 b/mkdumprd.8 index 28b55d2..fb8cb37 100644 --- a/mkdumprd.8 +++ b/mkdumprd.8 @@ -2,12 +2,12 @@ .SH NAME mkdumprd \- creates initial ramdisk images for kdump crash recovery .SH SYNOPSIS -\fBmkdumprd\fR [--version] [-v] [-f] +\fBmkdumprd\fR [--version] [-v] [-f] [-d] [--preload=\fImodule\fR] [--omit-scsi-modules] [--omit-raid-modules] [--omit-lvm-modules] - [--with=\fImodule\fR] [--image-version] + [--image-version] [--fstab=\fIfstab\fR] [--nocompress] - [--builtin=\fImodule\fR] [--nopivot] + [--builtin=\fImodule\fR] \fIimage\fR \fIkernel-version\fR .SH DESCRIPTION @@ -60,14 +60,6 @@ is created. Normally the created initrd image is compressed with \fBgzip\fR. If this option is specified, the compression is skipped. -.TP -\fB-\-nopivot -Do not use the \fBpivot_root\fR system call as part of the initrd. This -lets \fBmkdumprd\fR build proper images for Linux 2.2 kernels at the expense -of some features. In particular, some filesystems (such as ext3) will not -work properly and filesystem options will not be used to mount root. This -option is not recommended, and will be removed in future versions. - .TP \fB-\-omit-lvm-modules Do not load any lvm modules, even if /etc/fstab expects them. 
@@ -93,15 +85,12 @@ Prints out verbose information while creating the image (normally the \fBmkdumprd\fR runs silently). .TP -\fB-\-version\fR -Prints the version of \fBmkdumprd\fR that's being used and then exits. +\fB-d\fR +Query /etc/kdump for initrd construction configuration .TP -\fB-\-with=\fR\fImodule\fR -Load the modules \fImodule\fR in the initial ramdisk image. The module -gets loaded after any SCSI modules which are specified in -\fI/etc/modprobe.conf\fR. This option may be used as many times as -necessary. +\fB-\-version\fR +Prints the version of \fBmkdumprd\fR that's being used and then exits. .SH FILES .PD 0 diff --git a/sources b/sources index d2c5987..58dac0c 100644 --- a/sources +++ b/sources @@ -1,3 +1,3 @@ 68f68403304c5c89c7087b07563911f8 kexec-tools-po.tar.gz -dbf062772132ccb43d47920df89a3cf9 makedumpfile-1.1.5.tar.gz ec7cb260d6b708e14acbc74a45356d27 kexec-tools-testing-20070330.tar.bz2 +9b8434ea5fdd546f24fde82bd2ce59c3 makedumpfile-1.2.6.tar.gz