s390utils/0012-s390-tools-1.8.1-ziomon-fixes.patch

198 lines
6.5 KiB
Diff
Raw Normal View History

From 1833f9dae371a48e3f52891262ad2d5fd75fc205 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Dan=20Hor=C3=A1k?= <dan@danny.cz>
Date: Fri, 5 Jun 2009 14:12:52 +0200
Subject: [PATCH] s390-tools-1.8.1-ziomon-fixes
---
ziomon/stats.h | 2 +-
ziomon/ziomon | 84 ++++++++++++++++++++++++++++++++++++++++++++-----
ziomon/ziomon_util.c | 2 +-
3 files changed, 77 insertions(+), 11 deletions(-)
diff --git a/ziomon/stats.h b/ziomon/stats.h
index a28d436..0920b27 100644
--- a/ziomon/stats.h
+++ b/ziomon/stats.h
@@ -142,7 +142,7 @@ static inline void histlog2_print(const char *s, const __u32 a[],
int i;
printf("%s:\n", s);
- for (i = 0; i < h->num; i++) {
+ for (i = 0; i < h->num - 1; i++) {
printf(" %10ld:%6d",
(unsigned long)(histlog2_upper_limit(i, h)), a[i]);
if (!((i + 1) % 4))
diff --git a/ziomon/ziomon b/ziomon/ziomon
index aa1cf78..fe4d8ec 100755
--- a/ziomon/ziomon
+++ b/ziomon/ziomon
@@ -32,7 +32,7 @@ WRP_DEVICES=();
WRP_LUNS=();
WRP_LOGFILE="";
# limit of actual data in percent that need space on disk
-WRP_SIZE_THRESHOLD="25";
+WRP_SIZE_THRESHOLD="10";
WRP_FORCE=0;
function debug() {
@@ -234,6 +234,7 @@ function start_trace() {
local hosts_param;
local luns_param;
local i;
+ local len;
if [ $WRP_DEBUG -ne 0 ]; then
verbose="-V";
@@ -276,7 +277,7 @@ function start_trace() {
blkiomon_command="blkiomon --interval=$WRP_INTERVAL -Q $WRP_MSG_Q_PATH -q $WRP_MSG_Q_ID -m $WRP_MSG_Q_BLKIOMON_ID $verbose_blk -d -";
zfcpdd_command="ziomon_zfcpdd -Q $WRP_MSG_Q_PATH -q $WRP_MSG_Q_ID -m $WRP_MSG_Q_ZIOMON_ZFCPDD_ID -i $WRP_INTERVAL";
debug "starting blktrace: $blktrace_command | $blkiomon_command | $zfcpdd_command";
- $blktrace_command | $blkiomon_command | $zfcpdd_command > $WRP_MSG_Q_PATH/blktrace.log &
+ $blktrace_command 2>$WRP_MSG_Q_PATH/blktrace.err | $blkiomon_command | $zfcpdd_command > $WRP_MSG_Q_PATH/blktrace.log &
i=0;
# might take a moment to start all processes in the pipe if system under load
while [ $i -lt 60 ]; do
@@ -303,7 +304,17 @@ function start_trace() {
echo "done";
echo -n "Collecting data...";
- sleep $WRP_DURATION;
+ # pay extra attention to blktrace
+ for (( i=0; i<$WRP_DURATION; ++i )); do
+ len=`cat $WRP_MSG_Q_PATH/blktrace.err | wc -l`;
+ if [ $len -ne 0 ]; then
+ cat $WRP_MSG_Q_PATH/blktrace.err;
+ echo "Error: blktrace has errors, aborting";
+ return;
+ fi
+ sleep 1;
+ done
+
echo "done";
}
@@ -358,6 +369,58 @@ function emergency_shutdown() {
}
+function check_cpuplugd {
+ # Warn (without aborting) if cpuplugd appears to be running, judged by the
+ # presence of its pid file. If so, the whole per-cpu mechanism of blktrace
+ # gets corrupted, which results in the infamous 'bad trace magic' message
+ if [ -e /var/run/cpuplugd.pid ]; then
+ echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
+ echo "ziomon: Warning: cpuplugd is running which can corrupt the traces.";
+ echo " It is recommended to stop cpuplugd for the duration of the";
+ echo " trace using 'service cpuplugd stop'.";
+ echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
+ fi
+}
+
+
+# Exit with status 1 unless 2MB of free Vmalloc space per device and CPU exist
+function check_vmalloc_space() {
+ local total used free;
+ local num_cpus required;
+
+ num_cpus=`awk '/processors/ {print $4}' /proc/cpuinfo`;
+ total=`awk '/VmallocTotal/ {print $2}' /proc/meminfo`;
+ used=`awk '/VmallocUsed/ {print $2}' /proc/meminfo`;
+ # an empty value would break the arithmetic below and let the check
+ # pass with nothing but shell syntax errors, so report it explicitly
+ if [ -z "$num_cpus" ] || [ -z "$total" ] || [ -z "$used" ]; then
+ echo "$WRP_TOOLNAME: Warning: Cannot determine Vmalloc space, check skipped";
+ return 0;
+ fi
+
+ (( free=total-used ));
+ # 2048 KBytes for each combination of traced device and CPU
+ (( required=num_cpus*${#WRP_DEVICES[@]}*2048 ));
+ debug "Required Vmalloc space: $required KBytes";
+ if [ $free -lt $required ]; then
+ echo "$WRP_TOOLNAME: Not enough free Vmalloc space:";
+ echo " Required: $required KBytes";
+ echo " Free: $free KBytes";
+ exit 1;
+ fi
+}
+
+# Exit with status 1 unless the third word printed by 'blkiomon -V' is "0.2"
+function check_blkiomon() {
+ # keep 'ver' local so it does not leak into the script's global scope
+ local ver=`blkiomon -V | awk '{print $3}'`;
+ if [ "$ver" != "0.2" ]; then
+ echo "$WRP_TOOLNAME: Unsupported blkiomon version $ver detected, aborting";
+ exit 1;
+ fi
+}
+
+
function setup() {
while [ -e $WRP_MSG_Q_PATH ]; do
WRP_MSG_Q_PATH="$WRP_MSG_Q_PATH$RANDOM";
@@ -476,7 +539,7 @@ function determine_host_adapters() {
local num_s_devs;
local s_dev_ratio;
- echo -n "check devices...";
+ echo -n "Check devices...";
# Estimate fraction of /dev/s* devices - if >50%, start with check for regular devices
num_s_devs=`echo ${WRP_DEVICES[@]} | sed "s/ /\n/g" | grep /dev/s | wc -l`;
@@ -599,7 +662,6 @@ function check_size_requirements() {
local estimated_size;
local free_space;
local logpath=`dirname $WRP_LOGFILE`;
- local num_uniq_devs;
set `ziomon_mgr -e`;
util_base_sz=$1;
@@ -611,12 +673,10 @@ function check_size_requirements() {
# NOTE: Since blktrace and ziomon_zfcpdd write messages only when there is
# traffic, the estimate is an upper boundary only
- num_uniq_devs=`echo ${WRP_LUNS[@]} | sed 's/ /\n/g' | cut -d : -f 4 | sort | uniq | wc -l`;
- debug "number of unique devices: $num_uniq_devs";
debug "disk space requirements:";
(( size_per_record = $util_base_sz + ${#WRP_HOST_ADAPTERS[@]} * $util_variable_sz + $ioerr_base_sz
- + $num_uniq_devs * ( $ioerr_variable_sz + $blkiotrace_sz + $zfcpiotrace_sz )
- + ( 2 + $num_uniq_devs) * 8 ));
+ + ${#WRP_DEVICES[@]} * ( $ioerr_variable_sz + $blkiotrace_sz + $zfcpiotrace_sz )
+ + ( 2 + ${#WRP_DEVICES[@]}) * 8 ));
debug " size per interval: $size_per_record Bytes";
(( total_num_records = $WRP_DURATION / $WRP_INTERVAL ));
debug " total number of intervals: $total_num_records";
@@ -653,10 +713,16 @@ setup;
parse_params $@;
+check_cpuplugd;
+
+check_blkiomon;
+
check_for_existing_output;
determine_host_adapters;
+check_vmalloc_space;
+
check_size_requirements;
[ $? -eq 0 ] && start_trace;
diff --git a/ziomon/ziomon_util.c b/ziomon/ziomon_util.c
index e3e0762..043d3d1 100644
--- a/ziomon/ziomon_util.c
+++ b/ziomon/ziomon_util.c
@@ -597,7 +597,7 @@ static int poll_ioerr_cnt(int init, struct ioerr_data *data,
for (i=0; i<opts->num_luns; ++i) {
/* read ioerr_cnt attribute */
if (read_attribute(opts->luns[i], line, NULL)) {
- fprintf(stderr, "%s: Warning: Could read %s\n",
+ fprintf(stderr, "%s: Warning: Could not read %s\n",
toolname, opts->luns[i]);
grc++;
continue;
--
1.6.0.6