From 38b1c10fabe164d19cd78d4a79ef80a1a5752cc6 Mon Sep 17 00:00:00 2001 From: Crystal Wood Date: Wed, 14 Dec 2022 11:30:21 -0600 Subject: [PATCH] oslat: Add command line option for bucket width Resolves: rhbz#2041637 Signed-off-by: Crystal Wood --- ...command-line-option-for-bucket-width.patch | 300 ++++++++++++++++++ realtime-tests.spec | 8 +- 2 files changed, 307 insertions(+), 1 deletion(-) create mode 100644 oslat-Add-command-line-option-for-bucket-width.patch diff --git a/oslat-Add-command-line-option-for-bucket-width.patch b/oslat-Add-command-line-option-for-bucket-width.patch new file mode 100644 index 0000000..b861c86 --- /dev/null +++ b/oslat-Add-command-line-option-for-bucket-width.patch @@ -0,0 +1,300 @@ +From cbec2872f7533a87f0b63a30d70386d203ebd138 Mon Sep 17 00:00:00 2001 +From: Crystal Wood +Date: Tue, 13 Dec 2022 16:38:28 -0600 +Subject: [PATCH] oslat: Add command line option for bucket width + +New option -W/--bucket-width allows the user to specify how large of a +range of latencies is covered by a single bucket, including allowing the +creation of sub-microsecond buckets. + +When the flag is not used, output should be unchanged. However, if a +bucket width is specified that is not a multiple of one microsecond, +latencies will be output as fractional microseconds, at nanosecond +precision. This includes JSON output. + +When using this option, it is up to the user to determine what level +of precision is meaningful relative to measurement error, as is noted +in the documentation. + +Signed-off-by: Crystal Wood +Signed-off-by: John Kacur +--- + src/oslat/oslat.8 | 9 +++- + src/oslat/oslat.c | 110 ++++++++++++++++++++++++++++++++-------------- + 2 files changed, 85 insertions(+), 34 deletions(-) + +diff --git a/src/oslat/oslat.8 b/src/oslat/oslat.8 +index 39b36df0db3f..eb96448bfff1 100644 +--- a/src/oslat/oslat.8 ++++ b/src/oslat/oslat.8 +@@ -7,7 +7,7 @@ oslat \- OS Latency Detector + .RI "[ \-shvz ] [ \-b " bucket-size " ] [ \-B " bias " ] [ \-c " cpu-list " ] \ + [ \-C " cpu-main-thread " ] [ \-f " rt-prio " ] [ \-\-json " filename " ] \ + [ \-m " workload-mem " ] [\-t " runtime " ] [ \-T " trace-threshold " ] \ +-[ \-w " workload " ]" ++[ \-w " workload " ] [ \-W " bucket-width " ]" + .SH DESCRIPTION + .B oslat + is an open source userspace polling mode stress program to detect OS level +@@ -57,6 +57,13 @@ NOTE: please make sure the CPU frequency on all testing cores + are locked before using this parmater. If you don't know how + to lock the freq then please don't use this parameter. + .TP ++.B \-W, \-\-bucket-width ++Interval between buckets in nanoseconds ++ ++NOTE: Widths not a multiple of 1000 cause ns-precision output ++You are responsible for considering the impact of measurement ++overhead at the nanosecond scale. ++.TP + .B \-h, \-\-help + Show the help message. + .TP +diff --git a/src/oslat/oslat.c b/src/oslat/oslat.c +index 55302f11986b..b680f1ebce96 100644 +--- a/src/oslat/oslat.c ++++ b/src/oslat/oslat.c +@@ -192,6 +192,10 @@ struct global { + struct timeval tv_start; + int rtprio; + int bucket_size; ++ bool bucket_size_param; ++ int bucket_width; ++ int unit_per_us; ++ int precision; + int trace_threshold; + int runtime; + /* The core that we run the main thread. Default is cpu0 */ +@@ -325,45 +329,46 @@ static float cycles_to_sec(const struct thread *t, uint64_t cycles) + + static void insert_bucket(struct thread *t, stamp_t value) + { +- int index, us; ++ int index; ++ unsigned int lat; + uint64_t extra; ++ double us; + +- index = value / t->counter_mhz; +- assert(index >= 0); +- us = index + 1; +- assert(us > 0); +- ++ lat = (value * g.unit_per_us + t->counter_mhz - 1) / t->counter_mhz; ++ us = (double)lat / g.unit_per_us; + if (!g.preheat && g.trace_threshold && us >= g.trace_threshold) { +- char *line = "%s: Trace threshold (%d us) triggered on cpu %d with %u us!\n" ++ char *line = "%s: Trace threshold (%d us) triggered on cpu %d with %.*f us!\n" + "Stopping the test.\n"; +- tracemark(line, g.app_name, g.trace_threshold, t->core_i, us); +- err_quit(line, g.app_name, g.trace_threshold, t->core_i, us); ++ tracemark(line, g.app_name, g.trace_threshold, t->core_i, ++ g.precision, us); ++ err_quit(line, g.app_name, g.trace_threshold, t->core_i, ++ g.precision, us); + } + + /* Update max latency */ +- if (us > t->maxlat) +- t->maxlat = us; ++ if (lat > t->maxlat) ++ t->maxlat = lat; + +- if (us < t->minlat) +- t->minlat = us; ++ if (lat < t->minlat) ++ t->minlat = lat; + + if (g.bias) { + /* t->bias will be set after pre-heat if user enabled it */ +- us -= g.bias; ++ lat -= g.bias; + /* + * Negative should hardly happen, but if it happens, we assume we're in +- * the smallest bucket, which is 1us. Same to index. ++ * the smallest bucket. + */ +- if (us <= 0) +- us = 1; +- index -= g.bias; +- if (index < 0) +- index = 0; ++ if (lat <= 0) ++ lat = 1; + } + ++ index = lat / g.bucket_width; ++ assert(index >= 0); ++ + /* Too big the jitter; put into the last bucket */ + if (index >= g.bucket_size) { +- /* Keep the extra bit (in us) */ ++ /* Keep the extra bit (in bucket width multiples) */ + extra = index - g.bucket_size; + if (t->overflow_sum + extra < t->overflow_sum) { + /* The uint64_t even overflowed itself; bail out */ +@@ -455,6 +460,19 @@ static void *thread_main(void *arg) + printf("%s\n", end); \ + } while (0) + ++#define putfieldp(label, val, end) do { \ ++ printf("%12s:\t", label); \ ++ for (i = 0; i < g.n_threads; ++i) \ ++ printf(" %.*f", g.precision, \ ++ (double)(val) / g.unit_per_us); \ ++ printf("%s\n", end); \ ++ } while (0) ++ ++static double bucket_to_lat(int bucket) ++{ ++ return (g.bias + (bucket + 1) * (double)g.bucket_width) / g.unit_per_us; ++} ++ + void calculate(struct thread *t) + { + int i, j; +@@ -465,11 +483,11 @@ void calculate(struct thread *t) + /* Calculate average */ + sum = count = 0; + for (j = 0; j < g.bucket_size; j++) { +- sum += 1.0 * t[i].buckets[j] * (g.bias+j+1); ++ sum += t[i].buckets[j] * bucket_to_lat(j); + count += t[i].buckets[j]; + } + /* Add the extra amount of huge spikes in */ +- sum += t->overflow_sum; ++ sum += t->overflow_sum * g.bucket_width; + t[i].average = sum / count; + } + } +@@ -501,16 +519,16 @@ static void write_summary(struct thread *t) + print_dotdotdot = 0; + } + +- snprintf(bucket_name, sizeof(bucket_name), "%03"PRIu64 +- " (us)", g.bias+j+1); ++ snprintf(bucket_name, sizeof(bucket_name), "%03.*f (us)", ++ g.precision, bucket_to_lat(j)); + putfield(bucket_name, t[i].buckets[j], PRIu64, + (j == g.bucket_size - 1) ? " (including overflows)" : ""); + } + +- putfield("Minimum", t[i].minlat, PRIu64, " (us)"); ++ putfieldp("Minimum", t[i].minlat, " (us)"); + putfield("Average", t[i].average, ".3lf", " (us)"); +- putfield("Maximum", t[i].maxlat, PRIu64, " (us)"); +- putfield("Max-Min", t[i].maxlat - t[i].minlat, PRIu64, " (us)"); ++ putfieldp("Maximum", t[i].maxlat, " (us)"); ++ putfieldp("Max-Min", t[i].maxlat - t[i].minlat, " (us)"); + putfield("Duration", cycles_to_sec(&(t[i]), t[i].runtime), + ".3f", " (sec)"); + printf("\n"); +@@ -537,8 +555,8 @@ static void write_summary_json(FILE *f, void *data) + if (t[i].buckets[j] == 0) + continue; + fprintf(f, "%s", comma ? ",\n" : "\n"); +- fprintf(f, " \"%" PRIu64 "\": %" PRIu64, +- g.bias+j+1, t[i].buckets[j]); ++ fprintf(f, " \"%.*f\": %" PRIu64, ++ g.precision, bucket_to_lat(j), t[i].buckets[j]); + comma = 1; + } + if (comma) +@@ -610,6 +628,10 @@ static void usage(int error) + "-v, --version Display the version of the software.\n" + "-w, --workload Specify a kind of workload, default is no workload\n" + " (options: no, memmove)\n" ++ "-W, --bucket-width Interval between buckets in nanoseconds\n" ++ " NOTE: Widths not a multiple of 1000 cause ns-precision output\n" ++ " You are responsible for considering the impact of measurement\n" ++ " overhead at the nanosecond scale.\n" + "-z, --zero-omit Don't display buckets in the output histogram if all zeros.\n" + ); + exit(error); +@@ -630,7 +652,7 @@ static int workload_select(char *name) + } + + enum option_value { +- OPT_BUCKETSIZE=1, OPT_CPU_LIST, OPT_CPU_MAIN_THREAD, ++ OPT_BUCKETSIZE = 1, OPT_BUCKETWIDTH, OPT_CPU_LIST, OPT_CPU_MAIN_THREAD, + OPT_DURATION, OPT_JSON, OPT_RT_PRIO, OPT_HELP, OPT_TRACE_TH, + OPT_WORKLOAD, OPT_WORKLOAD_MEM, OPT_BIAS, + OPT_QUIET, OPT_SINGLE_PREHEAT, OPT_ZERO_OMIT, +@@ -644,6 +666,7 @@ static void parse_options(int argc, char *argv[]) + int option_index = 0; + static struct option options[] = { + { "bucket-size", required_argument, NULL, OPT_BUCKETSIZE }, ++ { "bucket-width", required_argument, NULL, OPT_BUCKETWIDTH }, + { "cpu-list", required_argument, NULL, OPT_CPU_LIST }, + { "cpu-main-thread", required_argument, NULL, OPT_CPU_MAIN_THREAD}, + { "duration", required_argument, NULL, OPT_DURATION }, +@@ -660,7 +683,7 @@ static void parse_options(int argc, char *argv[]) + { "version", no_argument, NULL, OPT_VERSION }, + { NULL, 0, NULL, 0 }, + }; +- int i, c = getopt_long(argc, argv, "b:Bc:C:D:f:hm:qsw:T:vz", ++ int i, c = getopt_long(argc, argv, "b:Bc:C:D:f:hm:qsw:W:T:vz", + options, &option_index); + long ncores; + +@@ -670,6 +693,7 @@ static void parse_options(int argc, char *argv[]) + switch (c) { + case OPT_BUCKETSIZE: + case 'b': ++ g.bucket_size_param = true; + g.bucket_size = strtol(optarg, NULL, 10); + if (g.bucket_size > 1024 || g.bucket_size <= 4) { + printf("Illegal bucket size: %s (should be: 4-1024)\n", +@@ -677,6 +701,20 @@ static void parse_options(int argc, char *argv[]) + exit(1); + } + break; ++ case OPT_BUCKETWIDTH: ++ case 'W': ++ g.bucket_width = strtol(optarg, NULL, 10); ++ if (g.bucket_width <= 0) { ++ printf("Illegal bucket width: %s\n", optarg); ++ exit(1); ++ } ++ if (g.bucket_width % 1000) { ++ g.unit_per_us = 1000; ++ g.precision = 3; ++ } else { ++ g.bucket_width /= 1000; ++ } ++ break; + case OPT_BIAS: + case 'B': + g.enable_bias = 1; +@@ -776,6 +814,9 @@ static void parse_options(int argc, char *argv[]) + break; + } + } ++ ++ if (!g.bucket_size_param && g.precision == 3 && g.bucket_width < 1000) ++ g.bucket_size = BUCKET_SIZE * 1000 / g.bucket_width; + } + + void dump_globals(void) +@@ -811,7 +852,8 @@ static void record_bias(struct thread *t) + bias = t[i].minlat; + } + g.bias = bias; +- printf("Global bias set to %" PRId64 " (us)\n", bias); ++ printf("Global bias set to %.*f (us)\n", g.precision, ++ (double)bias / g.unit_per_us); + } + + int main(int argc, char *argv[]) +@@ -835,6 +877,8 @@ int main(int argc, char *argv[]) + g.app_name = argv[0]; + g.rtprio = 0; + g.bucket_size = BUCKET_SIZE; ++ g.bucket_width = 1; ++ g.unit_per_us = 1; + g.runtime = 1; + g.workload = &workload_list[WORKLOAD_DEFAULT]; + g.workload_mem_size = WORKLOAD_MEM_SIZE; +-- +2.38.1 + diff --git a/realtime-tests.spec b/realtime-tests.spec index db2aec9..9e85bf7 100644 --- a/realtime-tests.spec +++ b/realtime-tests.spec @@ -6,7 +6,7 @@ Name: realtime-tests # Numa argument to make: NUMA=1 # Version: 2.4 -Release: 5%{?dist} +Release: 6%{?dist} License: GPLv2 URL: https://git.kernel.org/pub/scm/utils/rt-tests/rt-tests.git Source0: https://www.kernel.org/pub/linux/utils/rt-tests/rt-tests-%{version}.tar.xz @@ -27,6 +27,7 @@ Patch4: rt-tests-hackbench-Fix-compile-warning-about-fall-th.patch Patch5: rt-tests-hwlatdetect.py-Covert-to-f-strings.patch Patch6: rt-tests-hwlatdetect-Update-to-integer-division.patch Patch7: rt-tests-hwlatdetect-Fix-incorrect-comment-about-tes.patch +Patch8: oslat-Add-command-line-option-for-bucket-width.patch %description realtime-tests is a set of programs that test and measure various components of @@ -42,6 +43,7 @@ latency. It also tests the functioning of priority-inheritance mutexes. %patch5 -p1 %patch6 -p1 %patch7 -p1 +%patch8 -p1 %build %set_build_flags @@ -92,6 +94,10 @@ latency. It also tests the functioning of priority-inheritance mutexes. %{_mandir}/man8/determine_maximum_mpps.8.* %changelog +* Wed Dec 14 2022 Crystal Wood - 2.4-6 +- oslat: Add command line option for bucket width +Resolves: rhbz#2041637 + * Mon Nov 14 2022 Leah Leshchinsky - 2.4-5 - hwlatdetect: Convert to fstrings - hwlatdetect: Update to integer division