Add benchmark comparison

Create a new package glibc-benchtests with the benchmark binaries that one may download and run to benchmark glibc for their machine. More importantly, the glibc-bench-compare and bench.mk scripts can run benchmarks and compare performance of two arbitrary glibc versions as long as both versions have the glibc-benchtests package. Usage: Scenario 1: Compare two build numbers, e.g.: /usr/libexec/glibc-benchtests/glibc-bench-compare 2.20-1.fc21 2.21.90-11.fc22 If a second build is omitted, comparison is done with the currently installed glibc. Scenario 2: Compare two downloaded rpms - only glibc, glibc-benchtests and glibc-common are needed for both versions. e.g.: /usr/libexec/glibc-benchtests/glibc-bench-compare -p <dir1> <dir2>
2015-05-08 11:49:59 +05:30 · 2015-05-08 11:49:59 +05:30 · 67b30d7656
commit 67b30d7656
parent adbfe47d74
5 changed files with 749 additions and 1 deletions
--- a/bench.mk
+++ b/bench.mk
@ -0,0 +1,77 @@
 # Directory holding the benchmark binaries of the tree unpacked for $(ver).
 objpfx = $(prefix)/$(ver)/usr/libexec/glibc-benchtests/
 # Benchmarks run by the bench-func target.
 bench-math := acos acosh asin asinh atan atanh cos cosh exp exp2 ffs ffsll \
 	      log log2 modf pow rint sin sincos sinh sqrt tan tanh
 bench-pthread := pthread_once
 bench := $(bench-math) $(bench-pthread)
 # Run the program named by the shell variable "run" under the dynamic loader
 # and libraries found below $(prefix)/$(ver).  NOTE: the loader name and the
 # lib64 directory are hard-coded for x86_64.
 run-bench := $(prefix)/$(ver)/lib64/ld-linux-x86-64.so.2 --library-path $(prefix)/$(ver)/lib64 $${run}
 # String function benchmarks.
 string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
 		mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \
 		strcat strchr strchrnul strcmp strcpy strcspn strlen \
 		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
 		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok
 string-bench-all := $(string-bench)
 stdlib-bench := strtod
 # Benchmarks run by the bench-set target.
 benchset := $(string-bench-all) $(stdlib-bench)
 # Benchmark run by the bench-malloc target.
 bench-malloc := malloc-thread
 # Full paths of the benchmark binaries; they are expected to exist already.
 binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
 binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
 binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
 # Pass -d to the bench-func programs when DETAILED is set.  The assignment is
 # indented with spaces: a leading tab would be taken for a recipe line if this
 # conditional ever followed a rule.
 DETAILED_OPT :=
 ifdef DETAILED
   DETAILED_OPT := -d
 endif
 # Run every benchmark group.  None of these targets names a real file, so
 # declare them phony to keep a stray file of the same name from masking them.
 .PHONY: bench bench-set bench-func bench-malloc
 bench: bench-set bench-func bench-malloc
 # Run every benchset benchmark and store its output in
 # $(prefix)/<binary name>.$(ver).out.  Output is written to a .tmp file first,
 # so the .out file is only replaced once the benchmark has finished.  The
 # rename spells out both names instead of using brace expansion, which is not
 # available in a POSIX /bin/sh (this makefile does not set SHELL).
 bench-set: $(binaries-benchset)
 	for run in $^; do \
 	  outfile=$(prefix)/$$(basename $${run}.$(ver).out); \
 	  echo "Running $${run}"; \
 	  $(run-bench) > $${outfile}.tmp; \
 	  mv $${outfile}.tmp $${outfile}; \
 	done
 # Run the malloc benchmark with 1, 8, 16 and 32 threads.  Each thread count
 # gets its own output file,
 # $(prefix)/bench-malloc-thread-<threads>.$(ver).out; with a single shared
 # file only the result of the last run would survive.  The rename spells out
 # both names instead of using brace expansion, which is not available in a
 # POSIX /bin/sh (this makefile does not set SHELL).
 bench-malloc: $(binaries-bench-malloc)
 	run=$(objpfx)bench-malloc-thread; \
 	for thr in 1 8 16 32; do \
 	  outfile=$(prefix)/$$(basename $${run})-$${thr}.$(ver).out; \
 	  echo "Running $${run} $${thr}"; \
 	  $(run-bench) $${thr} > $${outfile}.tmp; \
 	  mv $${outfile}.tmp $${outfile}; \
 	done
 # Execute the benchmark functions.  This target generates the JSON formatted
 # $(prefix)/bench.$(ver).out; a result from a previous run is kept as
 # $(prefix)/bench.$(ver).out.old.  Each of the programs produces independent
 # JSON output, so one could even execute them individually and process it
 # using any JSON capable language or tool.
 # A comma is printed before the output of every program except the first
 # prerequisite, so that the entries form one JSON object.  The renames spell
 # out both file names instead of using brace expansion, which is not available
 # in a POSIX /bin/sh (this makefile does not set SHELL).
 bench-func: $(binaries-bench)
 	{ echo "{\"timing_type\": \"hp-timing\","; \
 	echo " \"functions\": {"; \
 	for run in $^; do \
 	  if ! [ "x$${run}" = "x$<" ]; then \
 	    echo ","; \
 	  fi; \
 	  echo "Running $${run}" >&2; \
 	  $(run-bench) $(DETAILED_OPT); \
 	done; \
 	echo; \
 	echo " }"; \
 	echo "}"; } > $(prefix)/bench.$(ver).out-tmp; \
 	if [ -f $(prefix)/bench.$(ver).out ]; then \
 	  mv -f $(prefix)/bench.$(ver).out $(prefix)/bench.$(ver).out.old; \
 	fi; \
 	mv -f $(prefix)/bench.$(ver).out-tmp $(prefix)/bench.$(ver).out
 #	scripts/validate_benchout.py bench.out \
 #		scripts/benchout.schema.json
--- a/glibc-bench-build.patch
+++ b/glibc-bench-build.patch
@ -0,0 +1,41 @@
 diff --git a/Makefile.in b/Makefile.in
 index 710ce7e..3fe9e73 100644
 --- a/Makefile.in
 +++ b/Makefile.in
@@ -12,7 +12,7 @@ install:
 	LC_ALL=C; export LC_ALL; \
 	$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
 -bench bench-clean:
 +bench bench-clean bench-build:
 	$(MAKE) -C $(srcdir)/benchtests $(PARALLELMFLAGS) objdir=`pwd` $@
 # Convenience target to rebuild ULPs for all math tests.
 diff --git a/Rules b/Rules
 index 4f9cdf3..42d0368 100644
 --- a/Rules
 +++ b/Rules
@@ -83,7 +83,7 @@ common-generated += dummy.o dummy.c
 # This makes all the auxiliary and test programs.
 -.PHONY: others tests bench
 +.PHONY: others tests bench bench-build
 ifeq ($(build-programs),yes)
 others: $(addprefix $(objpfx),$(others) $(sysdep-others) $(extra-objs))
 diff --git a/benchtests/Makefile b/benchtests/Makefile
 index fd3036d..7cbceaa 100644
 --- a/benchtests/Makefile
 +++ b/benchtests/Makefile
@@ -103,6 +103,10 @@ bench-clean:
 bench: $(timing-type) bench-set bench-func bench-malloc
 +bench-build: bench-set-build bench-func-build
 +bench-set-build: $(binaries-benchset)
 +bench-func-build: $(binaries-bench) $(binaries-bench-malloc)
 +
 bench-set: $(binaries-benchset)
 	for run in $^; do \
 	  echo "Running $${run}"; \
--- a/153
+++ b/153
@ -0,0 +1,153 @@
 #!/usr/bin/bash
 # This script can be invoked as follows:
 #
 # glibc-bench-compare [options] <BUILD> [BUILD]
 #
 # Options may be one of the following:
 #
 # -t		The BUILD arguments are task ids and not a version-release string
 # -a ARCH	Do comparison for ARCH architecture
 # -p		The BUILD arguments are paths to directories containing built rpms
 #
 # If -t or -a is given, both BUILD arguments must be given.
 # Otherwise, if only one BUILD is specified, then it is compared against the
 # installed glibc.
 # Silence the pushd/popd messages
 pushd() {
 	# Defer to the builtin and discard everything it prints.
 	command pushd "$@" &> /dev/null
 }
 popd() {
 	# Defer to the builtin and discard everything it prints.
 	command popd "$@" &> /dev/null
 }
 # Clean up any downloaded files before we exit
 trap "rm -rf /tmp/glibc-bench-compare.$BASHPID.*" EXIT
 # Defaults, possibly overridden by the commandline options below.
 installed=
 path=0
 options=0
 task=0
 arch=$(uname -i)
 # Look for any commandline options.  The leading ':' in the option string
 # keeps getopts quiet; unknown options and a missing -a argument end up in the
 # catch-all branch and are ignored.
 while getopts ":tpa:" opt; do
 	case $opt in
 	p)
 		path=1 ;;
 	t)
 		task=1; options=1
 		echo "Not implemented."
 		exit 1 ;;
 	a)
 		arch=$OPTARG; options=1 ;;
 	*)
 		;;
 	esac
 done
 # Done, now shift all option arguments out.
 shift $((OPTIND-1))
 # One or two BUILD arguments are required; two when -t or -a was given.
 if [ $# -eq 0 ] || [ $# -gt 2 ] || { [ $options -eq 1 ] && [ $# -lt 2 ]; }; then
 	printf 'Usage: %s [OPTIONS] <old> [new]\n\n' "$0"
 	printf 'OPTIONS:\n'
 	printf '\t-t\tCompare two brew tasks\n'
 	printf '\t-a ARCH\tGet rpms for the ARCH architecture\n'
 	printf '\t-p\tCompare built rpms in two paths.\n'
 	printf '\t\tThis minimally needs glibc, glibc-common and glibc-benchtests\n'
 	exit 1
 fi
 # With a single BUILD argument, compare it against the installed glibc;
 # "installed" then records the installed version-release.  The argument is
 # quoted so that the test always sees exactly one operand.
 if [ -z "$2" ]; then
 	new="$1"
 	old=$(rpm --queryformat "%{VERSION}-%{RELEASE}\n" -q glibc | head -1)
 	installed=$old
 else
 	new="$2"
 	old="$1"
 fi
 # Unpack rpms.
 #
 # With a directory argument ($1, ending in a slash): read the version-release
 # from the glibc rpm in that directory, unpack all rpms found there into a new
 # directory named after it and print that name on stdout.
 # Without an argument: unpack the rpms of the current directory in place.
 decompress_rpms() {
 	local vr= r
 	# We were given a path to the rpms.  Figure out the version-release and
 	# decompress the rpms.  $1 must be quoted here: unquoted and empty, the
 	# test degenerates to "[ -n ]", which is always true.
 	if [ -n "$1" ]; then
 		vr=$(rpm --queryformat="%{VERSION}-%{RELEASE}" -qp "$1"/glibc-2*.rpm | head -1)
 		mkdir "$vr" && pushd "$vr"
 	fi
 	for r in "$1"*.rpm; do
 		( rpm2cpio "$r" | cpio -di ) > /dev/null
 	done
 	if [ -n "$1" ]; then
 		popd
 		echo "$vr"
 	fi
 }
 # Get rpms for a build and decompress them.
 #
 # $1 is the version-release of the build.  The rpms for $arch are downloaded
 # into a new directory named $1, unpacked there and then deleted.
 get_build() {
 	echo "Processing build $1"
 	mkdir "$1" && pushd "$1"
 	brew buildinfo "glibc-$1" |
 	sed -n -e "s|/mnt/koji\(.\+$arch.\+\)|http://kojipkgs.fedoraproject.org\1|p" |
 	while read -r url; do
 		echo "Downloading $url"
 		wget -q "$url"
 	done
 	decompress_rpms
 	echo "Removing rpms"
 	# We are inside the build directory at this point, so the rpms are in the
 	# current directory and not below $1.
 	rm -f -- *.rpm
 	popd
 }
 # Run benchmarks for a build.
 #
 # $1 is the version-release of a build unpacked below the current directory.
 # Without an argument the benchmarks of the installed glibc are run.
 run_bench() {
 	if [ -z "$1" ]; then
 		# NOTE(review): with an empty prefix bench.mk looks for the
 		# binaries below /$installed/usr/libexec/glibc-benchtests/ and
 		# writes its output to /; confirm this is what is wanted for the
 		# installed glibc.
 		make DETAILED=1 ver="$installed" prefix= -f /usr/libexec/glibc-benchtests/bench.mk bench
 	else
 		make DETAILED=1 ver="$1" prefix="$PWD" -f "$1"/usr/libexec/glibc-benchtests/bench.mk bench
 	fi
 }
 # Get absolute paths if needed, since we will change into the working directory
 # next.
 if [ $path -eq 1 ]; then
 	old_path=$(realpath "$old")/
 	new_path=$(realpath "$new")/
 fi
 tmpdir=$(mktemp -p /tmp -d glibc-bench-compare.$$.XXXX)
 pushd "$tmpdir"
 # Get both builds.
 if [ $path -eq 0 ]; then
 	if [ -z "$installed" ]; then
 		get_build "$old"
 	fi
 	get_build "$new"
 else
 	old=$(decompress_rpms "$old_path")
 	new=$(decompress_rpms "$new_path")
 fi
 # make bench for each of those.
 if [ -z "$installed" ]; then
 	run_bench "$old"
 	scripts=$old/usr/libexec/glibc-benchtests
 else
 	run_bench
 	# The installed glibc is never unpacked into the working directory, so
 	# take the comparison script and schema from the installed package.
 	scripts=/usr/libexec/glibc-benchtests
 fi
 run_bench "$new"
 # Now run the comparison script.
 "$scripts"/compare_bench.py "$scripts"/benchout.schema.json \
 	"bench.$old.out" "bench.$new.out"
--- a/glibc-bench-compare.patch
+++ b/glibc-bench-compare.patch
@ -0,0 +1,412 @@
 diff -pruN a/benchtests/scripts/compare_bench.py b/benchtests/scripts/compare_bench.py
 --- a/benchtests/scripts/compare_bench.py	1970-01-01 05:30:00.000000000 +0530
 +++ b/benchtests/scripts/compare_bench.py	2015-05-07 15:32:41.843584024 +0530
@@ -0,0 +1,184 @@
 +#!/usr/bin/python
 +# Copyright (C) 2015 Free Software Foundation, Inc.
 +# This file is part of the GNU C Library.
 +#
 +# The GNU C Library is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU Lesser General Public
 +# License as published by the Free Software Foundation; either
 +# version 2.1 of the License, or (at your option) any later version.
 +#
 +# The GNU C Library is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +# Lesser General Public License for more details.
 +#
 +# You should have received a copy of the GNU Lesser General Public
 +# License along with the GNU C Library; if not, see
 +# <http://www.gnu.org/licenses/>.
 +"""Compare two benchmark results
 +
 +Given two benchmark result files and a threshold, this script compares the
 +benchmark results and flags differences in performance beyond a given
 +threshold.
 +"""
 +import sys
 +import os
 +import pylab
 +import import_bench as bench
 +
 +def do_compare(func, var, tl1, tl2, par, threshold):
 +    """Compare one of the aggregate measurements
 +
 +    Helper function to compare one of the aggregate measurements of a function
 +    variant.  A change is flagged '+++' when the second value is smaller than
 +    the first and '---' otherwise.
 +    Args:
 +        func: Function name
 +        var: Function variant name
 +        tl1: Aggregates of the first run, indexed by par
 +        tl2: Aggregates of the second run, indexed by par
 +        par: The aggregate to measure
 +        threshold: The threshold for differences (in percent of the first
 +        value), beyond which the script should print a warning.
 +    """
 +    d = abs(tl2[par] - tl1[par]) * 100 / tl1[par]
 +    if d > threshold:
 +        if tl1[par] > tl2[par]:
 +            ind = '+++'
 +        else:
 +            ind = '---'
 +        print('%s %s(%s)[%s]: (%.2lf%%) from %g to %g' %
 +                (ind, func, var, par, d, tl1[par], tl2[par]))
 +
 +
 +def compare_runs(pts1, pts2, threshold):
 +    """Compare two benchmark runs
 +
 +    Args:
 +        pts1: Timing data from the first run
 +        pts2: Timing data from the second run
 +        threshold: Difference in percent beyond which a change is printed
 +    """
 +    # XXX We assume that the two benchmarks have identical functions and
 +    # variants.  We cannot compare two benchmarks that may have different
 +    # functions or variants.  Maybe that is something for the future.
 +    for func in pts1['functions'].keys():
 +        for var in pts1['functions'][func].keys():
 +            tl1 = pts1['functions'][func][var]
 +            tl2 = pts2['functions'][func][var]
 +
 +            # Compare the consolidated numbers
 +            # do_compare(func, var, tl1, tl2, 'max', threshold)
 +            do_compare(func, var, tl1, tl2, 'min', threshold)
 +            do_compare(func, var, tl1, tl2, 'mean', threshold)
 +
 +            # Skip over to the next variant or function if there is no detailed
 +            # timing info for the function variant.
 +            if 'timings' not in pts1['functions'][func][var].keys() or \
 +                'timings' not in pts2['functions'][func][var].keys():
 +                    continue
 +
 +            # If two lists do not have the same length then it is likely that
 +            # the performance characteristics of the function have changed.
 +            # XXX: It is also likely that there was some measurement that
 +            # strayed outside the usual range.  Such outliers should not
 +            # happen on an idle machine with identical hardware and
 +            # configuration, but ideal environments are hard to come by.
 +            if len(tl1['timings']) != len(tl2['timings']):
 +                print('* %s(%s): Timing characteristics changed' %
 +                        (func, var))
 +                print('\tBefore: [%s]' %
 +                        ', '.join([str(x) for x in tl1['timings']]))
 +                print('\tAfter: [%s]' %
 +                        ', '.join([str(x) for x in tl2['timings']]))
 +                continue
 +
 +            # Collect the pairs of timings whose difference, in percent of
 +            # the first timing, crosses the threshold.
 +            issues = [(x, y) for x, y in zip(tl1['timings'], tl2['timings']) \
 +                        if abs(y - x) * 100 / x > threshold]
 +
 +            # Now print them; '-' marks a larger and '+' a smaller timing.
 +            for t1, t2 in issues:
 +                d = abs(t2 - t1) * 100 / t1
 +                if t2 > t1:
 +                    ind = '-'
 +                else:
 +                    ind = '+'
 +
 +                print("%s %s(%s): (%.2lf%%) from %g to %g" %
 +                        (ind, func, var, d, t1, t2))
 +
 +
 +def plot_graphs(bench1, bench2):
 +    """Plot graphs for functions
 +
 +    Make scatter plots for the functions and their variants and save each
 +    one as a png file.  The first set is drawn in red, the second in green.
 +    Args:
 +        bench1: Set of points from the first machine
 +        bench2: Set of points from the second machine.
 +    """
 +    for func in bench1['functions'].keys():
 +        for var in bench1['functions'][func].keys():
 +            # Both sets are plotted, so both need detailed timings.
 +            if u'timings' not in bench1['functions'][func][var].keys() or \
 +                    u'timings' not in bench2['functions'][func][var].keys():
 +                print('Skipping graph for %s(%s)' % (func, var))
 +                continue
 +
 +            pylab.clf()
 +            pylab.ylabel('Time (cycles)')
 +
 +            # First set of points
 +            length = len(bench1['functions'][func][var]['timings'])
 +            X = [float(x) for x in range(length)]
 +            lines = pylab.scatter(X, bench1['functions'][func][var]['timings'],
 +                    1.5 + 100 / length)
 +            pylab.setp(lines, 'color', 'r')
 +
 +            # Second set of points
 +            length = len(bench2['functions'][func][var]['timings'])
 +            X = [float(x) for x in range(length)]
 +            lines = pylab.scatter(X, bench2['functions'][func][var]['timings'],
 +                    1.5 + 100 / length)
 +            pylab.setp(lines, 'color', 'g')
 +
 +            if var:
 +                filename = "%s-%s.png" % (func, var)
 +            else:
 +                filename = "%s.png" % func
 +            print('Writing out %s' % filename)
 +            pylab.savefig(filename)
 +
 +
 +def main(args):
 +    """Program Entry Point
 +
 +    Take two benchmark output files and compare their timings.
 +    """
 +    if not 3 <= len(args) <= 4:
 +        print('Usage: %s <schema> <file1> <file2> [threshold in %%]' % sys.argv[0])
 +        sys.exit(os.EX_USAGE)
 +
 +    schema = args[0]
 +    bench1 = bench.parse_bench(args[1], schema)
 +    bench2 = bench.parse_bench(args[2], schema)
 +    # The threshold argument is optional.
 +    threshold = float(args[3]) if len(args) == 4 else 10.0
 +
 +    # Refuse to compare outputs with different timing types.
 +    if bench1['timing_type'] != bench2['timing_type']:
 +        print('Cannot compare benchmark outputs: timing types are different')
 +        return
 +
 +    # Plot the timings as parsed, before they are compressed below.
 +    plot_graphs(bench1, bench2)
 +
 +    for result in (bench1, bench2):
 +        bench.compress_timings(result)
 +    compare_runs(bench1, bench2, threshold)
 +
 +
 +if __name__ == '__main__':
 +    main(sys.argv[1:])
 diff -pruN a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
 --- a/benchtests/scripts/import_bench.py	1970-01-01 05:30:00.000000000 +0530
 +++ b/benchtests/scripts/import_bench.py	2015-05-07 15:32:41.844584032 +0530
@@ -0,0 +1,141 @@
 +#!/usr/bin/python
 +# Copyright (C) 2015 Free Software Foundation, Inc.
 +# This file is part of the GNU C Library.
 +#
 +# The GNU C Library is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU Lesser General Public
 +# License as published by the Free Software Foundation; either
 +# version 2.1 of the License, or (at your option) any later version.
 +#
 +# The GNU C Library is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +# Lesser General Public License for more details.
 +#
 +# You should have received a copy of the GNU Lesser General Public
 +# License along with the GNU C Library; if not, see
 +# <http://www.gnu.org/licenses/>.
 +"""Functions to import benchmark data and process it"""
 +
 +import json
 +try:
 +    import jsonschema as validator
 +except ImportError:
 +    print('Could not find jsonschema module.')
 +    raise
 +
 +
 +def mean(lst):
 +    """Return the arithmetic mean of the numbers in a list
 +
 +    This is hand-rolled because the numpy average function has horrible
 +    performance.
 +    Args:
 +        lst: The list of numbers to average.
 +    Return:
 +        The mean of members in the list.
 +    """
 +    total = sum(lst)
 +    return total / len(lst)
 +
 +
 +def split_list(bench, func, var):
 +    """ Split the list into a smaller set of more distinct points
 +
 +    Group together points such that the difference between the smallest
 +    point and the mean is less than 1/3rd of the mean.  This means that
 +    the mean is at most 1.5x the smallest member of that group.
 +
 +    mean - xmin < mean / 3
 +    i.e. 2 * mean / 3 < xmin
 +    i.e. mean < 3 * xmin / 2
 +
 +    For an evenly distributed group, the largest member will be less than
 +    twice the smallest member of the group.
 +    Derivation:
 +
 +    An evenly distributed series would be xmin, xmin + d, xmin + 2d...
 +
 +    mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n)
 +    and max element is xmin + (n - 1) * d
 +
 +    Now, mean < 3 * xmin / 2
 +
 +    3 * xmin > 2 * mean
 +    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
 +    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
 +    n * xmin > n * (n - 1) * d
 +    xmin > (n - 1) * d
 +    2 * xmin > xmin + (n-1) * d
 +    2 * xmin > xmax
 +
 +    Hence, proved.
 +
 +    Similarly, it is trivial to prove that for a similar aggregation by using
 +    the maximum element, the maximum element in the group must be at most 4/3
 +    times the mean.  The code below uses this form and expects sorted timings.
 +
 +    Args:
 +        bench: The benchmark object
 +        func: The function name
 +        var: The function variant name
 +    """
 +    means = []
 +    lst = bench['functions'][func][var]['timings']
 +    last = len(lst) - 1
 +    while lst:
 +        for i in range(last + 1):
 +            avg = mean(lst[i:])
 +            # A single element always forms a group, so the loop terminates.
 +            if avg > 0.75 * lst[last] or i == last:
 +                means.insert(0, avg)
 +                lst, last = lst[:i], i - 1
 +                break
 +    bench['functions'][func][var]['timings'] = means
 +
 +
 +def do_for_all_timings(bench, callback):
 +    """Invoke a callback on every function variant that has timings
 +
 +    Variants without a 'timings' entry are skipped.
 +    Args:
 +        bench: The benchmark object
 +        callback: Called as callback(bench, function name, variant name)
 +    """
 +    for func in bench['functions'].keys():
 +        for k in bench['functions'][func].keys():
 +            variant = bench['functions'][func][k]
 +            # Only variants carrying detailed timings are of interest.
 +            if 'timings' in variant:
 +                callback(bench, func, k)
 +
 +
 +def compress_timings(points):
 +    """Club points with close enough values into a single mean value
 +
 +    Applies split_list, which documents how the clubbing is done, to every
 +    function variant that has timings.
 +    Args:
 +        points: The benchmark object; its timings are replaced in place.
 +    """
 +    do_for_all_timings(points, split_list)
 +
 +
 +def parse_bench(filename, schema_filename):
 +    """Load a benchmark output file and validate it against a schema
 +
 +    The timings of every function variant are sorted before returning.
 +    Args:
 +        filename: Name of the benchmark output file.
 +        schema_filename: Name of the JSON schema file to validate against.
 +    Return:
 +        The bench dictionary.
 +    """
 +    with open(schema_filename, 'r') as schemafile:
 +        schema = json.load(schemafile)
 +    with open(filename, 'r') as benchfile:
 +        bench = json.load(benchfile)
 +    validator.validate(bench, schema)
 +    sort_timings = lambda b, f, v: b['functions'][f][v]['timings'].sort()
 +    do_for_all_timings(bench, sort_timings)
 +    return bench
 diff -pruN a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
 --- a/benchtests/scripts/validate_benchout.py	2015-05-07 11:58:40.000000000 +0530
 +++ b/benchtests/scripts/validate_benchout.py	2015-05-07 15:32:41.844584032 +0530
@@ -27,37 +27,26 @@ import sys
 import os
 try:
 -    import jsonschema
 +    import import_bench as bench
 except ImportError:
 -    print('Could not find jsonschema module.  Output not validated.')
 +    print('Import Error: Output will not be validated.')
     # Return success because we don't want the bench target to fail just
     # because the jsonschema module was not found.
     sys.exit(os.EX_OK)
 -def validate_bench(benchfile, schemafile):
 -    """Validate benchmark file
 -
 -    Validate a benchmark output file against a JSON schema.
 +def print_and_exit(message, exitcode):
 +    """Prints message to stderr and returns the exit code.
     Args:
 -        benchfile: The file name of the bench.out file.
 -        schemafile: The file name of the JSON schema file to validate
 -        bench.out against.
 +        message: The message to print
 +        exitcode: The exit code to return
 -    Exceptions:
 -        jsonschema.ValidationError: When bench.out is not valid
 -        jsonschema.SchemaError: When the JSON schema is not valid
 -        IOError: If any of the files are not found.
 +    Returns:
 +        The passed exit code
     """
 -    with open(benchfile, 'r') as bfile:
 -        with open(schemafile, 'r') as sfile:
 -            bench = json.load(bfile)
 -            schema = json.load(sfile)
 -            jsonschema.validate(bench, schema)
 -
 -    # If we reach here, we're all good.
 -    print("Benchmark output in %s is valid." % benchfile)
 +    print(message, file=sys.stderr)
 +    return exitcode
 def main(args):
@@ -73,11 +62,23 @@ def main(args):
         Exceptions thrown by validate_bench
     """
     if len(args) != 2:
 -        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
 -                file=sys.stderr)
 -        return os.EX_USAGE
 +        return print_and_exit("Usage: %s <bench.out file> <bench.out schema>"
 +                % sys.argv[0], os.EX_USAGE)
 +
 +    try:
 +        bench.parse_bench(args[0], args[1])
 +    except IOError as e:
 +        return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror),
 +                os.EX_OSFILE)
 +
 +    except bench.validator.ValidationError as e:
 +        return print_and_exit("Invalid benchmark output: %s" % e.message,
 +            os.EX_DATAERR)
 +
 +    except bench.validator.SchemaError as e:
 +        return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
 -    validate_bench(args[0], args[1])
 +    print("Benchmark output in %s is valid." % args[0])
     return os.EX_OK
--- a/glibc.spec
+++ b/glibc.spec
@ -14,6 +14,7 @@
 # If run_glibc_tests is zero then tests are not run for the build.
 # You must always set run_glibc_tests to one for production builds.
 %define run_glibc_tests 1
 %define build_benchtests 1
 # Run valgrind test to ensure compatibility.
 %ifarch %{ix86} x86_64 ppc ppc64le s390x armv7hl aarch64
 %define run_valgrind_tests 1
@ -110,6 +111,8 @@ Source3: libc-lock.h
 Source4: nscd.conf
 Source7: nsswitch.conf
 Source8: power6emul.c
 Source9: bench.mk
 Source10: glibc-bench-compare
 ##############################################################################
 # Start of glibc patches
@ -249,6 +252,14 @@ Patch2031: %{name}-rh1070416.patch
 Patch2033: %{name}-aarch64-tls-fixes.patch
 Patch2034: %{name}-aarch64-workaround-nzcv-clobber-in-tlsdesc.patch
 ##############################################################################
 #
 # Benchmark comparison patches.
 #
 ##############################################################################
 Patch3001: %{name}-bench-compare.patch
 Patch3002: %{name}-bench-build.patch
 ##############################################################################
 # End of glibc patches.
 ##############################################################################
@ -532,6 +543,15 @@ package or when debugging this package.
 %endif # %{debuginfocommonarches}
 %endif # 0%{?_enable_debug_packages}
 %if %{build_benchtests}
 %package benchtests
 Summary: Benchmarking binaries and scripts for %{name}
 Group: Development/Debug
 %description benchtests
 This package provides built benchmark binaries and scripts to run
 microbenchmark tests on the system.
 %endif
 ##############################################################################
 # Prepare for the build.
 ##############################################################################
@ -583,6 +603,8 @@ package or when debugging this package.
 %patch0053 -p1
 %patch0054 -p1
 %patch0055 -p1 -R
 %patch3001 -p1
 %patch3002 -p1
 ##############################################################################
 # %%prep - Additional prep required...
@ -601,6 +623,9 @@ package or when debugging this package.
 rm -f sysdeps/powerpc/powerpc32/power4/hp-timing.[ch]
 %endif
 # Make benchmark scripts executable
 chmod +x benchtests/scripts/*.py scripts/pylint
 # Remove all files generated from patching.
 find . -type f -size 0 -o -name "*.orig" -exec rm -f {} \;
@ -1289,6 +1314,38 @@ ln -sf /%{_lib}/ld64.so.1 $RPM_BUILD_ROOT/lib/ld64.so.1
 ln -sf /lib/ld-linux-armhf.so.3 $RPM_BUILD_ROOT/lib/ld-linux.so.3
 %endif
 %if %{build_benchtests}
 # Build benchmark binaries.  bench-build only builds them; nothing is run.
 pushd build-%{target}
 make BENCH_DURATION=1 bench-build
 popd
 # Copy over benchmark binaries.
 mkdir -p $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests
 cp $(find build-%{target}/benchtests -type f -executable) $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests/
 find build-%{target}/benchtests -type f -executable | while read b; do
 	echo "%{_prefix}/libexec/glibc-benchtests/$(basename $b)"
 done >> benchtests.filelist
 # ... and the makefile.
 for b in %{SOURCE9} %{SOURCE10}; do
 	cp $b $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests/
 	echo "%{_prefix}/libexec/glibc-benchtests/$(basename $b)" >> benchtests.filelist
 done
 # .. and finally, the comparison scripts.
 cp benchtests/scripts/benchout.schema.json $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests/
 cp benchtests/scripts/compare_bench.py $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests/
 cp benchtests/scripts/import_bench.py $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests/
 cp benchtests/scripts/validate_benchout.py $RPM_BUILD_ROOT%{_prefix}/libexec/glibc-benchtests/
 echo "%{_prefix}/libexec/glibc-benchtests/benchout.schema.json" >> benchtests.filelist
 echo "%{_prefix}/libexec/glibc-benchtests/compare_bench.py*" >> benchtests.filelist
 echo "%{_prefix}/libexec/glibc-benchtests/import_bench.py*" >> benchtests.filelist
 echo "%{_prefix}/libexec/glibc-benchtests/validate_benchout.py*" >> benchtests.filelist
 %endif
 ###############################################################################
 # Rebuild libpthread.a using --whole-archive to ensure all of libpthread
 # is included in a static link. This prevents any problems when linking
@ -1334,7 +1391,7 @@ find_debuginfo_args="$find_debuginfo_args \
 	-p '.*/(sbin|libexec)/.*' \
 	-o debuginfocommon.filelist \
 	-l rpm.filelist \
-	-l nosegneg.filelist"
+	-l nosegneg.filelist -l benchtests.filelist"
 %endif
 eval /usr/lib/rpm/find-debuginfo.sh \
 	"$find_debuginfo_args" \
@ -1754,7 +1811,15 @@ rm -f *.filelist*
 %endif
 %endif
 %if %{build_benchtests}
 %files benchtests -f benchtests.filelist
 %defattr(-,root,root)
 %endif
 %changelog
 * Fri May 08 2015 Siddhesh Poyarekar <siddhesh@redhat.com> - 2.21.90-12
 - Add benchmark comparison scripts.
 * Thu May 07 2015 Siddhesh Poyarekar <siddhesh@redhat.com> - 2.21.90-11
 - Auto-sync with upstream master.
 - Revert arena threshold fix to work around #1209451.