xref: /freebsd/contrib/cortex-strings/scripts/bench.py (revision 63d1fd5970ec814904aa0f4580b10a0d302d08b2)
1#!/usr/bin/env python
2
3"""Simple harness that benchmarks different variants of the routines,
4caches the results, and emits all of the records at the end.
5
6Results are generated for different values of:
7 * Source
8 * Routine
9 * Length
10 * Alignment
11"""
12
13import argparse
14import subprocess
15import math
16import sys
17
18# Prefix to the executables
19build = '../build/try-'
20
21ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'
22
23HAS = {
24    'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen',
25    'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen',
26    'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen',
27    'bionic-c': ALL,
28    'csl': 'memcpy memset',
29    'glibc': 'memcpy memset strchr strlen',
30    'glibc-c': ALL,
31    'newlib': 'memcpy strcmp strcpy strlen',
32    'newlib-c': ALL,
33    'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
34    'plain': 'memset memcpy strcmp strcpy',
35}
36
37BOUNCE_ALIGNMENTS = ['1']
38SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
39DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']
40
41ALIGNMENTS = {
42    'bounce': BOUNCE_ALIGNMENTS,
43    'memchr': SINGLE_BUFFER_ALIGNMENTS,
44    'memset': SINGLE_BUFFER_ALIGNMENTS,
45    'strchr': SINGLE_BUFFER_ALIGNMENTS,
46    'strlen': SINGLE_BUFFER_ALIGNMENTS,
47    'memcmp': DUAL_BUFFER_ALIGNMENTS,
48    'memcpy': DUAL_BUFFER_ALIGNMENTS,
49    'strcmp': DUAL_BUFFER_ALIGNMENTS,
50    'strcpy': DUAL_BUFFER_ALIGNMENTS,
51}
52
53VARIANTS = sorted(HAS.keys())
54FUNCTIONS = sorted(ALIGNMENTS.keys())
55
56NUM_RUNS = 5
57
58def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
59    """Perform a single run, exercising the cache as appropriate."""
60    key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id))
61
62    if key in cache:
63        got = cache[key]
64    else:
65        xbuild = build
66        cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals()
67
68        try:
69            got = subprocess.check_output(cmd.split()).strip()
70        except OSError, ex:
71            assert False, 'Error %s while running %s' % (ex, cmd)
72
73    parts = got.split(':')
74    took = float(parts[7])
75
76    cache[key] = got
77
78    if not quiet:
79        print got
80        sys.stdout.flush()
81
82    return took
83
84def run_many(cache, variants, bytes, all_functions):
85    # We want the data to come out in a useful order.  So fix an
86    # alignment and function, and do all sizes for a variant first
87    bytes = sorted(bytes)
88    mid = bytes[int(len(bytes)/1.5)]
89
90    if not all_functions:
91        # Use the ordering in 'this' as the default
92        all_functions = HAS['this'].split()
93
94        # Find all other functions
95        for functions in HAS.values():
96            for function in functions.split():
97                if function not in all_functions:
98                    all_functions.append(function)
99
100    for function in all_functions:
101        for alignment in ALIGNMENTS[function]:
102            for variant in variants:
103                if function not in HAS[variant].split():
104                    continue
105
106                # Run a tracer through and see how long it takes and
107                # adjust the number of loops based on that.  Not great
108                # for memchr() and similar which are O(n), but it will
109                # do
110                f = 50000000
111                want = 5.0
112
113                loops = int(f / math.sqrt(max(1, mid)))
114                took = run(cache, variant, function, mid, loops, alignment, 0,
115                           quiet=True)
116                # Keep it reasonable for silly routines like bounce
117                factor = min(20, max(0.05, want/took))
118                f = f * factor
119
120                # Round f to a few significant figures
121                scale = 10**int(math.log10(f) - 1)
122                f = scale*int(f/scale)
123
124                for b in sorted(bytes):
125                    # Figure out the number of loops to give a roughly consistent run
126                    loops = int(f / math.sqrt(max(1, b)))
127                    for run_id in range(0, NUM_RUNS):
128                        run(cache, variant, function, b, loops, alignment,
129                            run_id)
130
131def run_top(cache):
132    parser = argparse.ArgumentParser()
133    parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS)
134    parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS)
135    parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default = 512*1024)
136    args = parser.parse_args()
137
138    # Test all powers of 2
139    step1 = 2.0
140    # Test intermediate powers of 1.4
141    step2 = 1.4
142
143    bytes = []
144
145    for step in [step1, step2]:
146        if step:
147            # Figure out how many steps get us up to the top
148            steps = int(round(math.log(args.limit) / math.log(step)))
149            bytes.extend([int(step**x) for x in range(0, steps+1)])
150
151    run_many(cache, args.variants, bytes, args.functions)
152
153def main():
154    cachename = 'cache.txt'
155
156    cache = {}
157
158    try:
159        with open(cachename) as f:
160            for line in f:
161                line = line.strip()
162                parts = line.split(':')
163                cache[':'.join(parts[:7])] = line
164    except:
165        pass
166
167    try:
168        run_top(cache)
169    finally:
170        with open(cachename, 'w') as f:
171            for line in sorted(cache.values()):
172                print >> f, line
173
174if __name__ == '__main__':
175    main()
176