1#!/usr/bin/env python 2 3"""Simple harness that benchmarks different variants of the routines, 4caches the results, and emits all of the records at the end. 5 6Results are generated for different values of: 7 * Source 8 * Routine 9 * Length 10 * Alignment 11""" 12 13import argparse 14import subprocess 15import math 16import sys 17 18# Prefix to the executables 19build = '../build/try-' 20 21ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen' 22 23HAS = { 24 'this': 'bounce memchr memcpy memset strchr strcmp strcpy strlen', 25 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen', 26 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen', 27 'bionic-c': ALL, 28 'csl': 'memcpy memset', 29 'glibc': 'memcpy memset strchr strlen', 30 'glibc-c': ALL, 31 'newlib': 'memcpy strcmp strcpy strlen', 32 'newlib-c': ALL, 33 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen', 34 'plain': 'memset memcpy strcmp strcpy', 35} 36 37BOUNCE_ALIGNMENTS = ['1'] 38SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32'] 39DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32'] 40 41ALIGNMENTS = { 42 'bounce': BOUNCE_ALIGNMENTS, 43 'memchr': SINGLE_BUFFER_ALIGNMENTS, 44 'memset': SINGLE_BUFFER_ALIGNMENTS, 45 'strchr': SINGLE_BUFFER_ALIGNMENTS, 46 'strlen': SINGLE_BUFFER_ALIGNMENTS, 47 'memcmp': DUAL_BUFFER_ALIGNMENTS, 48 'memcpy': DUAL_BUFFER_ALIGNMENTS, 49 'strcmp': DUAL_BUFFER_ALIGNMENTS, 50 'strcpy': DUAL_BUFFER_ALIGNMENTS, 51} 52 53VARIANTS = sorted(HAS.keys()) 54FUNCTIONS = sorted(ALIGNMENTS.keys()) 55 56NUM_RUNS = 5 57 58def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False): 59 """Perform a single run, exercising the cache as appropriate.""" 60 key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id)) 61 62 if key in cache: 63 got = cache[key] 64 else: 65 xbuild = build 66 cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals() 67 68 try: 69 got = subprocess.check_output(cmd.split()).strip() 70 except OSError, ex: 71 assert False, 'Error %s while running %s' % (ex, cmd) 72 73 parts = got.split(':') 74 took = float(parts[7]) 75 76 cache[key] = got 77 78 if not quiet: 79 print got 80 sys.stdout.flush() 81 82 return took 83 84def run_many(cache, variants, bytes, all_functions): 85 # We want the data to come out in a useful order. So fix an 86 # alignment and function, and do all sizes for a variant first 87 bytes = sorted(bytes) 88 mid = bytes[int(len(bytes)/1.5)] 89 90 if not all_functions: 91 # Use the ordering in 'this' as the default 92 all_functions = HAS['this'].split() 93 94 # Find all other functions 95 for functions in HAS.values(): 96 for function in functions.split(): 97 if function not in all_functions: 98 all_functions.append(function) 99 100 for function in all_functions: 101 for alignment in ALIGNMENTS[function]: 102 for variant in variants: 103 if function not in HAS[variant].split(): 104 continue 105 106 # Run a tracer through and see how long it takes and 107 # adjust the number of loops based on that. Not great 108 # for memchr() and similar which are O(n), but it will 109 # do 110 f = 50000000 111 want = 5.0 112 113 loops = int(f / math.sqrt(max(1, mid))) 114 took = run(cache, variant, function, mid, loops, alignment, 0, 115 quiet=True) 116 # Keep it reasonable for silly routines like bounce 117 factor = min(20, max(0.05, want/took)) 118 f = f * factor 119 120 # Round f to a few significant figures 121 scale = 10**int(math.log10(f) - 1) 122 f = scale*int(f/scale) 123 124 for b in sorted(bytes): 125 # Figure out the number of loops to give a roughly consistent run 126 loops = int(f / math.sqrt(max(1, b))) 127 for run_id in range(0, NUM_RUNS): 128 run(cache, variant, function, b, loops, alignment, 129 run_id) 130 131def run_top(cache): 132 parser = argparse.ArgumentParser() 133 parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS) 134 parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS) 135 parser.add_argument("-l", "--limit", type=int, help="upper limit to test to (in bytes)", default = 512*1024) 136 args = parser.parse_args() 137 138 # Test all powers of 2 139 step1 = 2.0 140 # Test intermediate powers of 1.4 141 step2 = 1.4 142 143 bytes = [] 144 145 for step in [step1, step2]: 146 if step: 147 # Figure out how many steps get us up to the top 148 steps = int(round(math.log(args.limit) / math.log(step))) 149 bytes.extend([int(step**x) for x in range(0, steps+1)]) 150 151 run_many(cache, args.variants, bytes, args.functions) 152 153def main(): 154 cachename = 'cache.txt' 155 156 cache = {} 157 158 try: 159 with open(cachename) as f: 160 for line in f: 161 line = line.strip() 162 parts = line.split(':') 163 cache[':'.join(parts[:7])] = line 164 except: 165 pass 166 167 try: 168 run_top(cache) 169 finally: 170 with open(cachename, 'w') as f: 171 for line in sorted(cache.values()): 172 print >> f, line 173 174if __name__ == '__main__': 175 main() 176