xref: /freebsd/contrib/arm-optimized-routines/string/bench/strlen.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
1 /*
2  * strlen benchmark.
3  *
4  * Copyright (c) 2020-2021, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #define _GNU_SOURCE
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <assert.h>
13 #include "stringlib.h"
14 #include "benchlib.h"
15 
/* Repetition counts for the timed loops in main: ITERS passes over the
   random test set, ITERS2 calls per small string size and ITERS3 calls per
   medium string size.  */
#define ITERS 5000
#define ITERS2 20000000
#define ITERS3 2000000

/* Number of randomly generated strlen inputs.  */
#define NUM_TESTS 16384

/* The shared buffer is laid out as MAX_ALIGN slots, each holding MAX_STRLEN
   characters followed by one terminator byte.  */
#define MAX_STRLEN 256
#define MAX_ALIGN 32

/* Page-aligned character buffer that every benchmark reads its strings
   from.  */
static char a[MAX_ALIGN * (MAX_STRLEN + 1)] __attribute__ ((aligned (4096)));
25 
/* Expands to one funtab entry: the function's name as a string, its address
   and the value stored in test_mte.  */
#define F(fn, flag) { .name = #fn, .fun = fn, .test_mte = flag },

/* Table of strlen implementations to benchmark.  Which entries are present
   depends on the target architecture macros; the entry with a null name
   marks the end of the table.  */
static const struct fun
{
  const char *name;
  size_t (*fun) (const char *s);
  int test_mte;
} funtab[] = {
  // clang-format off
  F(strlen, 0)
#if __aarch64__
  F(__strlen_aarch64, 0)
  F(__strlen_aarch64_mte, 1)
# if __ARM_FEATURE_SVE
  F(__strlen_aarch64_sve, 1)
# endif
#elif __arm__
# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
  F(__strlen_armv6t2, 0)
# endif
#endif
  { .name = 0, .fun = 0, .test_mte = 0 }
  // clang-format on
};
#undef F
51 
/* One entry per generated test: the offset into the buffer `a` at which
   that test's string starts.  Filled in by init_strlen_tests.  */
static uint16_t strlen_tests[NUM_TESTS];
53 
/* A string length paired with how often it occurs.  */
typedef struct
{
  uint16_t size;
  uint16_t freq;
} freq_data_t;

/* A string alignment paired with how often it occurs.  */
typedef struct
{
  uint8_t align;
  uint16_t freq;
} align_data_t;

/* strlen_len_arr is indexed with a random value masked by SIZE_MASK, so
   each length appears in it as many times as its frequency.  */
#define SIZE_NUM 65536
#define SIZE_MASK (SIZE_NUM - 1)
static uint8_t strlen_len_arr[SIZE_NUM];

/* Frequency data for strlen sizes up to 128 based on SPEC2017.  The
   frequencies add up to SIZE_NUM; an entry with zero frequency ends the
   table.  */
static freq_data_t strlen_len_freq[] =
{
  { 12, 22671}, { 18, 12834}, { 13,  9555}, {  6,  6348},
  { 17,  6095}, { 11,  2115}, { 10,  1335}, {  7,   814},
  {  2,   646}, {  9,   483}, {  8,   471}, { 16,   418},
  {  4,   390}, {  1,   388}, {  5,   233}, {  3,   204},
  {  0,    79}, { 14,    79}, { 15,    69}, { 26,    36},
  { 22,    35}, { 31,    24}, { 32,    24}, { 19,    21},
  { 25,    17}, { 28,    15}, { 21,    14}, { 33,    14},
  { 20,    13}, { 24,     9}, { 29,     9}, { 30,     9},
  { 23,     7}, { 34,     7}, { 27,     6}, { 44,     5},
  { 42,     4}, { 45,     3}, { 47,     3}, { 40,     2},
  { 41,     2}, { 43,     2}, { 58,     2}, { 78,     2},
  { 36,     2}, { 48,     1}, { 52,     1}, { 60,     1},
  { 64,     1}, { 56,     1}, { 76,     1}, { 68,     1},
  { 80,     1}, { 84,     1}, { 72,     1}, { 86,     1},
  { 35,     1}, { 39,     1}, { 50,     1}, { 38,     1},
  { 37,     1}, { 46,     1}, { 98,     1}, {102,     1},
  {128,     1}, { 51,     1}, {107,     1}, {  0,     0}
};
77 
78 #define ALIGN_NUM 1024
79 #define ALIGN_MASK (ALIGN_NUM - 1)
80 static uint8_t strlen_align_arr[ALIGN_NUM];
81 
82 /* Alignment data for strlen based on SPEC2017.  */
83 static align_data_t string_align_freq[] =
84 {
85   {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}
86 };
87 
88 static void
init_strlen_distribution(void)89 init_strlen_distribution (void)
90 {
91   int i, j, freq, size, n;
92 
93   for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++)
94     for (j = 0, size = strlen_len_freq[i].size; j < freq; j++)
95       strlen_len_arr[n++] = size;
96   assert (n == SIZE_NUM);
97 
98   for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++)
99     for (j = 0, size = string_align_freq[i].align; j < freq; j++)
100       strlen_align_arr[n++] = size;
101   assert (n == ALIGN_NUM);
102 }
103 
104 static void
init_strlen_tests(void)105 init_strlen_tests (void)
106 {
107   uint16_t index[MAX_ALIGN];
108 
109   memset (a, 'x', sizeof (a));
110 
111   /* Create indices for strings at all alignments.  */
112   for (int i = 0; i < MAX_ALIGN; i++)
113     {
114       index[i] = i * (MAX_STRLEN + 1);
115       a[index[i] + MAX_STRLEN] = 0;
116     }
117 
118   /* Create a random set of strlen input strings using the string length
119      and alignment distributions.  */
120   for (int n = 0; n < NUM_TESTS; n++)
121     {
122       int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];
123       int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];
124 
125       strlen_tests[n] =
126 	index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
127     }
128 }
129 
130 static volatile size_t maskv = 0;
131 
main(void)132 int main (void)
133 {
134   rand32 (0x12345678);
135   init_strlen_distribution ();
136   init_strlen_tests ();
137 
138   printf ("\nRandom strlen (bytes/ns):\n");
139   for (int f = 0; funtab[f].name != 0; f++)
140     {
141       size_t res = 0, strlen_size = 0, mask = maskv;
142       printf ("%22s ", funtab[f].name);
143 
144       for (int c = 0; c < NUM_TESTS; c++)
145 	strlen_size += funtab[f].fun (a + strlen_tests[c]);
146       strlen_size *= ITERS;
147 
148       /* Measure latency of strlen result with (res & mask).  */
149       uint64_t t = clock_get_ns ();
150       for (int i = 0; i < ITERS; i++)
151 	for (int c = 0; c < NUM_TESTS; c++)
152 	  res = funtab[f].fun (a + strlen_tests[c] + (res & mask));
153       t = clock_get_ns () - t;
154       printf ("%.2f\n", (double)strlen_size / t);
155     }
156 
157   printf ("\nSmall aligned strlen (bytes/ns):\n");
158   for (int f = 0; funtab[f].name != 0; f++)
159     {
160       printf ("%22s ", funtab[f].name);
161 
162       for (int size = 1; size <= 64; size *= 2)
163 	{
164 	  memset (a, 'x', size);
165 	  a[size - 1] = 0;
166 
167 	  uint64_t t = clock_get_ns ();
168 	  for (int i = 0; i < ITERS2; i++)
169 	    funtab[f].fun (a);
170 	  t = clock_get_ns () - t;
171 	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
172 		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
173 	}
174       printf ("\n");
175     }
176 
177   printf ("\nSmall unaligned strlen (bytes/ns):\n");
178   for (int f = 0; funtab[f].name != 0; f++)
179     {
180       printf ("%22s ", funtab[f].name);
181 
182       int align = 9;
183       for (int size = 1; size <= 64; size *= 2)
184 	{
185 	  memset (a + align, 'x', size);
186 	  a[align + size - 1] = 0;
187 
188 	  uint64_t t = clock_get_ns ();
189 	  for (int i = 0; i < ITERS2; i++)
190 	    funtab[f].fun (a + align);
191 	  t = clock_get_ns () - t;
192 	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
193 		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
194 	}
195       printf ("\n");
196     }
197 
198   printf ("\nMedium strlen (bytes/ns):\n");
199   for (int f = 0; funtab[f].name != 0; f++)
200     {
201       printf ("%22s ", funtab[f].name);
202 
203       for (int size = 128; size <= 4096; size *= 2)
204 	{
205 	  memset (a, 'x', size);
206 	  a[size - 1] = 0;
207 
208 	  uint64_t t = clock_get_ns ();
209 	  for (int i = 0; i < ITERS3; i++)
210 	    funtab[f].fun (a);
211 	  t = clock_get_ns () - t;
212 	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
213 		  size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
214 	}
215       printf ("\n");
216     }
217 
218   printf ("\n");
219 
220   return 0;
221 }
222