xref: /freebsd/contrib/arm-optimized-routines/string/bench/strlen.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /*
2  * strlen benchmark.
3  *
4  * Copyright (c) 2020-2021, Arm Limited.
5  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6  */
7 
8 #define _GNU_SOURCE
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <assert.h>
13 #include "stringlib.h"
14 #include "benchlib.h"
15 
/* Benchmark sizing.  */
enum
{
  ITERS = 5000,		/* Passes over the random test set (strlen_random).  */
  ITERS2 = 40000000,	/* Calls per size in the small-string benchmarks.  */
  ITERS3 = 4000000,	/* Calls per size in the medium-string benchmark.  */
  NUM_TESTS = 65536	/* Number of entries in the random test set.  */
};

/* Layout of the random test strings: MAX_ALIGN regions, each holding
   MAX_STRLEN characters plus a terminating NUL.  */
enum
{
  MAX_ALIGN = 32,
  MAX_STRLEN = 128
};

/* Shared, page-aligned character buffer that every benchmark in this file
   reads its strings from.  */
static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));
25 
/* Print the heading STR, run the benchmark TESTFN once for every strlen
   implementation listed below, then print a blank line.

   STR is handed to printf as the format string, so it must be a literal
   without conversion specifications.  RUN, RUNA64, RUNSVE and RUNT32 are
   not defined in this file (presumably benchlib.h provides them and
   restricts the last three to matching targets -- confirm there).

   The body is wrapped in do { ... } while (0) so that the expansion is a
   single statement: the macro can then be used safely as the body of an
   unbraced if/else, and the caller supplies the trailing semicolon.  */
#define DOTEST(STR,TESTFN)			\
  do						\
    {						\
      printf (STR);				\
      RUN (TESTFN, strlen);			\
      RUNA64 (TESTFN, __strlen_aarch64);	\
      RUNA64 (TESTFN, __strlen_aarch64_mte);	\
      RUNSVE (TESTFN, __strlen_aarch64_sve);	\
      RUNT32 (TESTFN, __strlen_armv6t2);	\
      printf ("\n");				\
    }						\
  while (0)
34 
35 static uint16_t strlen_tests[NUM_TESTS];
36 
/* One entry of a string length distribution table.  */
typedef struct
{
  uint16_t size;	/* String length.  */
  uint16_t freq;	/* Number of lookup-table slots given to this length.  */
} freq_data_t;

/* One entry of a string alignment distribution table.  */
typedef struct
{
  uint8_t align;	/* Alignment in bytes.  */
  uint16_t freq;	/* Number of lookup-table slots given to this alignment.  */
} align_data_t;
39 
/* The length lookup table has a power-of-two number of slots so that a
   random value can be reduced to a valid index by masking with SIZE_MASK.  */
enum
{
  SIZE_NUM = 1 << 16,
  SIZE_MASK = SIZE_NUM - 1
};

/* Length lookup table: one string length per slot.  */
static uint8_t strlen_len_arr[SIZE_NUM];
43 
44 /* Frequency data for strlen sizes up to 128 based on SPEC2017.  */
45 static freq_data_t strlen_len_freq[] =
46 {
47   { 12,22671}, { 18,12834}, { 13, 9555}, {  6, 6348}, { 17, 6095}, { 11, 2115},
48   { 10, 1335}, {  7,  814}, {  2,  646}, {  9,  483}, {  8,  471}, { 16,  418},
49   {  4,  390}, {  1,  388}, {  5,  233}, {  3,  204}, {  0,   79}, { 14,   79},
50   { 15,   69}, { 26,   36}, { 22,   35}, { 31,   24}, { 32,   24}, { 19,   21},
51   { 25,   17}, { 28,   15}, { 21,   14}, { 33,   14}, { 20,   13}, { 24,    9},
52   { 29,    9}, { 30,    9}, { 23,    7}, { 34,    7}, { 27,    6}, { 44,    5},
53   { 42,    4}, { 45,    3}, { 47,    3}, { 40,    2}, { 41,    2}, { 43,    2},
54   { 58,    2}, { 78,    2}, { 36,    2}, { 48,    1}, { 52,    1}, { 60,    1},
55   { 64,    1}, { 56,    1}, { 76,    1}, { 68,    1}, { 80,    1}, { 84,    1},
56   { 72,    1}, { 86,    1}, { 35,    1}, { 39,    1}, { 50,    1}, { 38,    1},
57   { 37,    1}, { 46,    1}, { 98,    1}, {102,    1}, {128,    1}, { 51,    1},
58   {107,    1}, { 0,     0}
59 };
60 
/* The alignment lookup table has a power-of-two number of slots so that a
   random value can be reduced to a valid index by masking with ALIGN_MASK.  */
enum
{
  ALIGN_NUM = 1 << 10,
  ALIGN_MASK = ALIGN_NUM - 1
};

/* Alignment lookup table: one alignment value per slot.  */
static uint8_t strlen_align_arr[ALIGN_NUM];
64 
65 /* Alignment data for strlen based on SPEC2017.  */
66 static align_data_t string_align_freq[] =
67 {
68   {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}
69 };
70 
71 static void
72 init_strlen_distribution (void)
73 {
74   int i, j, freq, size, n;
75 
76   for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++)
77     for (j = 0, size = strlen_len_freq[i].size; j < freq; j++)
78       strlen_len_arr[n++] = size;
79   assert (n == SIZE_NUM);
80 
81   for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++)
82     for (j = 0, size = string_align_freq[i].align; j < freq; j++)
83       strlen_align_arr[n++] = size;
84   assert (n == ALIGN_NUM);
85 }
86 
87 static void
88 init_strlen_tests (void)
89 {
90   uint16_t index[MAX_ALIGN];
91 
92   memset (a, 'x', sizeof (a));
93 
94   /* Create indices for strings at all alignments.  */
95   for (int i = 0; i < MAX_ALIGN; i++)
96     {
97       index[i] = i * (MAX_STRLEN + 1);
98       a[index[i] + MAX_STRLEN] = 0;
99     }
100 
101   /* Create a random set of strlen input strings using the string length
102      and alignment distributions.  */
103   for (int n = 0; n < NUM_TESTS; n++)
104     {
105       int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];
106       int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];
107 
108       strlen_tests[n] =
109 	index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
110       assert ((strlen_tests[n] & (align - 1)) == 0);
111       assert (strlen (a + strlen_tests[n]) == exp_len);
112     }
113 }
114 
115 static volatile size_t maskv = 0;
116 
117 static void inline __attribute ((always_inline))
118 strlen_random (const char *name, size_t (*fn)(const char *))
119 {
120   size_t res = 0, mask = maskv;
121   uint64_t strlen_size = 0;
122   printf ("%22s ", name);
123 
124   for (int c = 0; c < NUM_TESTS; c++)
125     strlen_size += fn (a + strlen_tests[c]) + 1;
126   strlen_size *= ITERS;
127 
128   /* Measure throughput of strlen.  */
129   uint64_t t = clock_get_ns ();
130   for (int i = 0; i < ITERS; i++)
131     for (int c = 0; c < NUM_TESTS; c++)
132       res += fn (a + strlen_tests[c]);
133   t = clock_get_ns () - t;
134   printf ("tp: %.3f ", (double)strlen_size / t);
135 
136   /* Measure latency of strlen result with (res & mask).  */
137   t = clock_get_ns ();
138   for (int i = 0; i < ITERS; i++)
139     for (int c = 0; c < NUM_TESTS; c++)
140       res += fn (a + strlen_tests[c] + (res & mask));
141   t = clock_get_ns () - t;
142   printf ("lat: %.3f\n", (double)strlen_size / t);
143   maskv = res & mask;
144 }
145 
146 static void inline __attribute ((always_inline))
147 strlen_small_aligned (const char *name, size_t (*fn)(const char *))
148 {
149   printf ("%22s ", name);
150 
151   size_t res = 0, mask = maskv;
152   for (int size = 1; size <= 64; size *= 2)
153     {
154       memset (a, 'x', size);
155       a[size - 1] = 0;
156 
157       uint64_t t = clock_get_ns ();
158       for (int i = 0; i < ITERS2; i++)
159 	res += fn (a + (i & mask));
160       t = clock_get_ns () - t;
161       printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
162 	      size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
163     }
164   maskv &= res;
165   printf ("\n");
166 }
167 
168 static void inline __attribute ((always_inline))
169 strlen_small_unaligned (const char *name, size_t (*fn)(const char *))
170 {
171   printf ("%22s ", name);
172 
173   size_t res = 0, mask = maskv;
174   int align = 9;
175   for (int size = 1; size <= 64; size *= 2)
176     {
177       memset (a + align, 'x', size);
178       a[align + size - 1] = 0;
179 
180       uint64_t t = clock_get_ns ();
181       for (int i = 0; i < ITERS2; i++)
182 	res += fn (a + align + (i & mask));
183       t = clock_get_ns () - t;
184       printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
185 	      size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
186     }
187   maskv &= res;
188   printf ("\n");
189 }
190 
191 static void inline __attribute ((always_inline))
192 strlen_medium (const char *name, size_t (*fn)(const char *))
193 {
194   printf ("%22s ", name);
195 
196   size_t res = 0, mask = maskv;
197   for (int size = 128; size <= 4096; size *= 2)
198     {
199       memset (a, 'x', size);
200       a[size - 1] = 0;
201 
202       uint64_t t = clock_get_ns ();
203       for (int i = 0; i < ITERS3; i++)
204 	res += fn (a + (i & mask));
205       t = clock_get_ns () - t;
206       printf ("%d%c: %5.2f ", size < 1024 ? size : size / 1024,
207 	      size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
208     }
209   maskv &= res;
210   printf ("\n");
211 }
212 
213 int main (void)
214 {
215   rand32 (0x12345678);
216   init_strlen_distribution ();
217   init_strlen_tests ();
218 
219   DOTEST ("Random strlen (bytes/ns):\n", strlen_random);
220   DOTEST ("Small aligned strlen (bytes/ns):\n", strlen_small_aligned);
221   DOTEST ("Small unaligned strlen (bytes/ns):\n", strlen_small_unaligned);
222   DOTEST ("Medium strlen (bytes/ns):\n", strlen_medium);
223 
224   return 0;
225 }
226