xref: /freebsd/contrib/arm-optimized-routines/string/bench/strlen.c (revision 072a4ba82a01476eaee33781ccd241033eefcf0b)
/*
 * strlen benchmark.
 *
 * Copyright (c) 2020-2021, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
731914882SAlex Richardson 
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "stringlib.h"
#include "benchlib.h"
1531914882SAlex Richardson 
/* Repetition counts: ITERS passes over the random test set, ITERS2 calls
   per size in the small-string runs and ITERS3 calls per size in the
   medium-string run.  NUM_TESTS is the number of random test strings.  */
#define ITERS 5000
#define ITERS2 20000000
#define ITERS3 2000000
#define NUM_TESTS 16384

/* The random test lays the buffer out as MAX_ALIGN slots of
   MAX_STRLEN + 1 bytes each.  */
#define MAX_ALIGN 32
#define MAX_STRLEN 256

/* Shared string buffer used by every benchmark.  It is 4096-byte aligned,
   so an offset into it fully determines the alignment of the string that
   starts there.  */
static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));
2531914882SAlex Richardson 
/* Build one funtab entry; the function's own identifier doubles as the
   label printed next to its results.  */
#define F(x, mte) { .name = #x, .fun = x, .test_mte = mte },

/* Table of strlen implementations to benchmark.  The list is ended by an
   entry whose name is NULL.  */
static const struct fun
{
  const char *name;			/* Label printed for each result row.  */
  size_t (*fun) (const char *s);	/* Implementation under test.  */
  int test_mte;				/* Not read in this file; presumably marks
					   MTE-capable variants (confirm).  */
} funtab[] = {
  // clang-format off
  F(strlen, 0)
#if __aarch64__
  F(__strlen_aarch64, 0)
  F(__strlen_aarch64_mte, 1)
# if __ARM_FEATURE_SVE
  F(__strlen_aarch64_sve, 1)
# endif
#elif __arm__
# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
  F(__strlen_armv6t2, 0)
# endif
#endif
  { .name = NULL, .fun = NULL, .test_mte = 0 }
  // clang-format on
};
#undef F
5131914882SAlex Richardson 
52d49ad206SAndrew Turner static uint16_t strlen_tests[NUM_TESTS];
5331914882SAlex Richardson 
/* One entry of a frequency table: a string length and how many of the
   SIZE_NUM lookup slots it occupies.  */
typedef struct
{
  uint16_t size;
  uint16_t freq;
} freq_data_t;

/* One entry of a frequency table: an alignment and how many of the
   ALIGN_NUM lookup slots it occupies.  */
typedef struct
{
  uint8_t align;
  uint16_t freq;
} align_data_t;

/* Flat lookup array of string lengths.  SIZE_NUM is a power of two so that
   a random value can be reduced to an index with SIZE_MASK.  */
#define SIZE_NUM 65536
#define SIZE_MASK (SIZE_NUM - 1)
static uint8_t strlen_len_arr[SIZE_NUM];

/* Frequency data for strlen sizes up to 128 based on SPEC2017.  The
   frequencies add up to SIZE_NUM; an entry with frequency 0 ends the
   table.  The table is only ever read, hence const.  */
static const freq_data_t strlen_len_freq[] =
{
  { 12,22671}, { 18,12834}, { 13, 9555}, {  6, 6348}, { 17, 6095}, { 11, 2115},
  { 10, 1335}, {  7,  814}, {  2,  646}, {  9,  483}, {  8,  471}, { 16,  418},
  {  4,  390}, {  1,  388}, {  5,  233}, {  3,  204}, {  0,   79}, { 14,   79},
  { 15,   69}, { 26,   36}, { 22,   35}, { 31,   24}, { 32,   24}, { 19,   21},
  { 25,   17}, { 28,   15}, { 21,   14}, { 33,   14}, { 20,   13}, { 24,    9},
  { 29,    9}, { 30,    9}, { 23,    7}, { 34,    7}, { 27,    6}, { 44,    5},
  { 42,    4}, { 45,    3}, { 47,    3}, { 40,    2}, { 41,    2}, { 43,    2},
  { 58,    2}, { 78,    2}, { 36,    2}, { 48,    1}, { 52,    1}, { 60,    1},
  { 64,    1}, { 56,    1}, { 76,    1}, { 68,    1}, { 80,    1}, { 84,    1},
  { 72,    1}, { 86,    1}, { 35,    1}, { 39,    1}, { 50,    1}, { 38,    1},
  { 37,    1}, { 46,    1}, { 98,    1}, {102,    1}, {128,    1}, { 51,    1},
  {107,    1}, { 0,     0}
};

/* Flat lookup array of alignments, indexed with a random value reduced by
   ALIGN_MASK.  */
#define ALIGN_NUM 1024
#define ALIGN_MASK (ALIGN_NUM - 1)
static uint8_t strlen_align_arr[ALIGN_NUM];

/* Alignment data for strlen based on SPEC2017.  The frequencies add up to
   ALIGN_NUM; an entry with frequency 0 ends the table.  */
static const align_data_t string_align_freq[] =
{
  {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}
};
8731914882SAlex Richardson 
8831914882SAlex Richardson static void
init_strlen_distribution(void)8931914882SAlex Richardson init_strlen_distribution (void)
9031914882SAlex Richardson {
9131914882SAlex Richardson   int i, j, freq, size, n;
9231914882SAlex Richardson 
9331914882SAlex Richardson   for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++)
9431914882SAlex Richardson     for (j = 0, size = strlen_len_freq[i].size; j < freq; j++)
9531914882SAlex Richardson       strlen_len_arr[n++] = size;
9631914882SAlex Richardson   assert (n == SIZE_NUM);
9731914882SAlex Richardson 
9831914882SAlex Richardson   for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++)
9931914882SAlex Richardson     for (j = 0, size = string_align_freq[i].align; j < freq; j++)
10031914882SAlex Richardson       strlen_align_arr[n++] = size;
10131914882SAlex Richardson   assert (n == ALIGN_NUM);
10231914882SAlex Richardson }
10331914882SAlex Richardson 
10431914882SAlex Richardson static void
init_strlen_tests(void)10531914882SAlex Richardson init_strlen_tests (void)
10631914882SAlex Richardson {
10731914882SAlex Richardson   uint16_t index[MAX_ALIGN];
10831914882SAlex Richardson 
10931914882SAlex Richardson   memset (a, 'x', sizeof (a));
11031914882SAlex Richardson 
11131914882SAlex Richardson   /* Create indices for strings at all alignments.  */
11231914882SAlex Richardson   for (int i = 0; i < MAX_ALIGN; i++)
11331914882SAlex Richardson     {
11431914882SAlex Richardson       index[i] = i * (MAX_STRLEN + 1);
11531914882SAlex Richardson       a[index[i] + MAX_STRLEN] = 0;
11631914882SAlex Richardson     }
11731914882SAlex Richardson 
11831914882SAlex Richardson   /* Create a random set of strlen input strings using the string length
11931914882SAlex Richardson      and alignment distributions.  */
120d49ad206SAndrew Turner   for (int n = 0; n < NUM_TESTS; n++)
12131914882SAlex Richardson     {
12231914882SAlex Richardson       int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];
12331914882SAlex Richardson       int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];
12431914882SAlex Richardson 
12531914882SAlex Richardson       strlen_tests[n] =
12631914882SAlex Richardson 	index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
12731914882SAlex Richardson     }
12831914882SAlex Richardson }
12931914882SAlex Richardson 
13031914882SAlex Richardson static volatile size_t maskv = 0;
13131914882SAlex Richardson 
main(void)13231914882SAlex Richardson int main (void)
13331914882SAlex Richardson {
13431914882SAlex Richardson   rand32 (0x12345678);
13531914882SAlex Richardson   init_strlen_distribution ();
13631914882SAlex Richardson   init_strlen_tests ();
13731914882SAlex Richardson 
13831914882SAlex Richardson   printf ("\nRandom strlen (bytes/ns):\n");
13931914882SAlex Richardson   for (int f = 0; funtab[f].name != 0; f++)
14031914882SAlex Richardson     {
14131914882SAlex Richardson       size_t res = 0, strlen_size = 0, mask = maskv;
14231914882SAlex Richardson       printf ("%22s ", funtab[f].name);
14331914882SAlex Richardson 
144d49ad206SAndrew Turner       for (int c = 0; c < NUM_TESTS; c++)
14531914882SAlex Richardson 	strlen_size += funtab[f].fun (a + strlen_tests[c]);
14631914882SAlex Richardson       strlen_size *= ITERS;
14731914882SAlex Richardson 
14831914882SAlex Richardson       /* Measure latency of strlen result with (res & mask).  */
14931914882SAlex Richardson       uint64_t t = clock_get_ns ();
15031914882SAlex Richardson       for (int i = 0; i < ITERS; i++)
151d49ad206SAndrew Turner 	for (int c = 0; c < NUM_TESTS; c++)
15231914882SAlex Richardson 	  res = funtab[f].fun (a + strlen_tests[c] + (res & mask));
15331914882SAlex Richardson       t = clock_get_ns () - t;
15431914882SAlex Richardson       printf ("%.2f\n", (double)strlen_size / t);
15531914882SAlex Richardson     }
15631914882SAlex Richardson 
15731914882SAlex Richardson   printf ("\nSmall aligned strlen (bytes/ns):\n");
15831914882SAlex Richardson   for (int f = 0; funtab[f].name != 0; f++)
15931914882SAlex Richardson     {
16031914882SAlex Richardson       printf ("%22s ", funtab[f].name);
16131914882SAlex Richardson 
16231914882SAlex Richardson       for (int size = 1; size <= 64; size *= 2)
16331914882SAlex Richardson 	{
16431914882SAlex Richardson 	  memset (a, 'x', size);
16531914882SAlex Richardson 	  a[size - 1] = 0;
16631914882SAlex Richardson 
16731914882SAlex Richardson 	  uint64_t t = clock_get_ns ();
16831914882SAlex Richardson 	  for (int i = 0; i < ITERS2; i++)
16931914882SAlex Richardson 	    funtab[f].fun (a);
17031914882SAlex Richardson 	  t = clock_get_ns () - t;
17131914882SAlex Richardson 	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
17231914882SAlex Richardson 		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
17331914882SAlex Richardson 	}
17431914882SAlex Richardson       printf ("\n");
17531914882SAlex Richardson     }
17631914882SAlex Richardson 
17731914882SAlex Richardson   printf ("\nSmall unaligned strlen (bytes/ns):\n");
17831914882SAlex Richardson   for (int f = 0; funtab[f].name != 0; f++)
17931914882SAlex Richardson     {
18031914882SAlex Richardson       printf ("%22s ", funtab[f].name);
18131914882SAlex Richardson 
18231914882SAlex Richardson       int align = 9;
18331914882SAlex Richardson       for (int size = 1; size <= 64; size *= 2)
18431914882SAlex Richardson 	{
18531914882SAlex Richardson 	  memset (a + align, 'x', size);
18631914882SAlex Richardson 	  a[align + size - 1] = 0;
18731914882SAlex Richardson 
18831914882SAlex Richardson 	  uint64_t t = clock_get_ns ();
18931914882SAlex Richardson 	  for (int i = 0; i < ITERS2; i++)
19031914882SAlex Richardson 	    funtab[f].fun (a + align);
19131914882SAlex Richardson 	  t = clock_get_ns () - t;
19231914882SAlex Richardson 	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
19331914882SAlex Richardson 		  size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
19431914882SAlex Richardson 	}
19531914882SAlex Richardson       printf ("\n");
19631914882SAlex Richardson     }
19731914882SAlex Richardson 
19831914882SAlex Richardson   printf ("\nMedium strlen (bytes/ns):\n");
19931914882SAlex Richardson   for (int f = 0; funtab[f].name != 0; f++)
20031914882SAlex Richardson     {
20131914882SAlex Richardson       printf ("%22s ", funtab[f].name);
20231914882SAlex Richardson 
20331914882SAlex Richardson       for (int size = 128; size <= 4096; size *= 2)
20431914882SAlex Richardson 	{
20531914882SAlex Richardson 	  memset (a, 'x', size);
20631914882SAlex Richardson 	  a[size - 1] = 0;
20731914882SAlex Richardson 
20831914882SAlex Richardson 	  uint64_t t = clock_get_ns ();
20931914882SAlex Richardson 	  for (int i = 0; i < ITERS3; i++)
21031914882SAlex Richardson 	    funtab[f].fun (a);
21131914882SAlex Richardson 	  t = clock_get_ns () - t;
21231914882SAlex Richardson 	  printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
21331914882SAlex Richardson 		  size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
21431914882SAlex Richardson 	}
21531914882SAlex Richardson       printf ("\n");
21631914882SAlex Richardson     }
21731914882SAlex Richardson 
21831914882SAlex Richardson   printf ("\n");
21931914882SAlex Richardson 
22031914882SAlex Richardson   return 0;
22131914882SAlex Richardson }
222