/*
 * strlen benchmark.
 *
 * Copyright (c) 2020-2021, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "stringlib.h"
#include "benchlib.h"

/* Repeat counts for the timed loops: ITERS passes over the random test set,
   ITERS2 calls per size in the small-string runs and ITERS3 calls per size
   in the medium-string runs.  */
#define ITERS 5000
#define ITERS2 20000000
#define ITERS3 2000000
/* Number of randomly generated string start offsets.  */
#define NUM_TESTS 16384

/* The random test lays out MAX_ALIGN slots of MAX_STRLEN + 1 bytes each.  */
#define MAX_ALIGN 32
#define MAX_STRLEN 256

/* Backing storage for every benchmark string.  It is aligned to 4096 bytes,
   so the low bits of an offset into it are also the low bits of the
   resulting address.  */
static char a[(MAX_STRLEN + 1) * MAX_ALIGN] __attribute__((__aligned__(4096)));

/* Build one funtab entry: the function's name as a string, its address and
   its MTE flag.  */
#define F(x, mte) {#x, x, mte},

/* The strlen implementations to benchmark.  The list is terminated by an
   all-zero entry; users loop until they see a null name.  */
static const struct fun
{
  /* Name printed in front of the results.  */
  const char *name;
  /* Implementation under test.  */
  size_t (*fun) (const char *s);
  /* Per-entry flag; not read by the benchmark code visible in this file.  */
  int test_mte;
} funtab[] = {
  // clang-format off
  F(strlen, 0)
#if __aarch64__
  F(__strlen_aarch64, 0)
  F(__strlen_aarch64_mte, 1)
# if __ARM_FEATURE_SVE
  F(__strlen_aarch64_sve, 1)
# endif
#elif __arm__
# if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
  F(__strlen_armv6t2, 0)
# endif
#endif
  {0, 0, 0}
  // clang-format on
};
#undef F

52d49ad206SAndrew Turner static uint16_t strlen_tests[NUM_TESTS];
5331914882SAlex Richardson
/* One table row: a string length (or an alignment) and how often it occurs.
   A row with frequency 0 terminates a table.  */
typedef struct { uint16_t size; uint16_t freq; } freq_data_t;
typedef struct { uint8_t align; uint16_t freq; } align_data_t;

/* Flat lookup array of string lengths: each length appears as many times as
   its frequency, so indexing with (random & SIZE_MASK) samples the
   distribution.  SIZE_NUM must be a power of two for the mask to work.  */
#define SIZE_NUM 65536
#define SIZE_MASK (SIZE_NUM - 1)
static uint8_t strlen_len_arr[SIZE_NUM];

/* Frequency data for strlen sizes up to 128 based on SPEC2017.
   The frequencies add up to exactly SIZE_NUM.  */
static const freq_data_t strlen_len_freq[] =
{
  { 12,22671}, { 18,12834}, { 13, 9555}, {  6, 6348}, { 17, 6095}, { 11, 2115},
  { 10, 1335}, {  7,  814}, {  2,  646}, {  9,  483}, {  8,  471}, { 16,  418},
  {  4,  390}, {  1,  388}, {  5,  233}, {  3,  204}, {  0,   79}, { 14,   79},
  { 15,   69}, { 26,   36}, { 22,   35}, { 31,   24}, { 32,   24}, { 19,   21},
  { 25,   17}, { 28,   15}, { 21,   14}, { 33,   14}, { 20,   13}, { 24,    9},
  { 29,    9}, { 30,    9}, { 23,    7}, { 34,    7}, { 27,    6}, { 44,    5},
  { 42,    4}, { 45,    3}, { 47,    3}, { 40,    2}, { 41,    2}, { 43,    2},
  { 58,    2}, { 78,    2}, { 36,    2}, { 48,    1}, { 52,    1}, { 60,    1},
  { 64,    1}, { 56,    1}, { 76,    1}, { 68,    1}, { 80,    1}, { 84,    1},
  { 72,    1}, { 86,    1}, { 35,    1}, { 39,    1}, { 50,    1}, { 38,    1},
  { 37,    1}, { 46,    1}, { 98,    1}, {102,    1}, {128,    1}, { 51,    1},
  {107,    1}, {  0,    0}
};

/* Flat lookup array of alignments, built the same way as strlen_len_arr and
   indexed with (random & ALIGN_MASK).  */
#define ALIGN_NUM 1024
#define ALIGN_MASK (ALIGN_NUM - 1)
static uint8_t strlen_align_arr[ALIGN_NUM];

/* Alignment data for strlen based on SPEC2017.
   The frequencies add up to exactly ALIGN_NUM.  */
static const align_data_t string_align_freq[] =
{
  {8, 470}, {32, 427}, {16, 99}, {1, 19}, {2, 6}, {4, 3}, {0, 0}
};

8831914882SAlex Richardson static void
init_strlen_distribution(void)8931914882SAlex Richardson init_strlen_distribution (void)
9031914882SAlex Richardson {
9131914882SAlex Richardson int i, j, freq, size, n;
9231914882SAlex Richardson
9331914882SAlex Richardson for (n = i = 0; (freq = strlen_len_freq[i].freq) != 0; i++)
9431914882SAlex Richardson for (j = 0, size = strlen_len_freq[i].size; j < freq; j++)
9531914882SAlex Richardson strlen_len_arr[n++] = size;
9631914882SAlex Richardson assert (n == SIZE_NUM);
9731914882SAlex Richardson
9831914882SAlex Richardson for (n = i = 0; (freq = string_align_freq[i].freq) != 0; i++)
9931914882SAlex Richardson for (j = 0, size = string_align_freq[i].align; j < freq; j++)
10031914882SAlex Richardson strlen_align_arr[n++] = size;
10131914882SAlex Richardson assert (n == ALIGN_NUM);
10231914882SAlex Richardson }
10331914882SAlex Richardson
10431914882SAlex Richardson static void
init_strlen_tests(void)10531914882SAlex Richardson init_strlen_tests (void)
10631914882SAlex Richardson {
10731914882SAlex Richardson uint16_t index[MAX_ALIGN];
10831914882SAlex Richardson
10931914882SAlex Richardson memset (a, 'x', sizeof (a));
11031914882SAlex Richardson
11131914882SAlex Richardson /* Create indices for strings at all alignments. */
11231914882SAlex Richardson for (int i = 0; i < MAX_ALIGN; i++)
11331914882SAlex Richardson {
11431914882SAlex Richardson index[i] = i * (MAX_STRLEN + 1);
11531914882SAlex Richardson a[index[i] + MAX_STRLEN] = 0;
11631914882SAlex Richardson }
11731914882SAlex Richardson
11831914882SAlex Richardson /* Create a random set of strlen input strings using the string length
11931914882SAlex Richardson and alignment distributions. */
120d49ad206SAndrew Turner for (int n = 0; n < NUM_TESTS; n++)
12131914882SAlex Richardson {
12231914882SAlex Richardson int align = strlen_align_arr[rand32 (0) & ALIGN_MASK];
12331914882SAlex Richardson int exp_len = strlen_len_arr[rand32 (0) & SIZE_MASK];
12431914882SAlex Richardson
12531914882SAlex Richardson strlen_tests[n] =
12631914882SAlex Richardson index[(align + exp_len) & (MAX_ALIGN - 1)] + MAX_STRLEN - exp_len;
12731914882SAlex Richardson }
12831914882SAlex Richardson }
12931914882SAlex Richardson
13031914882SAlex Richardson static volatile size_t maskv = 0;
13131914882SAlex Richardson
main(void)13231914882SAlex Richardson int main (void)
13331914882SAlex Richardson {
13431914882SAlex Richardson rand32 (0x12345678);
13531914882SAlex Richardson init_strlen_distribution ();
13631914882SAlex Richardson init_strlen_tests ();
13731914882SAlex Richardson
13831914882SAlex Richardson printf ("\nRandom strlen (bytes/ns):\n");
13931914882SAlex Richardson for (int f = 0; funtab[f].name != 0; f++)
14031914882SAlex Richardson {
14131914882SAlex Richardson size_t res = 0, strlen_size = 0, mask = maskv;
14231914882SAlex Richardson printf ("%22s ", funtab[f].name);
14331914882SAlex Richardson
144d49ad206SAndrew Turner for (int c = 0; c < NUM_TESTS; c++)
14531914882SAlex Richardson strlen_size += funtab[f].fun (a + strlen_tests[c]);
14631914882SAlex Richardson strlen_size *= ITERS;
14731914882SAlex Richardson
14831914882SAlex Richardson /* Measure latency of strlen result with (res & mask). */
14931914882SAlex Richardson uint64_t t = clock_get_ns ();
15031914882SAlex Richardson for (int i = 0; i < ITERS; i++)
151d49ad206SAndrew Turner for (int c = 0; c < NUM_TESTS; c++)
15231914882SAlex Richardson res = funtab[f].fun (a + strlen_tests[c] + (res & mask));
15331914882SAlex Richardson t = clock_get_ns () - t;
15431914882SAlex Richardson printf ("%.2f\n", (double)strlen_size / t);
15531914882SAlex Richardson }
15631914882SAlex Richardson
15731914882SAlex Richardson printf ("\nSmall aligned strlen (bytes/ns):\n");
15831914882SAlex Richardson for (int f = 0; funtab[f].name != 0; f++)
15931914882SAlex Richardson {
16031914882SAlex Richardson printf ("%22s ", funtab[f].name);
16131914882SAlex Richardson
16231914882SAlex Richardson for (int size = 1; size <= 64; size *= 2)
16331914882SAlex Richardson {
16431914882SAlex Richardson memset (a, 'x', size);
16531914882SAlex Richardson a[size - 1] = 0;
16631914882SAlex Richardson
16731914882SAlex Richardson uint64_t t = clock_get_ns ();
16831914882SAlex Richardson for (int i = 0; i < ITERS2; i++)
16931914882SAlex Richardson funtab[f].fun (a);
17031914882SAlex Richardson t = clock_get_ns () - t;
17131914882SAlex Richardson printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
17231914882SAlex Richardson size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
17331914882SAlex Richardson }
17431914882SAlex Richardson printf ("\n");
17531914882SAlex Richardson }
17631914882SAlex Richardson
17731914882SAlex Richardson printf ("\nSmall unaligned strlen (bytes/ns):\n");
17831914882SAlex Richardson for (int f = 0; funtab[f].name != 0; f++)
17931914882SAlex Richardson {
18031914882SAlex Richardson printf ("%22s ", funtab[f].name);
18131914882SAlex Richardson
18231914882SAlex Richardson int align = 9;
18331914882SAlex Richardson for (int size = 1; size <= 64; size *= 2)
18431914882SAlex Richardson {
18531914882SAlex Richardson memset (a + align, 'x', size);
18631914882SAlex Richardson a[align + size - 1] = 0;
18731914882SAlex Richardson
18831914882SAlex Richardson uint64_t t = clock_get_ns ();
18931914882SAlex Richardson for (int i = 0; i < ITERS2; i++)
19031914882SAlex Richardson funtab[f].fun (a + align);
19131914882SAlex Richardson t = clock_get_ns () - t;
19231914882SAlex Richardson printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
19331914882SAlex Richardson size < 1024 ? 'B' : 'K', (double)size * ITERS2 / t);
19431914882SAlex Richardson }
19531914882SAlex Richardson printf ("\n");
19631914882SAlex Richardson }
19731914882SAlex Richardson
19831914882SAlex Richardson printf ("\nMedium strlen (bytes/ns):\n");
19931914882SAlex Richardson for (int f = 0; funtab[f].name != 0; f++)
20031914882SAlex Richardson {
20131914882SAlex Richardson printf ("%22s ", funtab[f].name);
20231914882SAlex Richardson
20331914882SAlex Richardson for (int size = 128; size <= 4096; size *= 2)
20431914882SAlex Richardson {
20531914882SAlex Richardson memset (a, 'x', size);
20631914882SAlex Richardson a[size - 1] = 0;
20731914882SAlex Richardson
20831914882SAlex Richardson uint64_t t = clock_get_ns ();
20931914882SAlex Richardson for (int i = 0; i < ITERS3; i++)
21031914882SAlex Richardson funtab[f].fun (a);
21131914882SAlex Richardson t = clock_get_ns () - t;
21231914882SAlex Richardson printf ("%d%c: %.2f ", size < 1024 ? size : size / 1024,
21331914882SAlex Richardson size < 1024 ? 'B' : 'K', (double)size * ITERS3 / t);
21431914882SAlex Richardson }
21531914882SAlex Richardson printf ("\n");
21631914882SAlex Richardson }
21731914882SAlex Richardson
21831914882SAlex Richardson printf ("\n");
21931914882SAlex Richardson
22031914882SAlex Richardson return 0;
22131914882SAlex Richardson }
222