/*
 * strnlen - calculate the length of a string with limit.
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "../asmdefs.h"

#define srcin	x0
#define cntin	x1
#define result	x0

#define src	x2
#define synd	x3
#define shift	x4
#define wtmp	w4
#define tmp	x4
#define cntrem	x5

#define qdata	q0
#define vdata	v0
#define vhas_chr	v1
#define vrepmask	v2
#define vend	v3
#define dend	d3

/*
   Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte. For even bytes, bits 0-3 are set if the relevant byte is NUL.
   Bits 4-7 must be zero. Bits 4-7 are set likewise for odd bytes so that
   adjacent bytes can be merged. Since the bits in the syndrome reflect the
   order in which things occur in the original string, counting trailing
   zeros identifies exactly which byte is the first NUL.  */

ENTRY (__strnlen_aarch64)
	PTR_ARG (0)
	SIZE_ARG (1)
	bic	src, srcin, 15		/* Align src down to 16 bytes.  */
	mov	wtmp, 0xf00f
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src], 16
	dup	vrepmask.8h, wtmp	/* 4 syndrome bits per byte.  */
	cmeq	vhas_chr.16b, vdata.16b, 0
	lsl	shift, srcin, 2		/* Bits to discard before srcin.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)
L(finish):
	rbit	synd, synd		/* Count trailing zeros via clz.  */
	clz	synd, synd
	lsr	result, synd, 2		/* Bit index -> byte index.  */
	cmp	cntin, result
	csel	result, cntin, result, ls	/* Clamp to the limit.  */
	ret

L(start_loop):
	sub	tmp, src, srcin		/* Bytes already examined.  */
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)		/* Limit falls within the first chunk.  */

	/* If an odd number of 16-byte chunks remains, enter the loop at its
	   second load so that it cannot overread by a 16-byte chunk.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 5
L(loop32):
	ldr	qdata, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	ldr	qdata, [src], 16
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0
	b.ls	L(end)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)

L(end):
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	sub	src, src, 16		/* Back to the chunk just tested.  */
	mov	synd, vend.d[0]
	sub	result, src, srcin	/* Offset of that chunk.  */
#ifndef __AARCH64EB__
	rbit	synd, synd		/* On little-endian, count trailing zeros.  */
#endif
	clz	synd, synd
	add	result, result, synd, lsr 2	/* Add index of the NUL, or 16.  */
	cmp	cntin, result
	csel	result, cntin, result, ls	/* Clamp to the limit.  */
	ret

L(nomatch):
	mov	result, cntin		/* No NUL within the limit.  */
	ret

END (__strnlen_aarch64)
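
/* Illustrative sketch, not part of the build: a scalar C model of the
   syndrome scheme described in the core-algorithm comment, assuming the
   little-endian layout. The names chunk_syndrome and first_nul_index are
   hypothetical and do not exist in this codebase.

   #include <stdint.h>

   // Build the 64-bit syndrome for one 16-byte chunk: nibble i is 0xf when
   // chunk[i] is NUL. This models what cmeq, the 0xf00f mask and the
   // pairwise addp compute above.
   static uint64_t chunk_syndrome (const unsigned char chunk[16])
   {
     uint64_t synd = 0;
     for (int i = 0; i < 16; i++)
       if (chunk[i] == 0)
	 synd |= (uint64_t) 0xf << (4 * i);
     return synd;
   }

   // Because the syndrome bits follow string order, counting trailing zeros
   // and dividing by four recovers the byte index of the first NUL, as the
   // rbit + clz + lsr #2 sequence does above. Only valid for synd != 0.
   static int first_nul_index (uint64_t synd)
   {
     return __builtin_ctzll (synd) >> 2;
   }
*/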
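
/* Also illustrative: the loop-entry test at L(start_loop). The add/tbnz pair
   inspects bit 4 of cntrem + 15, i.e. whether an odd number of 16-byte
   chunks remains; entering the loop at its second load in that case keeps
   the 32-byte iterations from overreading by a 16-byte chunk. The helper
   name enter_at_second_load is hypothetical.

   #include <stdint.h>

   static int enter_at_second_load (uint64_t cntrem)
   {
     // Parity of ceil (cntrem / 16), matching "add tmp, cntrem, 15" followed
     // by "tbnz tmp, 4" above.
     return ((cntrem + 15) >> 4) & 1;
   }
*/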