/*
 * strlen - calculate the length of a string.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

#define srcin		x0
#define result		x0

#define src		x1
#define synd		x2
#define tmp		x3
#define shift		x4

#define data		q0
#define vdata		v0
#define vhas_nul	v1
#define vend		v2
#define dend		d2

/* Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. A count trailing zeros then
   identifies the first zero byte.  */

ENTRY (__strlen_aarch64_mte)
	bic	src, srcin, 15		/* Align src down to 16 bytes.  */
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0	/* 0xff where the byte is NUL.  */
	lsl	shift, srcin, 2		/* Syndrome bit offset of srcin (lsr uses it mod 64).  */
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift	/* Drop the bits of bytes before srcin.  */
	cbz	synd, L(next16)

	/* NUL in the first 16 bytes: trailing zeros / 4 is its index.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

	/* Check the second 16-byte chunk before entering the main loop.  */
L(next16):
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop)
	add	src, src, 16
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	sub	result, src, srcin
	clz	tmp, synd
	add	result, result, tmp, lsr 2
	ret

	/* Main loop: examine 32 bytes per iteration.  */
	.p2align 5
L(loop):
	ldr	data, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h	/* Non-zero if any byte is NUL.  */
	fmov	synd, dend
	cbnz	synd, L(loop_end)
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
	fmov	synd, dend
	cbz	synd, L(loop)
	add	src, src, 16
L(loop_end):
	sub	result, shift, src, lsl 2	/* (srcin - src) << 2.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
	sub	result, result, 3
#endif
	clz	tmp, synd
	sub	result, tmp, result
	lsr	result, result, 2
	ret

END (__strlen_aarch64_mte)
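
/* The block below is a rough scalar C model of the syndrome technique used
   above, kept purely as documentation and never assembled or built; the
   names make_syndrome and strlen_ref are illustrative only and are not part
   of this library.  Each byte of a 16-byte aligned chunk contributes four
   syndrome bits, set when the byte is NUL, so count-trailing-zeros divided
   by four gives the index of the first NUL, which is what the rbit/clz/lsr
   sequence computes in the little-endian case.  Like the assembly, the model
   reads whole aligned 16-byte blocks and so may read a few bytes past the
   terminator (within the same block), which is safe in hardware terms but
   not strict ISO C.  __builtin_ctzll is a GCC/Clang builtin.  */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Build a 64-bit syndrome: 4 bits per byte, 0xf where the byte is NUL.  */
static uint64_t
make_syndrome (const unsigned char *chunk)
{
  uint64_t synd = 0;
  for (int i = 0; i < 16; i++)
    if (chunk[i] == 0)
      synd |= (uint64_t) 0xf << (4 * i);
  return synd;
}

static size_t
strlen_ref (const char *s)
{
  const unsigned char *p = (const unsigned char *) s;
  const unsigned char *chunk
    = (const unsigned char *) ((uintptr_t) p & ~(uintptr_t) 15);

  /* First aligned chunk: shift out the syndrome bits of the bytes before s,
     mirroring the "lsr synd, synd, shift" above.  */
  uint64_t synd = make_syndrome (chunk) >> (4 * (p - chunk));
  if (synd != 0)
    return (size_t) __builtin_ctzll (synd) / 4;

  /* Remaining chunks: ctz (syndrome) / 4 is the NUL's offset within the
     chunk, added to the distance already covered.  */
  for (;;)
    {
      chunk += 16;
      synd = make_syndrome (chunk);
      if (synd != 0)
	return (size_t) (chunk - p) + (size_t) __builtin_ctzll (synd) / 4;
    }
}

int
main (void)
{
  printf ("%zu\n", strlen_ref ("hello"));	/* Prints 5.  */
  return 0;
}
#endif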