/*
 * strlen - calculate the length of a string.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Register roles.  srcin and result deliberately alias x0: the incoming
   pointer is consumed before the length is written back to the same
   register.  */
#define srcin		x0	/* In:  pointer to the string.  */
#define result		x0	/* Out: computed length.  */

#define src		x1	/* 16-byte aligned chunk pointer.  */
#define synd		x2	/* 64-bit NUL syndrome moved out of SIMD.  */
#define tmp		x3	/* Scratch for the final bit count.  */
#define shift		x4	/* Bit offset of srcin within its chunk.  */

#define data		q0	/* 128-bit view of the current chunk.  */
#define vdata		v0	/* Vector view of the same register.  */
#define vhas_nul	v1	/* Per-byte compare result: 0xff where byte == 0.  */
#define vend		v2	/* Reduced (64-bit) form of vhas_nul.  */
#define dend		d2	/* Low 64 bits of vend, for fmov to synd.  */

/* Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. A count trailing zeros then
   identifies the first zero byte.  */

/* __strlen_aarch64_mte
   In:    x0 (srcin)  = address of the string.
   Out:   x0 (result) = number of bytes before the first zero byte.
   Uses:  x1-x4 and v0-v2 as scratch; no stack accesses and no
	  flag-setting instructions appear in the body.
   ENTRY, END, L and PTR_ARG come from asmdefs.h, which is not visible
   here.  NOTE(review): PTR_ARG (0) presumably normalises pointer argument
   0 for ILP32 builds - confirm against asmdefs.h.

   Every load is from a 16-byte aligned address, so a single load never
   straddles a 16-byte boundary even though it may read bytes that lie
   before the start or after the end of the string.  */
ENTRY (__strlen_aarch64_mte)
	PTR_ARG (0)
	/* First chunk: round the pointer down to 16 bytes and test the whole
	   aligned chunk, then discard the bytes that precede srcin.  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* shift = srcin * 4.  The register-shift below only uses the low six
	   bits, so the effective amount is (srcin & 15) * 4: four syndrome
	   bits for every byte between the aligned base and srcin.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Drop the nibbles belonging to bytes before the string start.  */
	lsr	synd, synd, shift
	/* No NUL in the first chunk: continue with the aligned loop.  */
	cbz	synd, L(loop)

	/* rbit + clz = count trailing zeros; each byte owns four bits, so
	   dividing by four gives the index of the first NUL from srcin.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

	/* Align the loop entry to 32 bytes.  */
	.p2align 5
	/* Main loop, unrolled twice: 32 bytes per iteration.  umaxp folds the
	   16 compare bytes into the low 64 bits, which are non-zero iff some
	   byte of the chunk was zero.  */
L(loop):
	/* First half: chunk at src + 16, src is left unchanged.  */
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbnz	synd, L(loop_end)
	/* Second half: chunk at src + 32, with writeback (src += 32).  */
	ldr	data, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
	/* Undo half of the writeback so that, on both exit paths, the chunk
	   holding the NUL is the one at src + 16.  */
	sub	src, src, 16
L(loop_end):
	/* vhas_nul still holds the per-byte compare of the terminating chunk;
	   rebuild the four-bits-per-byte syndrome from it.  */
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	sub	result, src, srcin
	fmov	synd, dend
	/* Little-endian: reverse the bits so that clz counts from the byte at
	   the lowest address.  NOTE(review): big-endian builds skip this,
	   presumably because ldr of a q register already leaves the first
	   byte in the most significant position - confirm.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* result = (src + 16 - srcin) + index of the first NUL in the chunk.  */
	add	result, result, 16
	clz	tmp, synd
	add	result, result, tmp, lsr 2
	ret

END (__strlen_aarch64_mte)
