/*
 * strlen - calculate the length of a string.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Register roles.  srcin and result deliberately alias x0: the argument is
   still needed until the final length has been computed into the same
   register.  */
#define srcin		x0
#define result		x0

#define src		x1
#define synd		x2
#define tmp		x3
#define shift		x4

#define data		q0
#define vdata		v0
#define vhas_nul	v1
#define vend		v2
#define dend		d2

/* Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. A count trailing zeros then
   identifies the first zero byte.

   Interface:
     In:      x0 = pointer to a NUL-terminated string.
     Out:     x0 = number of bytes before the first NUL.
     Writes:  x1-x4, v0-v2.  No stack is used and no calls are made.  */

ENTRY (__strlen_aarch64_mte)
	/* PTR_ARG comes from asmdefs.h; presumably it normalises pointer
	   argument 0 on ILP32 targets - confirm against that header.  */
	PTR_ARG (0)

	/* First chunk: load the aligned 16 bytes that contain the start of the
	   string.  Only aligned 16-byte chunks are ever read.  */
	bic	src, srcin, 15
	ld1	{vdata.16b}, [src]
	cmeq	vhas_nul.16b, vdata.16b, 0
	/* Four mask bits per byte, so the bit offset of the first string byte is
	   4 * (srcin & 15).  The register-form lsr below only uses the shift
	   amount modulo 64, which makes the explicit "& 15" unnecessary.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Drop the mask bits belonging to bytes that precede the string.  */
	lsr	synd, synd, shift
	cbz	synd, L(loop)

	/* NUL found in the first chunk: rbit + clz is a count of trailing zeros,
	   and dividing by 4 turns the bit index into a byte index.  */
	rbit	synd, synd
	clz	result, synd
	lsr	result, result, 2
	ret

	.p2align 5
	/* Main loop, unrolled to test two 16-byte chunks per iteration.  umaxp
	   folds the 128-bit compare result into 64 bits that are non-zero iff
	   some byte of the chunk was NUL; the exact position is only computed
	   once, after the loop.  */
L(loop):
	ldr	data, [src, 16]
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbnz	synd, L(loop_end)
	/* Pre-indexed load: src advances by 32 before the access.  */
	ldr	data, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
	/* Hit in the second chunk: step src back so that, as on the other path
	   into loop_end, the chunk holding the NUL is at src + 16.  */
	sub	src, src, 16
L(loop_end):
	/* Rebuild the precise 4-bits-per-byte mask for the matching chunk.  */
	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
	sub	result, src, srcin
	fmov	synd, dend
	/* The loop loads with ldr q rather than ld1.  On big-endian that puts
	   the lowest-addressed byte in the most significant mask nibble, so clz
	   can be used directly and the bit reversal is skipped.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* length = (src + 16 - srcin) + byte index of the NUL in the chunk.  */
	add	result, result, 16
	clz	tmp, synd
	add	result, result, tmp, lsr 2
	ret

END (__strlen_aarch64_mte)
