/*
 * strchrnul - find a character or nul in a string
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Arguments and return value.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* Scratch registers.  tmp1 shares x1 with chrin: chrin is only read by the
   initial dup, after which x1 is free to be reused.  */
#define src		x2
#define tmp1		x1
#define tmp2		x3

/* Vector registers.  qdata and dend are the 128-bit and 64-bit scalar views
   of vdata and vend respectively.  */
#define vrepchr		v0
#define vdata		v1
#define qdata		q1
#define vhas_nul	v2
#define vhas_chr	v3
#define vend		v4
#define dend		d4

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit nibble mask value with four
   bits per byte.  We take 4 bits of every comparison byte with a "shift
   right and narrow by 4" instruction.  Since the bits in the nibble mask
   reflect the order in which things occur in the original string, counting
   leading zeros identifies exactly which byte matched.  */

/*
   __strchrnul_aarch64_mte

   In:   srcin (x0) = address of the string to scan.
         chrin (w1) = character to look for; only the low byte is used,
                      because it is broadcast into byte lanes.
   Out:  result (x0) = address of the first byte at or after srcin that
                      either equals chrin or is NUL.

   Registers written by the instructions below: x0-x3 and v0-v4.  Nothing
   is stored to memory and the stack is not used.

   Every load is a 16-byte access at a 16-byte aligned address (src is
   rounded down on entry and afterwards only moves in multiples of 16), so
   no load ever straddles a 16-byte boundary.  */

ENTRY (__strchrnul_aarch64_mte)
        /* First chunk: the aligned 16 bytes that contain srcin.  */
        bic     src, srcin, 15
        dup     vrepchr.16b, chrin
        ld1     {vdata.16b}, [src]
        /* vhas_chr lane = 0xff where the data byte equals chrin, else 0.  */
        cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b
        /* Unsigned vhas_chr >= vdata holds for a matching lane (0xff >= x)
           and for a NUL lane (0 >= 0), so a lane becomes 0xff when the byte
           is either chrin or NUL, and 0 otherwise.  */
        cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b
        /* 4 mask bits per byte, so the bit offset of srcin within the chunk
           is 4 * (srcin & 15).  The register form of lsr only uses the low
           six bits of tmp2, which makes srcin << 2 sufficient.  */
        lsl     tmp2, srcin, 2
        shrn    vend.8b, vhas_chr.8h, 4         /* 128->64 */
        fmov    tmp1, dend
        lsr     tmp1, tmp1, tmp2        /* Mask padding bits.  */
        cbz     tmp1, L(loop)

        /* Hit in the first chunk.  rbit + clz counts trailing zeros; after
           the shift above that count is relative to srcin, and dividing by
           4 (lsr 2) turns the bit index into a byte index.  */
        rbit    tmp1, tmp1
        clz     tmp1, tmp1
        add     result, srcin, tmp1, lsr 2
        ret

        /* Main loop, unrolled twice: tests chunk src+16, then chunk src+32
           while advancing src by 32.  */
        .p2align 4
L(loop):
        ldr     qdata, [src, 16]
        cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b
        cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b
        /* Pairwise max folds the 16 lanes into the low 64 bits, which are
           non-zero exactly when some lane is set.  */
        umaxp   vend.16b, vhas_chr.16b, vhas_chr.16b
        fmov    tmp1, dend
        cbnz    tmp1, L(end)            /* Hit in chunk at src + 16.  */
        ldr     qdata, [src, 32]!       /* Pre-index: src += 32, then load.  */
        cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b
        cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b
        umaxp   vend.16b, vhas_chr.16b, vhas_chr.16b
        fmov    tmp1, dend
        cbz     tmp1, L(loop)
        /* Hit in the chunk at src.  Bias src by -16 so that both exits
           reach L(end) with the hit chunk at src + 16.  */
        sub     src, src, 16
L(end):
        /* Build the nibble mask for the chunk that hit and locate the
           first set nibble.  */
        shrn    vend.8b, vhas_chr.8h, 4         /* 128->64 */
        add     src, src, 16            /* src = base of the hit chunk.  */
        fmov    tmp1, dend
        /* Little-endian builds need the trailing-zero count, hence the bit
           reversal before clz.  Big-endian builds apply clz to the mask
           directly; presumably the ldr q load already places the
           lowest-addressed byte in the most significant lane there.  */
#ifndef __AARCH64EB__
        rbit    tmp1, tmp1
#endif
        clz     tmp1, tmp1
        add     result, src, tmp1, lsr 2
        ret

END (__strchrnul_aarch64_mte)
