/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Argument and return registers.  */
#define srcin		x0	/* in:  start of the string */
#define chrin		w1	/* in:  character to find (low byte is used) */
#define result		x0	/* out: address of the match, or 0 */

/* Scalar scratch registers.  */
#define blk		x2	/* 16-byte aligned address used for the loads */
#define synd		x1	/* syndrome; reuses chrin's register after the dup */
#define shift		x3	/* srcin * 4, used to discard leading nibbles */

/* Vector scratch registers.  */
#define vchr		v0	/* requested character replicated to every lane */
#define vbytes		v1	/* the 16 bytes currently being examined */
#define qbytes		q1	/* same register as vbytes, 128-bit view */
#define vterm		v2	/* per-byte NUL (loop: NUL or match) result */
#define vmatch		v3	/* per-byte match result */
#define vmask		v4	/* 0x33 in every lane, selector for bit/bif */
#define vsynd		v5	/* narrowed or folded result */
#define dsynd		d5	/* same register as vsynd, low 64 bits */

/* Core algorithm.

   Every 16-byte block is reduced to a 64-bit syndrome that holds four bits
   per byte.  In the little-endian layout bits 0-1 of a nibble are set when
   the byte equals the requested character and bits 2-3 are set when the
   byte is NUL (in the main loop: NUL or equal to the character).  If the
   number of trailing zeroes in the syndrome is a multiple of 4, a quarter
   of it is the offset of the first matching byte.  If it is not a multiple
   of 4, a NUL comes first and there is no match.

   In:       x0 = string, w1 = character.
   Out:      x0 = address of the first match, or 0 if NUL is reached first.
   Clobbers: x1-x3, v0-v5, condition flags.
   All loads are 16 bytes wide from 16-byte aligned addresses.  */

ENTRY (__strchr_aarch64_mte)
	/* First block: the aligned 16 bytes that contain srcin.  */
	bic	blk, srcin, 15
	dup	vchr.16b, chrin
	ld1	{vbytes.16b}, [blk]
	movi	vmask.16b, 0x33
	cmeq	vterm.16b, vbytes.16b, 0
	cmeq	vmatch.16b, vbytes.16b, vchr.16b
	/* Bits selected by the mask take the match result, the others keep
	   the NUL result.  */
	bit	vterm.16b, vmatch.16b, vmask.16b
	lsl	shift, srcin, 2
	shrn	vsynd.8b, vterm.8h, 4		/* 128->64 */
	fmov	synd, dsynd
	/* The register shift is taken modulo 64, i.e. 4 * (srcin & 15), so
	   this drops the nibbles of the bytes that precede srcin.  */
	lsr	synd, synd, shift
	cbz	synd, L(scan)

	/* rbit + clz = number of trailing zeroes.  */
	rbit	synd, synd
	clz	synd, synd
	/* Bit 1 of the count is clear if the character was found first.
	   Otherwise the end of the string was found first.  */
	tst	synd, 2
	add	result, srcin, synd, lsr 2
	csel	result, result, xzr, eq
	ret

	.p2align 4
	/* Main loop, two blocks per iteration.  cmhs compares the match mask
	   (0xff or 0x00) against the data byte as unsigned, so a lane is set
	   when the byte matched or is NUL.  umaxp folds the 16 lanes into the
	   low 64 bits so one scalar test covers the whole block.  */
L(scan):
	ldr	qbytes, [blk, 16]
	cmeq	vmatch.16b, vbytes.16b, vchr.16b
	cmhs	vterm.16b, vmatch.16b, vbytes.16b
	umaxp	vsynd.16b, vterm.16b, vterm.16b
	fmov	synd, dsynd
	cbnz	synd, L(hit)
	ldr	qbytes, [blk, 32]!
	cmeq	vmatch.16b, vbytes.16b, vchr.16b
	cmhs	vterm.16b, vmatch.16b, vbytes.16b
	umaxp	vsynd.16b, vterm.16b, vterm.16b
	fmov	synd, dsynd
	cbz	synd, L(scan)
	/* The hit is in the block at blk; step back so that both ways of
	   reaching L(hit) have the block at blk + 16.  */
	sub	blk, blk, 16
L(hit):

	/* Build the 4-bits-per-byte syndrome for the block at blk + 16.  On
	   big-endian the select is inverted (bif) and the count is taken
	   from the most significant end, so no bit reversal is done.  */
#ifdef __AARCH64EB__
	bif	vterm.16b, vmatch.16b, vmask.16b
	shrn	vsynd.8b, vterm.8h, 4		/* 128->64 */
	fmov	synd, dsynd
#else
	bit	vterm.16b, vmatch.16b, vmask.16b
	shrn	vsynd.8b, vterm.8h, 4		/* 128->64 */
	fmov	synd, dsynd
	rbit	synd, synd
#endif
	add	blk, blk, 16
	clz	synd, synd
	/* The count is a multiple of 4 if the character was found; bit 1 is
	   set when a NUL was reached first.  */
	tst	synd, 2
	add	result, blk, synd, lsr 2
	csel	result, result, xzr, eq
	ret

END (__strchr_aarch64_mte)
