/*
 * strchrnul - find a character or nul in a string
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "../asmdefs.h"

/* Argument and return registers.  */
#define srcin		x0	/* In:  pointer to the string.  */
#define chrin		w1	/* In:  character to look for.  */
#define result		x0	/* Out: address of the first hit.  */

/* Scalar scratch registers.
   synd shares x1 with chrin: chrin is consumed by the first dup before
   synd is first written.  shift and maskw share x3/w3: the mask constant
   is broadcast into vnibmask before shift is computed.  */
#define src		x2	/* 16-byte aligned chunk address.  */
#define synd		x1	/* 64-bit syndrome of the current chunk.  */
#define shift		x3	/* 4 * srcin, used to drop leading nibbles.  */
#define maskw		w3	/* Holds SYND_NIBBLE_MASK for the broadcast.  */

/* Vector registers.  */
#define vtarget		v0	/* chrin replicated into all 16 bytes.  */
#define vchunk		v1	/* The 16 string bytes being examined.  */
#define qchunk		q1	/* Same register as vchunk, 128-bit view.  */
#define vhas_nul	v2	/* Not referenced by the code below.  */
#define vmatch		v3	/* Per-byte hit flags (0xff or 0x00).  */
#define vnibmask	v4	/* SYND_NIBBLE_MASK in every 16-bit lane.  */
#define vsynd		v5	/* Pairwise-reduced hit flags.  */
#define dsynd		d5	/* Low 64 bits of vsynd.  */

/* Value of each 16-bit lane of vnibmask: the byte at an even position
   keeps its low nibble (0x0f), the byte at an odd position keeps its
   high nibble (0xf0).  */
#define SYND_NIBBLE_MASK	0xf00f

/* Core algorithm:

   Every 16-byte chunk is reduced to a 64-bit syndrome holding four bits
   per input byte.  A byte is a "hit" when it equals the requested
   character or is NUL.  A hit in an even byte sets bits 0-3 of that byte
   and leaves bits 4-7 clear; a hit in an odd byte sets bits 4-7 and
   leaves bits 0-3 clear.  Adding each even/odd pair of bytes therefore
   merges them into one byte without carries, halving 128 bits to 64.
   The nibbles of the syndrome appear in string order, so the position of
   the first set bit, divided by four, is the index of the first hit.  */

/* __strchrnul_aarch64_mte

   In:   srcin (x0) = start of the string
         chrin (w1) = character to find; only the low byte is used
                      because it is replicated with dup .16b
   Out:  result (x0) = address of the first byte at or after srcin that
                       equals the character or is NUL
   Registers written by the code below: x0-x3, v0, v1, v3, v4, v5.
   No stack is used.

   Every load reads one 16-byte aligned chunk.  The first chunk may
   start before srcin; the nibbles belonging to those earlier bytes are
   shifted out of the syndrome before it is tested.  */
ENTRY (__strchrnul_aarch64_mte)
	PTR_ARG (0)			/* Macro from asmdefs.h, applied to argument 0.  */

	/* Head: examine the aligned chunk that contains srcin.  */
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	dup	vtarget.16b, chrin
	ld1	{vchunk.16b}, [src]
	mov	maskw, SYND_NIBBLE_MASK
	dup	vnibmask.8h, maskw
	cmeq	vmatch.16b, vchunk.16b, vtarget.16b	/* 0xff where byte == chr.  */
	/* Unsigned flag >= byte: true for every match (0xff >= anything)
	   and, among non-matches (flag 0x00), only for NUL bytes.  */
	cmhs	vmatch.16b, vmatch.16b, vchunk.16b
	lsl	shift, srcin, 2		/* Four syndrome bits per byte.  */
	and	vmatch.16b, vmatch.16b, vnibmask.16b
	addp	vsynd.16b, vmatch.16b, vmatch.16b	/* 128 -> 64 bits.  */
	fmov	synd, dsynd
	/* A register-specified shift uses the amount modulo 64, so this
	   drops 4 * (srcin & 15) bits: the nibbles of the bytes that
	   precede srcin within the chunk.  */
	lsr	synd, synd, shift
	cbz	synd, L(scan)		/* No hit in the head chunk.  */

	/* rbit + clz = index of the lowest set bit; >> 2 turns the bit
	   index into a byte offset from srcin.
	   NOTE(review): this rbit is not conditional on __AARCH64EB__,
	   unlike the one after the loop - presumably because ld1 fills
	   the lanes in memory order on either endianness, whereas ldr q
	   does not.  Confirm against the architecture manual.  */
	rbit	synd, synd
	clz	synd, synd
	add	result, srcin, synd, lsr 2
	ret

	.p2align 4
L(scan):
	/* Main loop: advance src by 16 (pre-index writeback) and test the
	   whole chunk for any hit.  */
	ldr	qchunk, [src, 16]!
	cmeq	vmatch.16b, vchunk.16b, vtarget.16b
	cmhs	vmatch.16b, vmatch.16b, vchunk.16b
	/* Pairwise maximum: the low 64 bits are nonzero exactly when at
	   least one of the 16 flags is set.  */
	umaxp	vsynd.16b, vmatch.16b, vmatch.16b
	fmov	synd, dsynd
	cbz	synd, L(scan)

	/* A hit exists in the chunk at src: build the nibble syndrome and
	   locate the first hit.  */
	and	vmatch.16b, vmatch.16b, vnibmask.16b
	addp	vsynd.16b, vmatch.16b, vmatch.16b	/* 128 -> 64 bits.  */
	fmov	synd, dsynd
#ifndef __AARCH64EB__
	/* Little-endian build: the first byte sits in the lowest nibble,
	   so reverse the bits before counting leading zeros.  */
	rbit	synd, synd
#endif
	clz	synd, synd
	add	result, src, synd, lsr 2	/* Bit index / 4 = byte offset.  */
	ret

END (__strchrnul_aarch64_mte)
