/*
 * strchr - find a character in a string
 *
 * Copyright (c) 2014-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin           x0
#define chrin           w1

#define result          x0

#define src             x2
#define tmp1            x3
#define wtmp2           w4
#define tmp3            x5

#define vrepchr         v0
#define vdata1          v1
#define vdata2          v2
#define vhas_nul1       v3
#define vhas_nul2       v4
#define vhas_chr1       v5
#define vhas_chr2       v6
#define vrepmask_0      v7
#define vrepmask_c      v16
#define vend1           v17
#define vend2           v18

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */
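
/* Illustration only (this block is a comment and is not assembled): a
   scalar C sketch of the syndrome scheme described above.  The helper
   names below are hypothetical and are not part of this file or of the
   routine's interface.

        #include <stdint.h>

        // Build the two-bits-per-byte syndrome for one 32-byte hunk.
        static uint64_t
        syndrome64 (const unsigned char *hunk, unsigned char c)
        {
          uint64_t syn = 0;
          for (int i = 0; i < 32; i++)
            {
              if (hunk[i] == c)
                syn |= 1ull << (2 * i);   // bit 0 of the pair: char match
              if (hunk[i] == 0)
                syn |= 2ull << (2 * i);   // bit 1 of the pair: NUL
            }
          return syn;
        }

        // Decode a non-zero syndrome: an even trailing-zero count means
        // the character was seen no later than any NUL, so return its
        // address; an odd count means the string ended first, so return
        // NULL.  This mirrors the tst/add/csel sequence at L(tail).
        static const char *
        decode (const char *hunk, uint64_t syn)
        {
          unsigned pos = __builtin_ctzll (syn);
          return (pos & 1) ? 0 : hunk + (pos >> 1);
        }
*/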

/* Locals and temporaries.  */

ENTRY (__strchr_aarch64)
        /* Magic constant 0xc0300c03 to allow us to identify which lane
           matches the requested byte.  Even bits are set if the character
           matches, odd bits if either the char is NUL or matches.  */
        mov     wtmp2, 0x0c03
        movk    wtmp2, 0xc030, lsl 16
        dup     vrepchr.16b, chrin
        bic     src, srcin, #31         /* Work with aligned 32-byte hunks.  */
        dup     vrepmask_c.4s, wtmp2
        ands    tmp1, srcin, #31
        add     vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s     /* equiv: lsl #1 */
        b.eq    L(loop)

        /* Input string is not 32-byte aligned.  Rather than forcing
           the padding bytes to a safe value, we calculate the syndrome
           for all the bytes, but then mask off those bits of the
           syndrome that are related to the padding.  */
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        neg     tmp1, tmp1
        cmeq    vhas_nul1.16b, vdata1.16b, #0
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_nul2.16b, vdata2.16b, #0
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        bif     vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
        bif     vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
        and     vend1.16b, vhas_nul1.16b, vrepmask_c.16b
        and     vend2.16b, vhas_nul2.16b, vrepmask_c.16b
        lsl     tmp1, tmp1, #1
        addp    vend1.16b, vend1.16b, vend2.16b         // 256->128
        mov     tmp3, #~0
        addp    vend1.16b, vend1.16b, vend2.16b         // 128->64
        lsr     tmp1, tmp3, tmp1

        mov     tmp3, vend1.d[0]
        bic     tmp1, tmp3, tmp1        // Mask padding bits.
        cbnz    tmp1, L(tail)
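
/* Illustration only: the padding mask built above corresponds to the
   following scalar sketch, where "offset" and "syndrome" are hypothetical
   names for srcin & 31 (non-zero on this path) and the 64-bit syndrome:

        uint64_t mask = ~0ull >> (64 - 2 * offset);   // low 2*offset bits
        syndrome &= ~mask;                            // drop the padding lanes

   e.g. with offset == 3 the low six syndrome bits (two per padding byte)
   are cleared before the cbnz test.  */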

        .p2align 4
L(loop):
        ld1     {vdata1.16b, vdata2.16b}, [src], #32
        cmeq    vhas_chr1.16b, vdata1.16b, vrepchr.16b
        cmeq    vhas_chr2.16b, vdata2.16b, vrepchr.16b
        /* A byte of vhas_chr is 0xff on a match, else 0x00, so an unsigned
           "compare higher or same" against the data is 0xff iff the byte
           matched the character or is NUL.  */
        cmhs    vhas_nul1.16b, vhas_chr1.16b, vdata1.16b
        cmhs    vhas_nul2.16b, vhas_chr2.16b, vdata2.16b
        orr     vend1.16b, vhas_nul1.16b, vhas_nul2.16b
        umaxp   vend1.16b, vend1.16b, vend1.16b         // 128->64
        mov     tmp1, vend1.d[0]
        cbz     tmp1, L(loop)

        /* Termination condition found.  Now need to establish exactly why
           we terminated.  */
        bif     vhas_nul1.16b, vhas_chr1.16b, vrepmask_0.16b
        bif     vhas_nul2.16b, vhas_chr2.16b, vrepmask_0.16b
        and     vend1.16b, vhas_nul1.16b, vrepmask_c.16b
        and     vend2.16b, vhas_nul2.16b, vrepmask_c.16b
        addp    vend1.16b, vend1.16b, vend2.16b         // 256->128
        addp    vend1.16b, vend1.16b, vend2.16b         // 128->64
        mov     tmp1, vend1.d[0]
L(tail):
        /* Count the trailing zeros, by bit reversing...  */
        rbit    tmp1, tmp1
        /* Re-bias source.  */
        sub     src, src, #32
        clz     tmp1, tmp1      /* And counting the leading zeros.  */
        /* Tmp1 is even if the target character was found first.  Otherwise
           we've found the end of string and we weren't looking for NUL.  */
        tst     tmp1, #1
        add     result, src, tmp1, lsr #1
        csel    result, result, xzr, eq
        ret

END (__strchr_aarch64)