/*
 * strrchr - find last position of a character in a string.
 *
 * Copyright (c) 2014-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 */

#include "asmdefs.h"

/* Arguments and results.  */
#define srcin		x0
#define chrin		w1

#define result		x0

/* Scratch general-purpose registers.  */
#define src		x2	/* Read cursor; post-incremented by 32 per hunk.  */
#define tmp1		x3
#define wtmp2		w4
#define tmp3		x5
#define src_match	x6	/* Value of src just after the last hunk with a match.  */
#define src_offset	x7	/* Character syndrome of that hunk; 0 = no match yet.  */
#define const_m1	x8
#define tmp4		x9
#define nul_match	x10
#define chr_match	x11

/* Scratch vector registers.  */
#define vrepchr		v0	/* chrin replicated into all 16 bytes.  */
#define vdata1		v1
#define vdata2		v2
#define vhas_nul1	v3
#define vhas_nul2	v4
#define vhas_chr1	v5
#define vhas_chr2	v6
#define vrepmask_0	v7	/* 0x80200802 per 32-bit lane (NUL bits).  */
#define vrepmask_c	v16	/* 0x40100401 per 32-bit lane (character bits).  */
#define vend1		v17
#define vend2		v18	/* Not referenced in this file.  */

/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bit 0 is set iff
   the relevant byte matched the requested character; bit 1 is set
   iff the relevant byte matched the NUL end of string (we trigger
   off bit0 for the special case of looking for NUL).  Since the bits
   in the syndrome reflect exactly the order in which things occur
   in the original string a count_trailing_zeros() operation will
   identify exactly which byte is causing the termination, and why.  */

/* Interface of __strrchr_aarch64:

   In:	 x0 (srcin)  = address of the string; scanning stops at the
			first zero byte.
	 w1 (chrin)  = character to look for.  Only bits 7:0 take part,
			because it is broadcast with a byte-wise dup.
   Out:	 x0 (result) = address of the last byte equal to chrin at or
			before the terminating zero byte, or 0 if there
			is none.
   Clobbers: x2-x11, v0-v7, v16, v17 and the condition flags.
   Stack: not used.
   Memory: the string is read in 32-byte hunks starting at srcin
	 rounded down to a multiple of 32, so bytes before srcin and
	 after the terminator inside those hunks are loaded and then
	 ignored.  */

ENTRY (__strrchr_aarch64)
	/* PTR_ARG comes from asmdefs.h.  NOTE(review): presumably it
	   normalises pointer argument 0 for the target ABI - confirm
	   against asmdefs.h.  */
	PTR_ARG (0)
	/* Magic constant 0x40100401 to allow us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 used
	   similarly for NUL termination.  Each of the four bytes in a
	   lane carries a different even (odd for NUL) bit, so two rounds
	   of pairwise byte addition can merge four neighbouring bytes
	   into one without the bits colliding.  */
	mov	wtmp2, #0x0401
	movk	wtmp2, #0x4010, lsl #16
	dup	vrepchr.16b, chrin
	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
	dup	vrepmask_c.4s, wtmp2
	mov	src_offset, #0		/* No match recorded so far.  */
	ands	tmp1, srcin, #31	/* tmp1 = misalignment, Z set if none.  */
	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
	b.eq	L(aligned)

	/* Input string is not 32-byte aligned.  Rather than forcing
	   the padding bytes to a safe value, we calculate the syndrome
	   for all the bytes, but then mask off those bits of the
	   syndrome that are related to the padding.  */
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	neg	tmp1, tmp1
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vhas_nul1.16b, vhas_chr1.16b		// 128->64
	mov	nul_match, vend1.d[0]
	/* tmp1 = -2 * misalignment.  A register shift uses the amount
	   modulo 64, so the lsr below shifts by 64 - 2 * misalignment
	   and leaves exactly the low 2 * misalignment bits set: the
	   syndrome bits of the bytes that precede srcin.  */
	lsl	tmp1, tmp1, #1
	mov	const_m1, #~0
	lsr	tmp3, const_m1, tmp1
	mov	chr_match, vend1.d[1]

	bic	nul_match, nul_match, tmp3	// Mask padding bits.
	bic	chr_match, chr_match, tmp3	// Mask padding bits.
	cbnz	nul_match, L(tail)

	.p2align 4
L(loop):
	/* The hunk just examined held no NUL.  If it held a match,
	   remember it: src already points past that hunk.  */
	cmp	chr_match, #0
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne
L(aligned):
	ld1	{vdata1.16b, vdata2.16b}, [src], #32
	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
	/* Pairwise unsigned minimum is zero wherever either byte of a
	   pair is zero, which is enough to detect a NUL in the hunk
	   without building the exact NUL syndrome on every iteration.  */
	uminp	vend1.16b, vdata1.16b, vdata2.16b
	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
	cmeq	vend1.16b, vend1.16b, #0
	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
	addp	vend1.16b, vend1.16b, vhas_chr1.16b		// 128->64
	mov	nul_match, vend1.d[0]	// Non-zero iff the hunk has a NUL.
	mov	chr_match, vend1.d[1]	// Exact character syndrome.
	cbz	nul_match, L(loop)

	/* A NUL is somewhere in this hunk: now build the exact NUL
	   syndrome (bit 1 of each two-bit tuple) to locate it.  */
	cmeq	vhas_nul1.16b, vdata1.16b, #0
	cmeq	vhas_nul2.16b, vdata2.16b, #0
	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
	mov	nul_match, vhas_nul1.d[0]

L(tail):
	/* Work out exactly where the string ends.  (x - 1) ^ x sets
	   every bit up to and including the lowest set bit of x, so
	   tmp4 covers the tuples of all bytes up to the first NUL.  */
	sub	tmp4, nul_match, #1
	eor	tmp4, tmp4, nul_match
	ands	chr_match, chr_match, tmp4
	/* And pick the values corresponding to the last match.  */
	csel	src_match, src, src_match, ne
	csel	src_offset, chr_match, src_offset, ne

	/* Count down from the top of the syndrome to find the last match.  */
	clz	tmp3, src_offset
	/* Src_match points beyond the word containing the match, so we can
	   simply subtract half the bit-offset into the syndrome.  Because
	   we are counting down, we need to go back one more character.
	   With the match at byte i of the hunk its bit is 2 * i, so
	   clz = 63 - 2 * i and (clz + 2) >> 1 = 32 - i.  */
	add	tmp3, tmp3, #2
	sub	result, src_match, tmp3, lsr #1
	/* But if the syndrome shows no match was found, then return NULL.
	   This also covers src_match never having been written.  */
	cmp	src_offset, #0
	csel	result, result, xzr, ne

	ret

END (__strrchr_aarch64)