/*
 * strrchr - find last position of a character in a string.
 *
 * Copyright (c) 2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "../asmdefs.h"

#define srcin		x0
#define chrin		w1
#define result		x0

#define src		x2
#define tmp		x3
#define wtmp		w3
#define synd		x3
#define shift		x4
#define src_match	x4
#define nul_match	x5
#define chr_match	x6

#define vrepchr		v0
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4
#define vrepmask2	v5
#define vend		v5
#define dend		d5

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems). For each tuple, bits 0-1 are set if
   the relevant byte matched the requested character; bits 2-3 are set
   if the relevant byte matched the NUL end of string.  */

ENTRY (__strrchr_aarch64_mte)
	PTR_ARG (0)
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	mov	wtmp, 0x3003
	dup	vrepmask.8h, wtmp
	tst	srcin, 15
	beq	L(loop1)

	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	mov	wtmp, 0xf00f
	dup	vrepmask2.8h, wtmp
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	lsl	shift, srcin, 2
	fmov	synd, dend
	lsr	synd, synd, shift
	lsl	synd, synd, shift
	ands	nul_match, synd, 0xcccccccccccccccc
	bne	L(tail)
	cbnz	synd, L(loop2)

	.p2align 5
L(loop1):
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop1)

	cmeq	vhas_nul.16b, vdata.16b, 0
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	bic	vhas_nul.8h, 0x0f, lsl 8
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	ands	nul_match, synd, 0xcccccccccccccccc
	beq	L(loop2)

L(tail):
	sub	nul_match, nul_match, 1
	and	chr_match, synd, 0x3333333333333333
	ands	chr_match, chr_match, nul_match
	sub	result, src, 1
	clz	tmp, chr_match
	sub	result, result, tmp, lsr 2
	csel	result, result, xzr, ne
	ret

	.p2align 4
L(loop2):
	cmp	synd, 0
	csel	src_match, src, src_match, ne
	csel	chr_match, synd, chr_match, ne
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	tst	synd, 0xcccccccccccccccc
	beq	L(loop2)

	bic	vhas_nul.8h, 0x0f, lsl 8
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	and	nul_match, synd, 0xcccccccccccccccc
	sub	nul_match, nul_match, 1
	and	tmp, synd, 0x3333333333333333
	ands	tmp, tmp, nul_match
	csel	chr_match, tmp, chr_match, ne
	csel	src_match, src, src_match, ne
	sub	src_match, src_match, 1
	clz	tmp, chr_match
	sub	result, src_match, tmp, lsr 2
	ret

END (__strrchr_aarch64_mte)
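
/* Reference model (illustration only, not part of the build): a minimal
   scalar C sketch of the syndrome scheme described in the "Core algorithm"
   comment above.  The helper names below are hypothetical and do not
   correspond to any symbol in this file or elsewhere in the library.

   #include <stddef.h>
   #include <stdint.h>

   // Build the 64-bit syndrome for one 16-byte chunk: four bits per byte,
   // bits 0-1 flag a character match, bits 2-3 flag the terminating NUL.
   static uint64_t
   chunk_syndrome (const unsigned char *chunk, unsigned char c)
   {
     uint64_t synd = 0;
     for (int i = 0; i < 16; i++)
       {
	 if (chunk[i] == c)
	   synd |= 0x3ULL << (4 * i);	// bits 0-1 of this byte's tuple
	 if (chunk[i] == 0)
	   synd |= 0xCULL << (4 * i);	// bits 2-3 of this byte's tuple
       }
     return synd;
   }

   // Recover the address of the last character match that precedes the NUL,
   // mirroring the L(tail) logic above: nul_match - 1 masks off everything
   // at or beyond the lowest NUL bit, and clz/4 converts the highest
   // surviving match bit back into a byte index.  chunk_end points one past
   // the 16-byte chunk, like src after the post-incremented load.
   static const char *
   last_match_before_nul (const char *chunk_end, uint64_t synd)
   {
     uint64_t nul_match = synd & 0xccccccccccccccccULL;
     uint64_t chr_match = synd & 0x3333333333333333ULL;
     chr_match &= nul_match - 1;
     if (chr_match == 0)
       return NULL;
     return chunk_end - 1 - __builtin_clzll (chr_match) / 4;
   }  */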