/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

	.weak rindex
	.set rindex, strrchr

/*
 * char *strrchr(const char *s, int c)
 *
 * Returns a pointer to the last byte equal to (char)c in the NUL-terminated
 * string s, or NULL when there is none.  The terminating NUL is treated as
 * part of the string, so strrchr(s, 0) returns a pointer to the terminator.
 *
 * In:
 *	a0 - const char *s
 *	a1 - int c
 * Out:
 *	a0 - pointer to the last match, or NULL
 * Clobbers:
 *	a1-a7, t0-t2
 *
 * The string is scanned one aligned 64-bit word at a time (RV64 `ld`), and
 * byte N of a word is taken to live at address word + N (little-endian).
 * Zero bytes are located with the usual
 *	(x - REP8_0X01) & ~x & REP8_0X80
 * test.  Only the lowest flagged byte of that test is trustworthy: a borrow
 * out of a zero byte can flag a 0x01 byte above it.  For that reason the
 * bytes of the first word that precede s are forced to 0xFF in the input
 * (mask_start) rather than being masked out of the result; otherwise a NUL or
 * a byte equal to c sitting just before s could produce a bogus terminator or
 * a bogus match.
 */
ENTRY(strrchr)
	/*
	 * a0 - const char *ptr_align
	 * a1 - temporary
	 * a2 - temporary
	 * a3 - temporary
	 * a4 - temporary
	 * a5 - const char[8] cccccccc
	 * a6 - const char *save_align
	 * a7 - uint64_t save_iter
	 * t0 - const uint64_t REP8_0X01
	 * t1 - const uint64_t REP8_0X80
	 * t2 - uint64_t mask_start
	 */

	/*
	 * save_align = 0
	 * save_iter = 0xFFFFFFFFFFFFFF00
	 * REP8_0X01 = 0x0101010101010101
	 * REP8_0X80 = REP8_0X01 << 7
	 * cccccccc = (char)c * REP8_0X01
	 * mask_start = ~(-1 << ((str % 8) * 8))
	 * ptr_align = str - str % 8
	 *
	 * With save_align = 0 and a save_iter whose only zero byte is byte 0,
	 * .Lfind_char yields 0 + 0 = NULL when nothing was ever saved.
	 */
	li	t0, 0x01010101
	li	a6, 0
	slli	a2, a0, 3		/* sll only uses the low 6 bits */
	slli	t1, t0, 32
	li	a7, 0xFFFFFFFFFFFFFF00
	or	t0, t0, t1
	andi	a1, a1, 0xFF
	li	t2, -1
	slli	t1, t0, 7
	andi	a0, a0, ~0b111
	sll	t2, t2, a2		/* ones from the first byte of str up */
	mul	a5, a1, t0
	not	t2, t2			/* 0xFF in every byte before str */

.Lloop:					/* do { */
	ld	a1, 0(a0)		/* a1 -> data = *ptr_align */
	addi	a0, a0, 8		/* ptr_align = ptr_align + 8 */
	xor	a2, a1, a5		/* a2 -> iter = data ^ cccccccc */
	or	a1, a1, t2		/* data = data | mask_start */
	or	a2, a2, t2		/* iter = iter | mask_start */
	li	t2, 0			/* mask_start = 0 (first word only) */
	not	a3, a1			/* a3 -> nhz = ~data */
	sub	a1, a1, t0		/* a1 -> hz = data - REP8_0X01 */
	not	a4, a2			/* a4 -> nhc = ~iter */
	and	a1, a1, a3		/* hz = hz & nhz */
	sub	a3, a2, t0		/* a3 -> hc = iter - REP8_0X01 */
	and	a1, a1, t1		/* hz = hz & REP8_0X80 */
	and	a3, a3, a4		/* hc = hc & nhc */
	addi	a4, a1, -1		/* a4 -> mask_end = hz - 1 */
	and	a3, a3, t1		/* hc = hc & REP8_0X80 */
	xor	a4, a4, a1		/* mask_end = mask_end ^ hz */
	and	a3, a3, a4		/* hc = hc & mask_end */
	not	a4, a4			/* mask_end = ~mask_end */

	/*
	 * mask_end (before the final not) covers every bit up to and including
	 * the 0x80 flag of the terminator, or the whole word when there is no
	 * terminator.  hc is therefore non-zero only when c occurs at or
	 * before the terminator in this word.
	 */
	beqz	a3, .Lskip_save		/* if(!hc) goto skip_save */
	or	a2, a2, a4		/* iter = iter | mask_end */
	addi	a6, a0, -8		/* save_align = ptr_align - 8 */
	mv	a7, a2			/* save_iter = iter */

.Lskip_save:
	beqz	a1, .Lloop		/* } while(!hz) */

.Lfind_char:
	/*
	 * save_iter has a zero byte exactly where the saved word matched c
	 * inside the string; bytes before str and bytes after the terminator
	 * were forced to 0xFF.  Probe bytes 7 down to 1 and stop at the first
	 * zero one; if none is zero the match is byte 0.
	 *
	 * a1 -> iter = save_iter
	 * a2 -> mask_iter = 0xFF00000000000000
	 * a3 -> match_off = 7
	 */
	li	a2, 0xFF
	mv	a1, a7
	slli	a2, a2, 56
	li	a3, 7

	and	a0, a1, a2		/* byte 7 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 6 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 5 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 4 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 3 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 2 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1
	and	a0, a1, a2		/* byte 1 */
	srli	a2, a2, 8
	beqz	a0, .Lret

	addi	a3, a3, -1		/* match_off = 0 */

.Lret:
	/* return save_align + match_off */
	add	a0, a6, a3
	ret
END(strrchr)