/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

/*
 * char *strrchr(const char *s, int c)
 *
 * Return a pointer to the last occurrence of (char)c in s, or NULL if
 * it does not occur.  The terminating NUL is part of the string, so
 * strrchr(s, '\0') returns a pointer to the terminator.
 *
 * Strategy: scan the string one aligned 64-bit word at a time.  A NUL
 * byte inside a word is detected with the classic bit trick
 * (x - 0x01..01) & ~x & 0x80..80, and occurrences of c are detected the
 * same way after XORing the word with c replicated into every byte
 * (which turns matching bytes into zero bytes).
 *
 * a0 - const char *s
 * a1 - int c
 */
ENTRY(strrchr)
	/*
	 * Register roles:
	 * a0 - const char *ptr_align    (current aligned read position)
	 * a1 - temporary
	 * a2 - temporary
	 * a3 - temporary
	 * a4 - temporary
	 * a5 - const char[8] cccccccc   ((char)c replicated into all 8 bytes)
	 * a6 - const uint64_t *save_align (word containing the last match seen)
	 * a7 - const uint64_t save_iter   (that word XOR cccccccc: matches are
	 *                                  the zero bytes)
	 * t0 - const uint64_t REP8_0X01
	 * t1 - const uint64_t REP8_0X80
	 */

	/*
	 * Setup:
	 * save_align = 0
	 *	(NULL - no match seen yet)
	 * save_iter = 0xFFFFFFFFFFFFFF00
	 *	(sentinel: only byte 0 "matches", so if c is never found the
	 *	 tail scan falls through to offset 0 and we return
	 *	 save_align + 0 == NULL)
	 * REP8_0X01 = 0x0101010101010101
	 * cccccccc = (char)c * REP8_0X01
	 * REP8_0X80 = (REP8_0X01 << 7) << ((str % 8) * 8)
	 *	(for the first, possibly partial, word: detector bits for the
	 *	 bytes that lie before the start of the string are shifted out
	 *	 so stray NULs/matches there are ignored)
	 * ptr_align = str - str % 8
	 */
	li t0, 0x01010101
	li a6, 0
	slli a2, a0, 3		/* (str % 8) * 8 - sll uses only low 6 bits */
	slli t1, t0, 32
	li a7, 0xFFFFFFFFFFFFFF00
	or t0, t0, t1		/* t0 = 0x0101010101010101 */
	andi a1, a1, 0xFF	/* truncate c to a byte */
	slli t1, t0, 7		/* t1 = 0x8080808080808080 */
	andi a0, a0, ~0b111	/* round s down to an 8-byte boundary */
	mul a5, a1, t0		/* replicate c into every byte */
	sll t1, t1, a2		/* mask out bytes before the string start */

.Lloop: /* do { */
	ld a1, 0(a0)		/* a1 -> data = *ptr_align */
	not a3, a1		/* a3 -> nhz = ~data */
	xor a2, a1, a5		/* a2 -> iter = data ^ cccccccc
				   (bytes equal to c become 0x00) */
	sub a1, a1, t0		/* a1 -> hz = data - REP8_0X01 */
	not a4, a2		/* a4 -> nhc = ~iter */
	and a1, a1, a3		/* hz = hz & nhz */
	sub a3, a2, t0		/* a3 -> hc = iter - REP8_0X01 */
	and a1, a1, t1		/* hz = hz & REP8_0X80
				   (0x80 set in each byte of data that is NUL) */
	and a3, a3, a4		/* hc = hc & nhc */
	addi a4, a1, -1		/* a4 -> mask_end = hz - 1 */
	and a3, a3, t1		/* hc = hc & REP8_0X80
				   (0x80 set in each byte that matches c) */
	xor a4, a4, a1		/* mask_end = mask_end ^ hz
				   (ones up to and including the first NUL) */
	addi a0, a0, 8		/* ptr_align = ptr_align + 8 */
	and a3, a3, a4		/* hc = hc & mask_end
				   (discard matches past the terminator) */
	slli t1, t0, 7		/* REP8_0X80 = REP8_0X01 << 7
				   (restore full detector after first word) */
	not a4, a4		/* mask_end = ~mask_end */

	beqz a3, .Lskip_save	/* if(!hc) goto skip_save */
	or a2, a2, a4		/* iter = iter | mask_end
				   (force bytes past the NUL to be non-zero so
				    they can never look like matches below) */
	addi a6, a0, -8		/* save_align = ptr_align - 8 */
	mv a7, a2		/* save_iter = iter */

.Lskip_save:
	beqz a1, .Lloop		/* } while(!hz) */

.Lfind_char:
	/*
	 * The string end was reached; save_align/save_iter describe the last
	 * word that contained a match.  Matches are the zero bytes of
	 * save_iter; scan from the most significant byte (highest address on
	 * little-endian) downwards so the first zero byte found is the LAST
	 * occurrence of c.  Bytes 7..1 are tested explicitly; falling through
	 * all seven tests means the match is at offset 0 (this is also the
	 * path taken by the "never found" sentinel, yielding NULL).
	 *
	 * a1 -> iter = save_iter
	 * a2 -> mask_iter = 0xFF00000000000000
	 * a3 -> match_off = 7
	 */
	li a2, 0xFF
	mv a1, a7
	slli a2, a2, 56
	li a3, 7

	and a0, a1, a2		/* test byte 7 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2		/* test byte 6 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2		/* test byte 5 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2		/* test byte 4 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2		/* test byte 3 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2		/* test byte 2 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2		/* test byte 1 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1		/* no higher byte matched: offset 0 */

.Lret:
	/* return save_align + match_off */
	add a0, a6, a3
	ret
END(strrchr)