xref: /freebsd/lib/libc/riscv/string/strrchr.S (revision 63ff982b17ee398c7808be323d8fd37819a5863c)
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
5 */
6
7#include <machine/asm.h>
8
/* Emit rindex as a weak symbol whose value is set to that of strrchr. */
9        .weak   rindex
10        .set    rindex, strrchr
11
12/*
13 * a0 - const char *s
14 * a1 - int c
15 */
ENTRY(strrchr)
	/*
	 * char *strrchr(const char *s, int c)
	 *
	 * Returns a pointer to the last byte equal to (char)c in the
	 * NUL-terminated string s (the terminator itself counts as part of
	 * the string), or NULL when there is no such byte.
	 *
	 * The string is scanned one aligned 64-bit word at a time.  The
	 * lowest-addressed byte of a word is treated as its least
	 * significant byte (little-endian layout).
	 *
	 * Register usage:
	 * a0 - const char *ptr_align
	 * a1 - temporary
	 * a2 - temporary
	 * a3 - temporary
	 * a4 - temporary
	 * a5 - const char[8] cccccccc
	 * a6 - const uint64_t *save_align
	 * a7 - uint64_t save_iter
	 * t0 - const uint64_t REP8_0X01
	 * t1 - const uint64_t REP8_0X80
	 * t2 - uint64_t fill (non-zero only during the first iteration)
	 */

	/*
	 * save_align = 0
	 * save_iter = 0xFFFFFFFFFFFFFF00
	 * REP8_0X01 = 0x0101010101010101
	 * REP8_0X80 = REP8_0X01 << 7
	 * cccccccc = (char)c * REP8_0X01
	 * fill = ~(-1 << ((str % 8) * 8))
	 * ptr_align = str - str % 8
	 *
	 * fill holds 0xFF in every byte of the first aligned word that lies
	 * before str.  Those bytes must not merely be hidden from the final
	 * detection masks: a 0x00 (or a byte equal to c) located just before
	 * str would otherwise generate a borrow in the "x - REP8_0X01"
	 * subtraction below and make a following 0x01 byte (or c ^ 1 byte)
	 * inside the string look like a NUL (or like a match).  Forcing the
	 * leading bytes to 0xFF in both data and iter means they can neither
	 * match nor borrow.
	 *
	 * With the default save_align/save_iter, .Lfind_char yields
	 * 0 + 0 == NULL when no match was ever recorded.
	 */
	li t0, 0x01010101
	li a6, 0
	slli a2, a0, 3			/* sll uses only the low 6 bits	*/
	slli t1, t0, 32
	li a7, 0xFFFFFFFFFFFFFF00
	or t0, t0, t1
	li t2, -1
	andi a1, a1, 0xFF
	sll t2, t2, a2
	slli t1, t0, 7
	andi a0, a0, ~0b111
	not t2, t2
	mul a5, a1, t0

.Lloop:					/* do {				*/
	ld a1, 0(a0)			/* a1 -> data = *ptr_align	*/
	addi a0, a0, 8			/* ptr_align = ptr_align + 8	*/
	or a1, a1, t2			/* data = data | fill		*/
	xor a2, a1, a5			/* a2 -> iter = data ^ cccccccc	*/
	not a3, a1			/* a3 -> nhz = ~data		*/
	or a2, a2, t2			/* iter = iter | fill		*/
	sub a1, a1, t0			/* a1 -> hz = data - REP8_0X01	*/
	li t2, 0			/* fill = 0			*/
	not a4, a2			/* a4 -> nhc = ~iter		*/
	and a1, a1, a3			/* hz = hz & nhz		*/
	sub a3, a2, t0			/* a3 -> hc = iter - REP8_0X01	*/
	and a1, a1, t1			/* hz = hz & REP8_0X80		*/
	and a3, a3, a4			/* hc = hc & nhc		*/
	addi a4, a1, -1			/* a4 -> mask_end = hz - 1	*/
	and a3, a3, t1			/* hc = hc & REP8_0X80		*/
	xor a4, a4, a1			/* mask_end = mask_end ^ hz	*/
	and a3, a3, a4			/* hc = hc & mask_end		*/
	not a4, a4			/* mask_end = ~mask_end		*/

	/*
	 * mask_end (before inversion) covers every bit up to and including
	 * the top bit of the first NUL byte, or the whole word when there is
	 * no NUL.  Only the lowest set bit of hz/hc is guaranteed to be a
	 * true hit, which is all that is relied upon here: hc is used as a
	 * "this word contains a match" flag, and the exact byte is located
	 * later from save_iter.
	 */
	beqz a3, .Lskip_save		/* if(!hc) goto skip_save	*/
	or a2, a2, a4			/* iter = iter | mask_end	*/
	addi a6, a0, -8			/* save_align = ptr_align - 8	*/
	mv a7, a2			/* save_iter = iter		*/

.Lskip_save:
	beqz a1, .Lloop			/* } while(!hz)			*/

.Lfind_char:
	/*
	 * Locate the highest zero byte of save_iter; bytes past the
	 * terminator and bytes before str were forced to 0xFF above, so it
	 * is the last match in the saved word.  Bytes 7..1 are tested in
	 * turn; if none is zero the match is byte 0.
	 *
	 * a1 -> iter = save_iter
	 * a2 -> mask_iter = 0xFF00000000000000
	 * a3 -> match_off = 7
	 */
	li a2, 0xFF
	mv a1, a7
	slli a2, a2, 56
	li a3, 7

	and a0, a1, a2			/* byte 7 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2			/* byte 6 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2			/* byte 5 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2			/* byte 4 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2			/* byte 3 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2			/* byte 2 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1
	and a0, a1, a2			/* byte 1 */
	srli a2, a2, 8
	beqz a0, .Lret

	addi a3, a3, -1			/* match_off = 0 */

.Lret:
	/* return save_align + match_off */
	add a0, a6, a3
	ret
END(strrchr)
128