xref: /freebsd/lib/libc/riscv/string/strnlen.S (revision 5a52f0704435b089199201be0029e0d7c9ef2fce)
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
5 */
6
7#include <machine/asm.h>
8
/*
 * size_t strnlen(const char *s, size_t maxlen)
 *
 * Return the index of the first NUL byte in s, or maxlen if there is no NUL
 * byte among the first maxlen bytes.
 *
 * The string is scanned one aligned 64-bit word at a time.  Whole aligned
 * words are loaded, so bytes in front of s (first word) and past s + maxlen
 * (last word) may be read; the former are masked out before testing and the
 * latter are discarded by the final min(zero_idx, maxlen).
 *
 * Any maxlen up to SIZE_MAX is accepted: s + maxlen is saturated instead of
 * being allowed to wrap, and the final clamp is an unsigned comparison.
 *
 * a0 - const char *s
 * a1 - size_t maxlen
 */
ENTRY(strnlen)
	/*
	 * a0 - const char *s;
	 * a1 - size_t maxlen;
	 * a2 - uint64_t *ptr;
	 * a3 - char iter[8];
	 * a4 - uint64_t *end_align;
	 * a5 - uint64_t *end_unroll;
	 * a6 - second word of the unrolled loop;
	 * t0 - 0x0101010101010101 (reused as scratch in .Lfind_zero);
	 * t1 - 0x8080808080808080 (reused as scratch in the final min);
	 * t2, t3 - scratch;
	 */

	/* maxlen == 0: nothing may be examined, result is 0 (== maxlen) */
	beqz a1, .Lnot_found

	/*
	 * end = s + maxlen, saturated to the highest address if the sum
	 * wraps.  Without this, a huge maxlen (e.g. SIZE_MAX) makes end land
	 * at or before s and the scan stops before reaching the NUL byte.
	 */
	add a4, a0, a1
	sltu t2, a4, a0		/* t2 = 1 iff s + maxlen wrapped */
	neg t2, t2		/* t2 = wrapped ? ~0 : 0 */
	or a4, a4, t2

	/* ptr = s & ~0b111 */
	/* t0 = 0x0101010101010101 */
	/* t1 = 0x8080808080808080 */
	/* end_align = (end + 7) & ~0b111 (modulo 2^64, only tested with ==) */
	/* mask_start = t0 >> ((-s.value) << 3) */
	li t0, 0x01010101
	addi a4, a4, 7
	slli t1, t0, 32
	neg t2, a0
	andi a4, a4, ~0b111
	or t0, t0, t1
	slli t2, t2, 3
	andi a2, a0, ~0b111
	slli t1, t0, 7
	srl t2, t0, t2

	/* if pointer is aligned skip to loop */
	beq a0, a2, .Lskip_start

	/* iter = *ptr */
	ld a3, (a2)

	/*
	 * iter = iter | mask_start
	 * Sets the low bit of every byte that lies before s so that those
	 * bytes can never be reported as zero.
	 */
	or a3, a3, t2

	/*
	 * has_zero: (iter - t0) & ~iter & t1 is non-zero iff iter contains a
	 * zero byte; its lowest set bit is bit 7 of the lowest zero byte.
	 */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	bnez a3, .Lfind_zero

.Lskip_start:
	/* end_unroll = ptr + ((end_align - ptr) & ~0b1111) */
	sub t2, a4, a2
	andi t2, t2, ~0b1111
	add a5, a2, t2

	/* while (ptr != end_unroll), two words per iteration */
	beq a2, a5, .Lskip_loop
.Lloop:
	ld a3, (a2)
	ld a6, 8(a2)

	/* has_zero for both words */
	not t2, a3
	not t3, a6
	sub a3, a3, t0
	sub a6, a6, t0
	and t2, t2, t1
	and t3, t3, t1
	and a3, a3, t2
	and a6, a6, t3

	/* .Lfind_zero expects ptr to be one word past the word in a3 */
	addi a2, a2, 8
	bnez a3, .Lfind_zero

	mv a3, a6

	addi a2, a2, 8
	bnez a3, .Lfind_zero

	bne a2, a5, .Lloop

.Lskip_loop:

	/* at most one word is left between end_unroll and end_align */
	beq a2, a4, .Lnot_found

	ld a3, (a2)

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2


	addi a2, a2, 8
	beqz a3, .Lnot_found

.Lfind_zero:

	/* move ptr back to the word that contains the zero byte */
	addi a2, a2, -8

	/* isolate lowest set bit */
	neg t0, a3
	and a3, a3, t0

	li t0, 0x0001020304050607
	srli a3, a3, 7

	/* lowest set bit is 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left */
	mul	a3, a3, t0

	/* highest byte contains idx of first zero */
	srli a3, a3, 56

	/* zero_idx = (ptr - s) + idx of the zero byte within the word */
	sub a2, a2, a0
	add a2, a2, a3

	/*
	 * min(zero_idx, maxlen), branch-free and unsigned:
	 * result = maxlen + ((zero_idx - maxlen) & -(zero_idx < maxlen))
	 * A sign-based mask would give the wrong answer once
	 * maxlen - zero_idx no longer fits in a signed 64-bit value.
	 */
	sltu t1, a2, a1
	sub a2, a2, a1
	neg t1, t1
	and a2, a2, t1
	add a0, a1, a2

	ret

.Lnot_found:
	/* no NUL byte within the first maxlen bytes */
	mv a0, a1
	ret

END(strnlen)
144