xref: /freebsd/lib/libc/riscv/string/strnlen.S (revision 5a52f0704435b089199201be0029e0d7c9ef2fce)
1*5a52f070SStrahinja Stanišić/*-
2*5a52f070SStrahinja Stanišić * SPDX-License-Identifier: BSD-2-Clause
3*5a52f070SStrahinja Stanišić *
4*5a52f070SStrahinja Stanišić * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
5*5a52f070SStrahinja Stanišić */
6*5a52f070SStrahinja Stanišić
7*5a52f070SStrahinja Stanišić#include <machine/asm.h>
8*5a52f070SStrahinja Stanišić
/*
 * size_t strnlen(const char *s, size_t maxlen)
 *
 * a0 - const char *s
 * a1 - size_t maxlen
 *
 * Returns in a0 the number of bytes before the first NUL in s,
 * or maxlen if no NUL is found among the first maxlen bytes.
 */
ENTRY(strnlen)
	/*
	 * Word-at-a-time strnlen: returns min(strlen(s), maxlen) in a0.
	 *
	 * The string is scanned with aligned 8-byte loads.  The first
	 * (possibly partial) word has the bytes in front of s forced to
	 * non-zero, the bulk is handled two words per iteration, and a
	 * single trailing word is checked if the word count is odd.
	 * A NUL found past maxlen inside the last word is discarded by
	 * the final min().
	 *
	 * a0 - const char *s;
	 * a1 - size_t maxlen;
	 * a2 - uint64_t *ptr;
	 * a3 - char iter[8];
	 * a4 - uint64_t *end_align;
	 * a5 - uint64_t *end_unroll;
	 * a6 - second word of the unrolled loop
	 * t0 - 0x0101010101010101, t1 - 0x8080808080808080
	 * t2, t3 - scratch
	 */

	/* maxlen == 0: nothing may be read, return 0 (== maxlen) */
	beqz a1, .Lnot_found

	/* ptr = s & ~0b111 */
	/* t0 = 0x0101010101010101 */
	/* t1 = 0x8080808080808080 */
	/*
	 * end = s + maxlen, saturated to all-ones if the addition wraps
	 * (e.g. strnlen(s, SIZE_MAX)).  Without the saturation end_align
	 * would land at or below s and the scan would stop too early.
	 * After saturation end_align becomes 0, i.e. 2^64 in the modular
	 * arithmetic the equality-based loop bounds below rely on.
	 */
	/* end_align = (end + 7) & ~0b111 */
	/* mask_start = t0 >> ((-s.value) << 3) */
	add a4, a0, a1
	li t0, 0x01010101
	sltu t3, a4, a0		/* t3 = 1 if s + maxlen wrapped */
	slli t1, t0, 32
	neg t3, t3		/* t3 = wrapped ? ~0 : 0 */
	neg t2, a0
	or a4, a4, t3		/* saturate end */
	or t0, t0, t1
	addi a4, a4, 7
	slli t2, t2, 3
	andi a4, a4, ~0b111
	andi a2, a0, ~0b111
	slli t1, t0, 7
	srl t2, t0, t2		/* srl only uses the low 6 bits of t2 */

	/* if pointer is aligned skip to loop */
	beq a0, a2, .Lskip_start

	/* iter = *ptr */
	ld a3, (a2)

	/* iter = iter | mask_start: bytes in front of s become non-zero */
	or a3, a3, t2

	/* has_zero: (iter - t0) & ~iter & t1 is non-zero iff iter has a 0 byte */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	bnez a3, .Lfind_zero

.Lskip_start:
	/* end_unroll = ptr + ((end_align - ptr) & ~0b1111) */
	sub t2, a4, a2
	andi t2, t2, ~0b1111
	add a5, a2, t2

	/* while (ptr != end_unroll) */
	beq a2, a5, .Lskip_loop
.Lloop:
	ld a3, (a2)
	ld a6, 8(a2)

	/* has_zero on both words */
	not t2, a3
	not t3, a6
	sub a3, a3, t0
	sub a6, a6, t0
	and t2, t2, t1
	and t3, t3, t1
	and a3, a3, t2
	and a6, a6, t3

	/* ptr is always advanced past the word being tested */
	addi a2, a2, 8
	bnez a3, .Lfind_zero

	mv a3, a6

	addi a2, a2, 8
	bnez a3, .Lfind_zero

	bne a2, a5, .Lloop

.Lskip_loop:

	/* at most one word is left between end_unroll and end_align */
	beq a2, a4, .Lnot_found

	ld a3, (a2)

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2


	addi a2, a2, 8
	beqz a3, .Lnot_found

.Lfind_zero:

	/* move ptr back to the word that contains the zero byte */
	addi a2, a2, -8

	/*
	 * isolate lowest set bit; it marks the zero byte at the lowest
	 * address (the word was loaded little-endian)
	 */
	neg t0, a3
	and a3, a3, t0

	li t0, 0x0001020304050607
	srli a3, a3, 7

	/* lowest set bit is 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left */
	mul	a3, a3, t0

	/* highest byte contains idx of first zero */
	srli a3, a3, 56

	/* zero_idx = (ptr - s) + idx */
	sub a2, a2, a0
	add a2, a2, a3

	/*
	 * min(zero_idx, maxlen), branchless and unsigned so that
	 * maxlen values with the top bit set are handled:
	 * a0 = maxlen + ((zero_idx - maxlen) & (zero_idx < maxlen ? ~0 : 0))
	 */
	sltu t1, a2, a1
	sub a2, a2, a1
	neg t1, t1
	and a2, a2, t1
	add a0, a1, a2

	ret

.Lnot_found:
	/* no NUL within the first maxlen bytes */
	mv a0, a1
	ret

END(strnlen)
144