xref: /freebsd/lib/libc/riscv/string/strlen.S (revision e09c1583eddd345bdb79f3db8a91166f6f139207)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */
6*e09c1583SStrahinja Stanišić
7*e09c1583SStrahinja Stanišić#include <machine/asm.h>
8*e09c1583SStrahinja Stanišić
/*
 * size_t strlen(const char *s)
 *
 * Scans the string one aligned 8-byte word at a time, using the "haszero"
 * test from
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 *
 *	haszero(v) = (v - 0x0101010101010101) & ~v & 0x8080808080808080
 *
 * which evaluates to non-zero exactly when some byte of v is zero.  Borrows
 * can set spurious marker bits above the first zero byte, so only the lowest
 * set bit of the result is trusted; with a little-endian load that bit
 * belongs to the zero byte with the lowest address.
 *
 * In:		a0 - char *s
 * Out:		a0 - number of bytes before the terminating NUL
 * Clobbers:	a1, a2, a3, t0, t1, t2, t3
 *
 * Uses 64-bit loads (ld) and mul, i.e. RV64 with the M extension.  Every
 * load is 8-byte aligned, so bytes outside the string may be read, but only
 * from aligned doublewords that also hold bytes of the string.
 */
ENTRY(strlen)
	/*
	 * register a0 - char *str_start (kept for the final subtraction)
	 * register a1 - char *str_ptr   (aligned address of the current word)
	 * register a2 - char[8] iter    (current word, then its haszero mask)
	 * register t0 - 0x0101010101010101
	 * register t1 - 0x8080808080808080
	 */

	/* load constants for haszero */
	li t0, 0x0101010101010101
	slli t1, t0, 7			/* 0x8080808080808080, avoid li */

	/* round str_start down to 8 bytes and load the word containing it */
	andi a1, a0, ~0b111
	ld a2, (a1)
	beq a1, a0, .Lhas_zero		/* already aligned, nothing to mask */

	/*
	 * Fill bytes before str_start with non-zero so they cannot be taken
	 * for the terminator.  With off = str_start & 7 (1..7 on this path),
	 * srl only looks at the low 6 bits of its shift amount, so t3 ends up
	 * as t0 >> (64 - 8 * off): 0x01 in each of the low `off` bytes.
	 */
	slli t2, a0, 3			/* low 6 bits = 8 * off */
	addi t3, t2, -64
	neg t3, t3			/* 64 - 8 * str_start */
	srl t3, t0, t3
	or a2, a2, t3

	/* unrolled iteration of haszero for the first, masked word */
	not t2, a2
	sub a2, a2, t0
	and a2, a2, t2
	and a2, a2, t1

	bnez a2, .Lfind_zero

.Lloop_has_zero:
	ld a2, 8(a1)
	addi a1, a1, 8			/* move ptr to next 8 bytes */
.Lhas_zero:
	/* a2 = haszero(a2) */
	not t2, a2
	sub a2, a2, t0
	and a2, a2, t2
	and a2, a2, t1

	beqz a2, .Lloop_has_zero

.Lfind_zero:
	/*
	 * use (iter & -iter) to isolate the lowest set bit, which is
	 * 0x80 << (8 * k) where k is the index of the first zero byte
	 */
	sub a3, zero, a2		/* a3 = -iter */
	and t1, a2, a3			/* t1 = (iter & -iter) */

	li t0, 0x0001020304050607	/* idx array: byte j holds 7 - j */
	srli t1, t1, 7			/* t1 = 2^(8*k) */
	/*
	 * t1 is now 2^(8*k); multiplying by it shifts the idx array in t0
	 * by k bytes to the left, bringing the byte that holds k to the top
	 */
	mul t1, t1, t0
	/* highest byte contains idx of first zero */
	srli t1, t1, 56

	add a1, a1, t1			/* address of the terminating NUL */
	sub a0, a1, a0			/* length = NUL address - str_start */
	ret
END(strlen)
77*e09c1583SStrahinja Stanišić
78