xref: /freebsd/lib/libc/riscv/string/strlen.S (revision e09c1583eddd345bdb79f3db8a91166f6f139207)
1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
5 */
6
7#include <machine/asm.h>
8
9/*
10 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
11 * uses haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)
12 * which evaluates to non-zero when there is a zero byte in v
13 *
14 * register a0 - char *s
15 */
ENTRY(strlen)
	/*
	 * size_t strlen(const char *s)
	 *
	 * Walks the string one 8-byte aligned word at a time and applies the
	 * haszero() test to each word.  Every load is 8-byte aligned: the
	 * first one is taken from s rounded down to a multiple of 8, later
	 * ones advance by 8.
	 *
	 * The lowest flagged byte of a word is treated as the byte at the
	 * lowest address, i.e. little-endian byte order is assumed.
	 *
	 * a0     - s (kept intact until the final subtraction); result
	 * a1     - address of the word being examined
	 * a2     - the word itself, then its haszero() flag mask
	 * a3     - scratch for the index computation
	 * t0     - 0x0101010101010101, later the byte-index table
	 * t1     - 0x8080808080808080
	 * t2, t3 - scratch
	 */

	/* haszero constants; the second is derived by shifting the first */
	li	t0, 0x0101010101010101
	slli	t1, t0, 7			# 0x8080808080808080

	/* fetch the aligned word that contains s */
	andi	a1, a0, -8
	ld	a2, 0(a1)
	beq	a0, a1, .Lcheck_word		# s already aligned: plain scan

	/*
	 * s is misaligned by 1..7 bytes.  The low (s % 8) bytes of the word
	 * lie before the string, so OR 0x01 into each of them to keep them
	 * from being reported as a terminator.  srl only looks at the low
	 * 6 bits of its shift count, hence 64 - 8 * s acts as
	 * 64 - 8 * (s % 8).
	 */
	slli	t2, a0, 3
	li	t3, 64
	sub	t3, t3, t2
	srl	t3, t0, t3
	or	a2, a2, t3

	/* haszero() on the patched first word */
	not	t2, a2
	and	t2, t2, t1
	sub	a2, a2, t0
	and	a2, a2, t2

	bnez	a2, .Llocate_nul

.Lnext_word:
	addi	a1, a1, 8			# step to the following word
	ld	a2, 0(a1)
.Lcheck_word:
	/* a2 = (word - 0x01..01) & ~word & 0x80..80 */
	not	t2, a2
	and	t2, t2, t1
	sub	a2, a2, t0
	and	a2, a2, t2

	beqz	a2, .Lnext_word

.Llocate_nul:
	/*
	 * a2 is non-zero and its lowest set bit is bit 8*k + 7, where k is
	 * the index of the first zero byte in the word.  (mask & -mask)
	 * isolates that bit; shifting right by 7 leaves 2^(8*k).
	 */
	li	t0, 0x0001020304050607
	neg	a3, a2
	and	a3, a3, a2
	srli	a3, a3, 7
	/*
	 * Multiplying the table by 2^(8*k) moves it k bytes to the left,
	 * which places the table entry equal to k in the top byte.
	 */
	mul	a3, a3, t0
	srli	a3, a3, 56			# a3 = k

	/* length = (word address - s) + k */
	sub	a1, a1, a0
	add	a0, a1, a3
	ret
END(strlen)
77
78