/* xref: /freebsd/lib/libc/aarch64/string/strlen.S (revision 521c1fe0e2002dfd7d8db86eb7144b7865229912) */
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
*/

#include <machine/asm.h>

/*
 * Export the routine defined below under the name strlen: the symbol is
 * declared weak and set equal to __strlen.
 * NOTE(review): weak presumably so that another definition of strlen can
 * take precedence at link time -- confirm against the libc build.
 */
	.weak	strlen
	.set	strlen, __strlen
	.text

/*
 * __strlen -- length of a NUL-terminated string (AArch64, Advanced SIMD).
 *
 * In:		x0 = s, pointer to the string
 * Out:		x0 = number of bytes preceding the first NUL byte
 * Scratch:	x1, x2, x3, x9, x10, v0; no stack is used, nothing is called.
 *
 * The string is scanned in 16-byte chunks, each loaded from a 16-byte
 * aligned address.  For every chunk a 64-bit "nibble mask" is built:
 *
 *	cmeq	turns every NUL byte into 0xff and every other byte into 0x00
 *	shrn #4	narrows each 16-bit lane to 8 bits, keeping 4 bits per byte
 *
 * so nibble i of the mask is 0xf exactly when byte i of the chunk is NUL
 * (byte 0 = lowest address; this assumes a little-endian data layout).
 * The number of trailing zero bits of the mask, divided by 4, is therefore
 * the index of the first NUL byte in the chunk.
 *
 * The first chunk starts at s rounded down to 16 bytes, so it may cover
 * bytes that precede s; their nibbles are shifted out before the test.
 *
 * NOTE(review): the loads can touch bytes before s and past the
 * terminator.  This is presumably safe because an aligned 16-byte load
 * cannot straddle a page boundary -- confirm for the target platform.
 */
ENTRY(__strlen)
	bic	x10, x0, #0xf		// x10 = s rounded down to 16 bytes
	and	x9, x0, #0xf		// x9 = misalignment of s (0..15)
	ldr	q0, [x10]		// first (aligned) chunk
	cmeq	v0.16b, v0.16b, #0	// 0xff where the byte is NUL
	shrn	v0.8b, v0.8h, #4	// reduce to 4 mask bits per byte
	fmov	x1, d0			// x1 = nibble mask of the chunk
	cbz	x9, .Laligned		// s is aligned: nothing to discard
	lsl	x2, x0, #2		// x2 = 4 * s; the lsr below uses x2 mod 64,
					// i.e. the bit index of the nibble of s[0]
	lsr	x1, x1, x2		// drop the nibbles of bytes before s
	cbz	x1, .Lloop		// no NUL in the head, scan further chunks
	rbit	x1, x1			// rbit + clz = count trailing zeros
	clz	x0, x1
	lsr	x0, x0, #2		// 4 mask bits per byte -> byte count
	ret

.Laligned:
	cbnz	x1, .Ldone		// NUL already in the first chunk

.Lloop:
	ldr	q0, [x10, #16]!		// advance x10 by 16, load that chunk
	cmeq	v0.16b, v0.16b, #0
	shrn	v0.8b, v0.8h, #4	// reduce to fit mask in GPR
	fmov	x1, d0
	cbz	x1, .Lloop		// no NUL in this chunk, keep going
.Ldone:
	sub	x0, x10, x0		// bytes from s to the start of the chunk
	rbit	x1, x1			// rbit + clz = count trailing zeros
	clz	x3, x1
	lsr	x3, x3, #2		// index of the NUL inside the chunk
	add	x0, x0, x3		// length = chunk offset + index
	ret
END(__strlen)