/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
*/

#include <machine/asm.h>

	.weak	strlen
	.set	strlen, __strlen
	.text

/*
 * size_t strlen(const char *s)
 *
 * In:   x0 = s
 * Out:  x0 = number of bytes preceding the first NUL byte
 * Uses: x1, x2, x3, x9, x10 and v0 as scratch
 *
 * The string is examined in 16-byte chunks, starting with the 16-byte
 * aligned chunk that contains s.  Every load is therefore aligned, and an
 * aligned 16-byte load stays inside a single 16-byte block, so reading
 * the bytes around the string within that block touches no other block.
 *
 * NUL detection: cmeq turns every zero byte into 0xff and every other
 * byte into 0x00.  shrn #4 on the eight 16-bit lanes then narrows the
 * 128-bit result to 64 bits, leaving 4 mask bits per input byte: byte i
 * of the chunk owns bits [4*i, 4*i+3].  The index of the first NUL in a
 * chunk is thus (count of trailing zero bits in the mask) / 4.
 */
ENTRY(__strlen)
	bic	x10, x0, #0xf		// x10 = s rounded down to 16 bytes
	and	x9, x0, #0xf		// x9 = misalignment of s (0..15)
	ldr	q0, [x10]		// aligned chunk containing s
	cmeq	v0.16b, v0.16b, #0	// 0xff in every byte that is NUL
	shrn	v0.8b, v0.8h, #4	// 4 mask bits per byte, 64 bits total
	fmov	x1, d0			// mask into a general register
	cbz	x9, .Laligned		// s already aligned: mask usable as is

	/*
	 * Unaligned head: drop the mask bits belonging to the bytes in
	 * front of s.  A register-specified shift only uses the low six
	 * bits of the amount, so s * 4 acts as (s & 0xf) * 4 here.
	 */
	lsl	x2, x0, #2		// bit offset: 4 mask bits per byte
	lsr	x1, x1, x2		// mask bit 0 now corresponds to s[0]
	cbz	x1, .Lloop		// no NUL in the head: scan next chunks
	rbit	x1, x1			// rbit + clz = count trailing zeros
	clz	x0, x1
	lsr	x0, x0, #2		// 4 mask bits per byte -> length
	ret

.Laligned:
	cbnz	x1, .Ldone		// NUL in first chunk (x10 == x0 here)

.Lloop:
	ldr	q0, [x10, #16]!		// advance x10 by 16, load that chunk
	cmeq	v0.16b, v0.16b, #0
	shrn	v0.8b, v0.8h, #4	// reduce to fit mask in GPR
	fmov	x1, d0
	cbz	x1, .Lloop		// no NUL in this chunk: keep going
.Ldone:
	sub	x0, x10, x0		// bytes from s to the start of the chunk
	rbit	x1, x1			// reverse bits as NEON has no ctz
	clz	x3, x1
	lsr	x3, x3, #2		// index of the NUL inside the chunk
	add	x0, x0, x3		// total length
	ret
END(__strlen)