/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

/*
 * strlen: count the bytes that precede the first zero byte of a string,
 * examining one aligned 8-byte word per iteration.
 *
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 * uses haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)
 * which evaluates to nonzero when there is a zero byte in v.
 * The code below uses the 64-bit form of the same constants.
 *
 * In:       register a0 - char *s
 * Out:      register a0 - number of bytes before the first zero byte
 * Clobbers: a1, a2, a3, t0, t1, t2, t3
 *
 * Notes:
 *  - Every load is an aligned 8-byte ld, so bytes that lie before s (in the
 *    first word) and after the terminator (in the last word) are read, but
 *    no load straddles an 8-byte boundary.
 *  - Byte k of a loaded word, counted from the least significant byte, is
 *    treated as the byte at address (word address + k), i.e. the code
 *    assumes little-endian byte order.
 *  - No stack is used and no other function is called.
 */
ENTRY(strlen)
	/*
	 * register a0 - char *str_start
	 * register a1 - char *str_ptr
	 * register a2 - char[8] iter
	 */

	/* load constants for haszero */
	li	t0, 0x0101010101010101
	slli	t1, t0, 7		# 0x8080808080808080, avoid li

	/* check alignment of str_start */
	andi	a1, a0, ~0b111		# str_ptr = str_start rounded down to 8
	ld	a2, (a1)
	beq	a1, a0, .Lhas_zero	# already aligned, nothing to mask off

	/*
	 * fill bytes before str_start with non-zero
	 *
	 * With k = str_start & 7 (1..7 here), the k low bytes of iter lie
	 * before the string and must not be mistaken for the terminator.
	 * srl only uses the low 6 bits of its shift amount, so shifting by
	 * -(8 * str_start - 64) is a shift by 64 - 8 * k, which leaves 0x01
	 * in exactly the k low bytes of t3.
	 */
	slli	t2, a0, 3
	addi	t3, t2, -64
	neg	t3, t3
	srl	t3, t0, t3
	or	a2, a2, t3

	/* unrolled iteration of haszero */
	not	t2, a2
	sub	a2, a2, t0
	and	a2, a2, t2
	and	a2, a2, t1

	bnez	a2, .Lfind_zero

.Lloop_has_zero:
	ld	a2, 8(a1)
	addi	a1, a1, 8		# move ptr to next 8byte
.Lhas_zero:
	/* iter = haszero(iter): nonzero iff the word contains a zero byte */
	not	t2, a2
	sub	a2, a2, t0
	and	a2, a2, t2
	and	a2, a2, t1

	beqz	a2, .Lloop_has_zero

.Lfind_zero:
	/*
	 * use (iter & -iter) to isolate lowest set bit
	 *
	 * Borrows from the subtraction can set flag bits in bytes above the
	 * first zero byte, but never below it, so only the lowest set bit is
	 * trusted.
	 */
	sub	a3, zero, a2		# a3 = -iter
	and	t1, a2, a3		# t1 = (iter & -iter)

	li	t0, 0x0001020304050607
	srli	t1, t1, 7
	/*
	 * lowest set bit is 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left
	 */
	mul	t1, t1, t0
	/* highest byte contains idx of first zero */
	srli	t1, t1, 56

	add	a1, a1, t1		# a1 = address of the zero byte
	sub	a0, a1, a0		# length = zero byte address - str_start
	ret
END(strlen)