/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org>
 */

#include <machine/asm.h>

/*
 * size_t strnlen(const char *s, size_t maxlen)
 *
 * a0 - const char *s
 * a1 - size_t maxlen;
 *
 * Returns in a0 the index of the first NUL byte of s, or maxlen when
 * that index is not smaller than maxlen.
 *
 * The string is scanned one aligned 64-bit word at a time.  A word x
 * is tested for a zero byte with
 *	(x - 0x0101010101010101) & ~x & 0x8080808080808080
 * whose lowest set bit marks the first zero byte of the word.
 * Whole aligned words are loaded, so bytes before s and bytes after
 * s + maxlen that share a word with the scanned range are read too.
 */
ENTRY(strnlen)
	/*
	 * a0 - const char *s;
	 * a1 - size_t maxlen;
	 * a2 - uint64_t *ptr;
	 * a3 - char iter[8];
	 * a4 - uint64_t *end_align;
	 * a5 - uint64_t *end_unroll;
	 * a6, t0-t3 - scratch
	 */

	/* maxlen == 0: nothing may be read, the result is 0 (== maxlen) */
	beqz a1, .Lnot_found

	/* ptr = s & ~0b111 */
	/* t0 = 0x0101010101010101 */
	/* t1 = 0x8080808080808080 */
	/*
	 * last = s + (maxlen - 1), saturated to UINT64_MAX when the
	 * addition wraps, so that a huge maxlen (e.g. SIZE_MAX) cannot
	 * make the end pointer wrap around onto ptr and produce an
	 * empty scan range.
	 */
	/* end_align = (last & ~0b111) + 8 */
	/* mask_start = t0 >> ((-s.value) << 3) */
	addi a4, a1, -1
	li t0, 0x01010101
	add a4, a0, a4
	slli t1, t0, 32
	sltu t3, a4, a0			/* t3 = 1 if s + maxlen - 1 wrapped */
	neg t2, a0
	neg t3, t3			/* t3 = wrapped ? ~0 : 0 */
	or t0, t0, t1
	or a4, a4, t3			/* saturate last */
	slli t2, t2, 3
	andi a4, a4, ~0b111
	andi a2, a0, ~0b111
	addi a4, a4, 8
	slli t1, t0, 7
	srl t2, t0, t2

	/* if pointer is aligned skip to loop */
	beq a0, a2, .Lskip_start

	/* iter = *ptr */
	ld a3, (a2)

	/*
	 * iter = iter | mask_start
	 * forces the bytes in front of s to be non-zero so that they
	 * cannot be reported as the terminator
	 */
	or a3, a3, t2

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	bnez a3, .Lfind_zero

.Lskip_start:
	/* end_unroll = ptr + ((end_align - ptr) & ~0b1111) */
	sub t2, a4, a2
	andi t2, t2, ~0b1111
	add a5, a2, t2

	/* while (ptr != end_unroll), two words per iteration */
	beq a2, a5, .Lskip_loop
.Lloop:
	ld a3, (a2)
	ld a6, 8(a2)

	/* has_zero, for both words */
	not t2, a3
	not t3, a6
	sub a3, a3, t0
	sub a6, a6, t0
	and t2, t2, t1
	and t3, t3, t1
	and a3, a3, t2
	and a6, a6, t3

	/* ptr always points one word past the word held in a3 */
	addi a2, a2, 8
	bnez a3, .Lfind_zero

	mv a3, a6

	addi a2, a2, 8
	bnez a3, .Lfind_zero

	bne a2, a5, .Lloop

.Lskip_loop:

	/* at most one word is left over after the unrolled loop */
	beq a2, a4, .Lnot_found

	ld a3, (a2)

	/* has_zero */
	not t2, a3
	sub a3, a3, t0
	and t2, t2, t1
	and a3, a3, t2

	addi a2, a2, 8
	beqz a3, .Lnot_found

.Lfind_zero:

	/* move ptr back */
	addi a2, a2, -8

	/* isolate lowest set bit */
	neg t0, a3
	and a3, a3, t0

	li t0, 0x0001020304050607
	srli a3, a3, 7

	/* lowest set bit is 2^(8*k)
	 * multiplying by it shifts the idx array in t0 by k bytes to the left */
	mul a3, a3, t0

	/* highest byte contains idx of first zero */
	srli a3, a3, 56

	/* zero_idx */
	sub a2, a2, a0
	add a2, a2, a3

	/*
	 * min(zero_idx, maxlen), compared as unsigned:
	 * a signed comparison gives the wrong answer once maxlen has its
	 * top bit set (e.g. SIZE_MAX).
	 */
	sltu t1, a2, a1			/* t1 = zero_idx < maxlen */
	sub a2, a2, a1
	neg t1, t1			/* t1 = zero_idx < maxlen ? ~0 : 0 */
	and a2, a2, t1
	add a0, a1, a2

	ret

.Lnot_found:
	mv a0, a1
	ret

END(strnlen)