/*-
 * Written by Mateusz Guzik <mjg@freebsd.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by Robert Clausecker
 * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
 *
 * Public domain.
 */

#include <machine/asm.h>
#include "amd64_archlevel.h"

/*
 * Note: the scalar routine below was written with kernel use in mind
 * (read: no SIMD); it is only present in userspace as a temporary
 * measure until something better gets imported.
 */

#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/* Implementations of strlen offered for selection. */
ARCHFUNCS(strlen)
	ARCHFUNC(strlen, scalar)
	ARCHFUNC(strlen, baseline)
ENDARCHFUNCS(strlen)

/*
 * strlen(string)
 *	  %rdi
 *
 * Scalar variant: scans the string one aligned 8-byte word at a time.
 *
 * A word x contains a NUL byte iff
 *
 *	(x - 0x01....01) & ~x & 0x80....80
 *
 * is non-zero.  Instead of subtracting 0x01....01, its negation
 * (0x0 - 0x01....01 = 0xfefe...feff) is kept in a register so that the
 * subtraction can be carried out as an addition with leaq.
 *
 * For a description of the trick see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide
 *   for x86 platforms" by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in
 * assembly for i386.
 *
 * Register roles:
 *	%rdi	address of the aligned word currently being examined
 *	%r8	0x0 - 0x01....01
 *	%r9	0x80....80
 *	%r10	original string pointer, subtracted at the end
 *	%r11	word under test
 *	%rcx	shift count in the head, then NUL-detection mask
 *	%rdx	mask covering the bytes in front of the string (head only)
 *	%rax	result
 */
ARCHENTRY(strlen, scalar)
	movabsq	$0xfefefefefefefeff, %r8
	movabsq	$0x8080808080808080, %r9

	movq	%rdi, %r10		/* keep the start for the final subtraction */
	movq	%rdi, %rcx
	testb	$7, %dil		/* already 8-byte aligned? */
	jz	.Lscalar_check

	/*
	 * Misaligned start: round the pointer down to an 8-byte boundary
	 * and force every byte that lies in front of the string to 0xff
	 * so that none of them can be mistaken for the terminator.
	 */
	andq	$~7, %rdi
	movq	(%rdi), %r11
	shlq	$3, %rcx		/* bit offset; shlq below uses only the low 6 bits of %cl */
	movq	$-1, %rdx
	shlq	%cl, %rdx
	notq	%rdx			/* ones in the bytes preceding the string */
	orq	%rdx, %r11

	/* NUL test on the patched head word. */
	leaq	(%r11, %r8), %rcx
	notq	%r11
	andq	%r11, %rcx
	andq	%r9, %rcx
	jnz	.Lscalar_found

	/*
	 * Main loop: step to the next aligned word and test it.
	 */
	ALIGN_TEXT
.Lscalar_advance:
	leaq	8(%rdi), %rdi
.Lscalar_check:
	movq	(%rdi), %r11
	leaq	(%r11, %r8), %rcx	/* x - 0x01....01 */
	notq	%r11
	andq	%r11, %rcx		/* ... & ~x */
	andq	%r9, %rcx		/* ... & 0x80....80 */
	jz	.Lscalar_advance
.Lscalar_found:
	bsfq	%rcx, %rcx		/* lowest set bit marks the first NUL byte */
	shrq	$3, %rcx		/* bit index -> byte index within the word */
	leaq	(%rcx, %rdi), %rax	/* address of the NUL byte */
	subq	%r10, %rax		/* minus the start of the string: length */
	ret
ARCHEND(strlen, scalar)

/*
 * strlen(string)
 *	  %rdi
 *
 * Baseline variant: compares 16 bytes at a time against zero using
 * pcmpeqb/pmovmskb on aligned chunks.  The first chunk is the aligned
 * one containing the start of the string; matches in the bytes in
 * front of the string are shifted out of the mask.  The loop is
 * unrolled twice and advances %rdi by 32 bytes per iteration.
 *
 * Register roles:
 *	%rdi	chunk cursor (see the offsets used in the loop)
 *	%rsi	original string pointer
 *	%rcx	misalignment of the string within its first chunk
 *	%xmm1	comparison result
 *	%eax	byte mask of NUL positions, then the result
 */
ARCHENTRY(strlen, baseline)
	mov	%rdi, %rcx
	pxor	%xmm1, %xmm1
	and	$~0xf, %rdi		/* round down to the enclosing aligned chunk */
	pcmpeqb	(%rdi), %xmm1		/* head compare, includes junk before the string */
	mov	%rcx, %rsi		/* remember the string pointer for later */
	and	$0xf, %ecx		/* bytes between the chunk start and the string */
	pmovmskb %xmm1, %eax
	add	$32, %rdi		/* set the cursor up for the first loop pass */
	shr	%cl, %eax		/* drop matches belonging to the junk bytes */
	test	%eax, %eax		/* SHR leaves flags untouched when CL is 0, so test explicitly */
	jnz	.Lbase_head_hit

	ALIGN_TEXT
.Lbase_loop:
	pxor	%xmm1, %xmm1
	pcmpeqb	-16(%rdi), %xmm1	/* look for NUL bytes in the first half */
	pmovmskb %xmm1, %eax
	test	%eax, %eax		/* any NUL byte in this chunk? */
	jnz	.Lbase_found

	/* second half of the unrolled iteration */
	pxor	%xmm1, %xmm1
	pcmpeqb	(%rdi), %xmm1
	pmovmskb %xmm1, %eax
	add	$32, %rdi		/* advance to the next iteration */
	test	%eax, %eax
	jz	.Lbase_loop

	/* NUL found in the second half: the matching chunk is at -32(%rdi) */
	sub	$16, %rdi		/* undo half of the advancement */
.Lbase_found:
	/* here the matching chunk always starts at -16(%rdi) */
	tzcnt	%eax, %eax		/* index of the first NUL byte in the chunk */
	sub	%rsi, %rdi		/* cursor relative to the start of the string */
	lea	-16(%rdi, %rax, 1), %rax /* chunk offset plus NUL index: string length */
	ret

	/* NUL found in the head chunk */
.Lbase_head_hit:
	tzcnt	%eax, %eax		/* the mask was shifted, so this is the length */
	ret
ARCHEND(strlen, baseline)

	.section .note.GNU-stack,"",%progbits