/*-
 * Written by Mateusz Guzik <mjg@freebsd.org>
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * Portions of this software were developed by Robert Clausecker
 * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
 *
 * Public domain.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include "amd64_archlevel.h"

/*
 * Note: the scalar routine was written with kernel use in mind (read: no
 * simd), it is only present in userspace as a temporary measure until
 * something better gets imported.
 */

#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/*
 * Constants for the scalar zero-byte test.
 *
 * STRLEN_NEG_ONES is 0 - 0x0101010101010101, so that the subtraction of
 * 0x01 from every byte can be expressed as an addition done with leaq.
 * STRLEN_HIGH_BITS has the top bit of every byte set.
 */
#define STRLEN_NEG_ONES		0xfefefefefefefeff
#define STRLEN_HIGH_BITS	0x8080808080808080

/*
 * Implementation table for strlen: a scalar and a baseline variant.
 * NOTE(review): the ARCHFUNCS/ARCHFUNC macros come from amd64_archlevel.h,
 * which is not visible here; presumably they drive the selection between
 * the two variants -- confirm against that header.
 */
ARCHFUNCS(strlen)
	ARCHFUNC(strlen, scalar)
	ARCHFUNC(strlen, baseline)
ENDARCHFUNCS(strlen)

/*
 * strlen(string)
 *         %rdi
 *
 * Scalar variant; returns the length in %rax.
 *
 * A 64-bit word x contains a zero byte iff
 *
 *	(x - 0x01....01) & ~x & 0x80....80
 *
 * is nonzero.  The subtraction is carried out as x + STRLEN_NEG_ONES.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide
 *   for x86 platforms" by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in
 * assembly for i386.
 *
 * Register use:
 *	%r8	STRLEN_NEG_ONES
 *	%r9	STRLEN_HIGH_BITS
 *	%r10	original string pointer
 *	%rdi	address of the 8-byte word being examined
 *	%r11	word being examined
 *	%rcx	shift count, then the zero-byte indicator
 *	%rdx	mask covering the bytes in front of the string
 */
ARCHENTRY(strlen, scalar)
	movabsq	$STRLEN_NEG_ONES,%r8
	movabsq	$STRLEN_HIGH_BITS,%r9

	movq	%rdi,%r10		/* remember where the string starts */
	movq	%rdi,%rcx		/* copy used to derive the shift count */
	testb	$7,%dil			/* already 8-byte aligned? */
	jz	.Lscalar_aligned

	/*
	 * Misaligned start: round the pointer down to a multiple of 8,
	 * load the whole word and force the bytes in front of the string
	 * to 0xff so that they cannot be mistaken for the terminator.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	shlq	$3,%rcx			/* byte offset -> bit offset; the shift below uses the low 6 bits of %cl */
	movq	$-1,%rdx
	shlq	%cl,%rdx		/* ones from the first string byte upwards */
	notq	%rdx			/* ones only in the bytes before the string */
	orq	%rdx,%r11

	/* Zero-byte test on the patched first word. */
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	.Lscalar_found

	/*
	 * Main loop: step one word at a time until the test reports a
	 * zero byte.  An aligned string enters at .Lscalar_aligned so
	 * that its first word is not skipped.
	 */
	ALIGN_TEXT
.Lscalar_next:
	leaq	8(%rdi),%rdi
.Lscalar_aligned:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jz	.Lscalar_next

	/*
	 * %rcx is nonzero here.  Its lowest set bit is bit 7 of the first
	 * zero byte, so the bit index divided by 8 is that byte's index
	 * within the word at (%rdi).
	 */
.Lscalar_found:
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rcx,%rdi),%rax	/* address of the terminating NUL */
	subq	%r10,%rax		/* minus the start of the string */
	ret
ARCHEND(strlen, scalar)

/*
 * strlen(string)
 *         %rdi
 *
 * Baseline (SSE2) variant; returns the length in %rax.
 *
 * The string is examined in 16-byte chunks at 16-byte aligned addresses.
 * The first chunk may begin before the string; the mask bits belonging
 * to those leading bytes are shifted out.  The main loop is unrolled
 * twice and advances %rdi by 32 per iteration.
 *
 * Register use:
 *	%rsi	original string pointer
 *	%rdi	16 past the first chunk of the current iteration
 *	%ecx	offset of the string within its first chunk
 *	%xmm1	zero before each compare, result of pcmpeqb after it
 *	%eax	bit mask with one bit per NUL byte in the chunk
 */
ARCHENTRY(strlen, baseline)
	mov	%rdi, %rcx
	pxor	%xmm1, %xmm1
	and	$~0xf, %rdi		/* align the pointer down to 16 bytes */
	pcmpeqb	(%rdi), %xmm1		/* compare head (with junk before string) */
	mov	%rcx, %rsi		/* string pointer copy for later */
	and	$0xf, %ecx		/* number of bytes the string is past 16-byte alignment */
	pmovmskb %xmm1, %eax
	add	$32, %rdi		/* set up for the first loop iteration */
	shr	%cl, %eax		/* discard matches in the junk bytes */
	test	%eax, %eax		/* any match? (ZF from SHR is unusable as CL=0 is possible) */
	jnz	.Lbaseline_head

	ALIGN_TEXT
.Lbaseline_loop:
	pxor	%xmm1, %xmm1
	pcmpeqb	-16(%rdi), %xmm1	/* look for NUL bytes in the first chunk */
	pmovmskb %xmm1, %eax
	test	%eax, %eax		/* were any NUL bytes present? */
	jnz	.Lbaseline_found

	/* the same once more for the second chunk */
	pxor	%xmm1, %xmm1
	pcmpeqb	(%rdi), %xmm1
	pmovmskb %xmm1, %eax
	add	$32, %rdi		/* advance to the next iteration */
	test	%eax, %eax
	jz	.Lbaseline_loop

	/*
	 * Match in the second chunk: take back half of the advancement
	 * so that, as for a match in the first chunk, the chunk holding
	 * the NUL byte starts at -16(%rdi).
	 */
	sub	$16, %rdi
.Lbaseline_found:
	tzcnt	%eax, %eax		/* index of the first NUL byte; %eax is nonzero here */
	sub	%rsi, %rdi		/* string length up to the address in %rdi */
	lea	-16(%rdi, %rax, 1), %rax /* chunk offset plus index of NUL: full length */
	ret

	/*
	 * Match in the head: after the shift, bit 0 of %eax corresponds
	 * to the first byte of the string, so the index of the lowest set
	 * bit is the length.
	 */
.Lbaseline_head:
	tzcnt	%eax, %eax
	ret
ARCHEND(strlen, baseline)

	.section .note.GNU-stack,"",%progbits