/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

/* Pad with 0x90 (nop) bytes up to the next 16-byte boundary. */
#define ALIGN_TEXT	.p2align 4,0x90

	/* strchrnul is exported as a weak alias of __strchrnul. */
	.weak	strchrnul
	.set	strchrnul, __strchrnul

/*
 * List the implementations of __strchrnul found in this file.  The
 * ARCHFUNCS family of macros comes from amd64_archlevel.h; how one of
 * the variants is picked is defined there, not here.
 */
ARCHFUNCS(__strchrnul)
	ARCHFUNC(__strchrnul, scalar)
	ARCHFUNC(__strchrnul, baseline)
ENDARCHFUNCS(__strchrnul)

/*
 * strchrnul(str, c)
 *
 * Built like strlen(str), except that each step looks for a byte equal
 * to c in addition to the terminating NUL.  The result points at
 * whichever of the two is encountered first.
 *
 * In:	%rdi = str, %sil = c (higher bits of %esi are ignored)
 * Out:	%rax = address of the first c or NUL byte in str
 *
 * Scalar variant: examines one aligned 8-byte word at a time.
 *
 * For a word x, (x - 0x01..01) & ~x & 0x80..80 has bit 7 set in the
 * lowest byte of x that is zero.  Borrows may set further bits in
 * higher bytes, but only the lowest set bit is ever used.  Applying
 * the same test to x ^ cc..cc finds bytes equal to c.
 *
 * Register use once set up:
 *	%rdi	read cursor, always 8-byte aligned
 *	%rsi	c replicated into all eight bytes
 *	%r8	-0x0101010101010101 (so that lea performs the subtraction)
 *	%r9	0x8080808080808080
 *	%r10	0x01 in every byte located before str (head word only)
 *	%rax, %rcx, %rdx, %r11	scratch
 */
ARCHENTRY(__strchrnul, scalar)
	movl	%edi, %ecx		# remember the low bits of str
	andq	$~7, %rdi		# round str down to 8 bytes
	movzbl	%sil, %esi		# c as an unsigned byte
	movabsq	$0x0101010101010101, %r8
	movq	(%rdi), %rax		# aligned word holding the head
	imulq	%r8, %rsi		# c in every byte lane

	/*
	 * The word just loaded may begin before str.  Bytes in front
	 * of the string must never be reported, so 0x01 is ORed into
	 * each of them: a byte with its low bit set is neither NUL
	 * nor able to produce a borrow into the next byte.
	 */
	shll	$3, %ecx		# byte offset -> bit offset
	movq	%r8, %r10
	addq	$8, %rdi		# cursor now sits past the head word
	shlq	%cl, %r10		# 0x01 in lanes at or after str
	xorq	%r8, %r10		# 0x01 in lanes before str
	negq	%r8			# -0x01..01
	movabsq	$0x8080808080808080, %r9

	movq	%rsi, %rcx
	xorq	%rax, %rcx		# x = word ^ c lanes
	orq	%r10, %rax		# hide NUL bytes before str
	orq	%r10, %rcx		# hide c bytes before str
	leaq	(%rax,%r8), %rdx	# word - 0x01..01
	leaq	(%rcx,%r8), %r11	# x - 0x01..01
	notq	%rax			# ~word
	notq	%rcx			# ~x
	andq	%rdx, %rax		# (word - 0x01..01) & ~word
	andq	%r11, %rcx		# (x - 0x01..01) & ~x
	orq	%rcx, %rax		# NUL or c
	andq	%r9, %rax		# keep bit 7 of each lane only
	jnz	.Lscalar_rewind

	/* Main loop, two words per trip. */
	ALIGN_TEXT
.Lscalar_loop:
	movq	(%rdi), %rax		# first word of the pair
	movq	%rsi, %rcx
	xorq	%rax, %rcx		# x = word ^ c lanes
	leaq	(%rax,%r8), %rdx	# word - 0x01..01
	leaq	(%rcx,%r8), %r11	# x - 0x01..01
	notq	%rax			# ~word
	notq	%rcx			# ~x
	andq	%rdx, %rax		# (word - 0x01..01) & ~word
	andq	%r11, %rcx		# (x - 0x01..01) & ~x
	orq	%rcx, %rax		# NUL or c
	andq	%r9, %rax		# keep bit 7 of each lane only
	jnz	.Lscalar_hit		# cursor still addresses this word

	movq	8(%rdi), %rax		# second word of the pair
	addq	$16, %rdi		# cursor is now 8 past that word
	movq	%rsi, %rcx
	xorq	%rax, %rcx		# x = word ^ c lanes
	leaq	(%rax,%r8), %rdx	# word - 0x01..01
	leaq	(%rcx,%r8), %r11	# x - 0x01..01
	notq	%rax			# ~word
	notq	%rcx			# ~x
	andq	%rdx, %rax		# (word - 0x01..01) & ~word
	andq	%r11, %rcx		# (x - 0x01..01) & ~x
	orq	%rcx, %rax		# NUL or c
	andq	%r9, %rax		# keep bit 7 of each lane only
	jz	.Lscalar_loop

	/* %rax holds the match bits of the word that ended the search. */
.Lscalar_rewind:
	subq	$8, %rdi		# cursor back onto the matching word
.Lscalar_hit:
	tzcntq	%rax, %rax		# bit index of the lowest match
	shrl	$3, %eax		# bit index -> byte index
	addq	%rdi, %rax		# address of the c or NUL byte
	ret
ARCHEND(__strchrnul, scalar)

/*
 * Baseline variant: the same search using SSE2, one aligned 16-byte
 * chunk at a time.  Arguments and result are as for the scalar variant.
 *
 * Register use once set up:
 *	%rdi	read cursor, always 16-byte aligned
 *	%xmm0	c replicated into all sixteen bytes
 *	%xmm1	current chunk, then the combined match bytes
 *	%xmm2	zero comparand, then the NUL match bytes
 *	%edx	head mask: bits cleared for bytes located before str
 *	%eax	one match bit per byte of the current chunk
 */
ARCHENTRY(__strchrnul, baseline)
	movl	%edi, %ecx		# remember the low bits of str
	andq	$~0xf, %rdi		# round str down to 16 bytes
	movdqa	(%rdi), %xmm1		# aligned chunk holding the head
	movd	%esi, %xmm0
	andl	$0xf, %ecx		# count of bytes in front of str
	pxor	%xmm2, %xmm2		# all-zero comparand
	movl	$-1, %edx
	punpcklbw %xmm0, %xmm0		# c -> cc
	shll	%cl, %edx		# drop mask bits for bytes before str
	punpcklwd %xmm0, %xmm0		# cc -> cccc
	addq	$16, %rdi		# cursor now sits past the head chunk

	/* Head chunk: matches in front of str are masked away. */
	pcmpeqb	%xmm1, %xmm2		# 0xff where chunk byte is NUL
	pshufd	$0, %xmm0, %xmm0	# cccc -> c in all 16 lanes
	pcmpeqb	%xmm0, %xmm1		# 0xff where chunk byte is c
	por	%xmm2, %xmm1		# NUL or c
	pmovmskb %xmm1, %eax		# one bit per byte
	andl	%edx, %eax		# only bytes that belong to str
	jnz	.Lbaseline_rewind

	/* Main loop, two chunks per trip. */
	ALIGN_TEXT
.Lbaseline_loop:
	movdqa	(%rdi), %xmm1		# first chunk of the pair
	pxor	%xmm2, %xmm2		# fresh zero comparand
	pcmpeqb	%xmm1, %xmm2		# 0xff where chunk byte is NUL
	pcmpeqb	%xmm0, %xmm1		# 0xff where chunk byte is c
	por	%xmm2, %xmm1		# NUL or c
	pmovmskb %xmm1, %eax		# one bit per byte
	testl	%eax, %eax		# anything found?
	jnz	.Lbaseline_hit		# cursor still addresses this chunk

	movdqa	16(%rdi), %xmm1		# second chunk of the pair
	addq	$32, %rdi		# cursor is now 16 past that chunk
	pxor	%xmm2, %xmm2		# fresh zero comparand
	pcmpeqb	%xmm1, %xmm2		# 0xff where chunk byte is NUL
	pcmpeqb	%xmm0, %xmm1		# 0xff where chunk byte is c
	por	%xmm2, %xmm1		# NUL or c
	pmovmskb %xmm1, %eax		# one bit per byte
	testl	%eax, %eax		# anything found?
	jz	.Lbaseline_loop

	/* %eax holds the match bits of the chunk that ended the search. */
.Lbaseline_rewind:
	subq	$16, %rdi		# cursor back onto the matching chunk
.Lbaseline_hit:
	tzcntl	%eax, %eax		# byte index of the lowest match
	addq	%rdi, %rax		# address of the c or NUL byte
	ret
ARCHEND(__strchrnul, baseline)

	.section .note.GNU-stack,"",%progbits