/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, Intel Corporation
 * All rights reserved.
 */

/*
 * strlen - calculate the length of string
 */

#include "SYS.h"
#include "proc64_id.h"

#define	LABEL(s) .strlen##s

	/*
	 * size_t strlen(const char *s)
	 *
	 * This implementation uses SSE instructions to compare up to 16 bytes
	 * at a time looking for the end of string (null char).
	 *
	 * In:       %rdi = s
	 * Out:      %rax = number of bytes preceding the first null char
	 * Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0, flags
	 *
	 * Register roles:
	 *   %rdi   original string pointer; negated at exit so that it can
	 *          be folded into the length calculation with a single lea
	 *   %rsi   scan pointer; after every compare it is advanced so that
	 *          it points 16 bytes past the chunk that was just examined
	 *   %rcx   s & 15 (bytes between the 16-byte boundary and s); in
	 *          the BSF exit tail it is reused for the bit index
	 *   %edx   pmovmskb result; bit i set means byte i of the chunk
	 *          is a null char
	 *   %xmm0  16 null chars to compare against.  pcmpeqb overwrites it
	 *          with the per-byte result, which is all zeroes whenever
	 *          no null char was found, so it can be reused as the
	 *          comparand for the next chunk without being cleared.
	 */
	ENTRY(strlen)			/* (const char *s) */
	mov	%rdi, %rsi		/* keep original %rdi value */
	mov	%rsi, %rcx
	pxor	%xmm0, %xmm0		/* 16 null chars */
	and	$15, %rcx		/* %rcx = misalignment of s */
	jz	LABEL(align16_loop)	/* string is 16 byte aligned */

	/*
	 * Unaligned case. Round down to 16-byte boundary before comparing
	 * 16 bytes for a null char. The code then compensates for any extra
	 * chars preceding the start of the string.
	 */
LABEL(unalign16):
	and	$0xfffffffffffffff0, %rsi

	pcmpeqb	(%rsi), %xmm0
	lea	16(%rdi), %rsi		/* so that the exit tail sees offset 0 */
	pmovmskb %xmm0, %edx

	/*
	 * Drop the mask bits that belong to bytes in front of the string.
	 * After the shift, bit 0 corresponds to s[0].
	 */
	shr	%cl, %edx
	test	%edx, %edx
	jnz	LABEL(exit)
	sub	%rcx, %rsi		/* no null, adjust to next 16-byte boundary */
	/*
	 * A null char located before the start of the string leaves non-zero
	 * bytes in %xmm0 even though its mask bit was shifted out above, so
	 * the comparand has to be cleared again here.
	 */
	pxor	%xmm0, %xmm0

	/*
	 * Main loop, unrolled four times.  Each step compares one aligned
	 * 16-byte chunk and leaves %rsi pointing just past that chunk.
	 */
	.p2align 4
LABEL(align16_loop):			/* 16 byte aligned */
	pcmpeqb	(%rsi), %xmm0		/* look for null bytes */
	pmovmskb %xmm0, %edx		/* move each byte mask of %xmm0 to edx */

	add	$16, %rsi		/* prepare to search next 16 bytes */
	test	%edx, %edx		/* if no null byte, %edx must be 0 */
	jnz	LABEL(exit)		/* found a null */

	pcmpeqb	(%rsi), %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %rsi
	test	%edx, %edx
	jnz	LABEL(exit)

	pcmpeqb	(%rsi), %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %rsi
	test	%edx, %edx
	jnz	LABEL(exit)

	pcmpeqb	(%rsi), %xmm0
	pmovmskb %xmm0, %edx
	add	$16, %rsi
	test	%edx, %edx
	jz	LABEL(align16_loop)	/* otherwise fall through to exit */

	/*
	 * A null char was found.  %edx is non-zero and %rsi points 16 bytes
	 * past the position that bit 0 of %edx stands for, so that position
	 * is at offset (%rsi - 16 - s) from the start of the string.
	 */
	.p2align 4
LABEL(exit):
	neg	%rdi			/* %rdi = -s */
	/*
	 * Check to see if BSF is fast on this processor. If not, use a
	 * different exit tail to find first bit set indicating null byte
	 * match.  USE_BSF and .memops_method are defined outside this file
	 * (presumably via proc64_id.h).
	 */
	testl	$USE_BSF, .memops_method(%rip)
	jz	LABEL(AMD_exit)

	lea	-16(%rdi, %rsi), %rax	/* calculate exact offset */
	bsf	%edx, %ecx		/* Least significant 1 bit is index of null */
	lea	(%rax, %rcx),%rax
	ret

	/*
	 * This exit tail does not use the bsf instruction.  It tests the
	 * mask one bit at a time, low byte (%dl) first and then high byte
	 * (%dh).  Bit 7 of either byte is never tested explicitly: the byte
	 * is known to be non-zero when its chain is entered, so if bits 0-6
	 * are all clear bit 7 must be the one that is set.
	 */
	.p2align 4
LABEL(AMD_exit):
	lea	-16(%rdi, %rsi), %rax	/* offset for bit 0 of the mask */
	test	%dl, %dl
	jz	LABEL(exit_high)	/* null is in bytes 8-15 */
	test	$0x01, %dl
	jnz	LABEL(exit_tail0)

	test	$0x02, %dl
	jnz	LABEL(exit_tail1)

	/*
	 * NOTE(review): this alignment directive sits in the middle of
	 * straight-line code, so any padding it emits is executed.
	 * Presumably intentional; confirm before changing.
	 */
	.p2align 4
	test	$0x04, %dl
	jnz	LABEL(exit_tail2)

	test	$0x08, %dl
	jnz	LABEL(exit_tail3)

	test	$0x10, %dl
	jnz	LABEL(exit_tail4)

	test	$0x20, %dl
	jnz	LABEL(exit_tail5)

	test	$0x40, %dl
	jnz	LABEL(exit_tail6)
	add	$7, %rax
	ret

	/*
	 * Low byte of the mask is zero, so the null is one of bytes 8-15.
	 * Bias the result by 8 and share the exit_tailN blocks with the
	 * low-byte chain.
	 */
	.p2align 4
LABEL(exit_high):
	add	$8, %rax
	test	$0x01, %dh
	jnz	LABEL(exit_tail0)

	test	$0x02, %dh
	jnz	LABEL(exit_tail1)

	test	$0x04, %dh
	jnz	LABEL(exit_tail2)

	test	$0x08, %dh
	jnz	LABEL(exit_tail3)

	test	$0x10, %dh
	jnz	LABEL(exit_tail4)

	test	$0x20, %dh
	jnz	LABEL(exit_tail5)

	test	$0x40, %dh
	jnz	LABEL(exit_tail6)
	add	$7, %rax
	ret

	/*
	 * exit_tailN: the null is N bytes past the offset already in %rax.
	 */
	.p2align 4
LABEL(exit_tail0):
	ret				/* %rax already holds the length */

	.p2align 4
LABEL(exit_tail1):
	add	$1, %rax
	ret

	.p2align 4
LABEL(exit_tail2):
	add	$2, %rax
	ret

	.p2align 4
LABEL(exit_tail3):
	add	$3, %rax
	ret

	.p2align 4
LABEL(exit_tail4):
	add	$4, %rax
	ret

	.p2align 4
LABEL(exit_tail5):
	add	$5, %rax
	ret

	.p2align 4
LABEL(exit_tail6):
	add	$6, %rax
	ret
	SET_SIZE(strlen)