/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the
 * following conditions are met:
 *
 * + Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the
 *   following disclaimer.
 *
 * + Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the
 *   following disclaimer in the documentation and/or other
 *   materials provided with the distribution.
 *
 * + Neither the name of Advanced Micro Devices, Inc. nor the
 *   names of its contributors may be used to endorse or
 *   promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES,
 * INC. OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is licensee's responsibility to comply with any export
 * regulations applicable in licensee's jurisdiction.
 */

	.ident	"%Z%%M% %I% %E% SMI"

	.file	"%M%"

#include "SYS.h"
#include "cache.h"

/*
 * Build a per-function label of the form .strlen<s>.
 * The empty comment between ".strlen" and "s" is used as a token paste;
 * this assumes the preprocessor drops it without inserting white space
 * (traditional-cpp behaviour) -- TODO confirm against the build flags.
 */
#define LABEL(s) .strlen/**/s

/*
 * strlen(const char *s)
 *
 * Syntax: AT&T, passed through the C preprocessor.
 *
 * In:	%rdi = s
 * Out:	%rax = number of bytes that precede the first zero byte of s
 *
 * Register roles inside the function:
 *	%rsi	scan cursor (starts at s)
 *	%rdi	-s, so the exit path forms cursor - s with a single lea
 *	%rax	the 8-byte word currently being examined
 *	%rcx	constant 0xfefefefefefefeff (see below)
 *	%rdx	0 or -1, derived from the carry flag with sbb
 *	%r8	scratch (alignment counter, then test value)
 *	%r9	countdown loaded from .amd64cache1
 * No stack memory is used.
 *
 * Phases:
 *	1. byte-at-a-time until the cursor is 8-byte aligned
 *	2. seven words (56 bytes) tested with two branches per word
 *	3. a countdown-limited loop of eight words per pass
 *	4. an unbounded loop of eight words per pass that also prefetches
 *	5. tail: locate the zero byte inside the word that tripped a test
 *
 * Zero-byte test used by phases 2-4, with M = 0xfefefefefefefeff:
 *	M is -0x0101010101010101 modulo 2^64, so x + M subtracts one from
 *	every byte of x.  The add produces no carry out exactly when
 *	x < 0x0101010101010101, which can only happen when some byte of x
 *	is zero.  The zero bits of M are bits 8, 16, ..., 56; at those
 *	positions x ^ (x + M) shows the carry that entered the bit, and
 *	that carry is missing exactly when a lower byte failed to carry
 *	because of a zero byte.  Hence (x ^ (x + M)) | M is all ones only
 *	if bytes 0-6 are non-zero, and the carry flag covers byte 7.
 */
	ENTRY(strlen)			/* (const char *s) */

	mov	%rdi, %rsi		/* cursor = s */
	neg	%rdi			/* keep -s for the final subtraction */

LABEL(aligntry):
	mov	%rsi, %r8
	and	$7, %r8d		/* offset of the cursor within its word */
	jz	LABEL(alignafter)	/* already 8-byte aligned */

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* %r8 = -(bytes left until aligned) */

	.p2align 4

LABEL(alignloop):
	cmpb	$0, (%rsi)
	je	LABEL(exit)

	inc	%rsi
	inc	%r8			/* counts up towards zero */
	jnz	LABEL(alignloop)

	.p2align 4

LABEL(alignafter):

LABEL(56try):

LABEL(56):				/* 56-byte */
	mov	(%rsi), %rax		/* first aligned word */
	mov	$0xfefefefefefefeff, %rcx

	/*
	 * Seven straight-line word tests.  Each test branches to the tail
	 * either when the add gives no carry or when incrementing
	 * (x ^ (x + M)) | M does not wrap to zero.  On every branch to the
	 * tail %rax still holds the word addressed by %rsi.
	 */
LABEL(56loop):
	/* word 1 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	/* word 2 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	/* word 3 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	/* word 4 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	/* word 5 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	/* word 6 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	/* word 7 of 7 */
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

LABEL(56after):

	/*
	 * Countdown-limited loop.  Same word test as above, folded into a
	 * single branch: sbb turns the carry of the add into 0 or -1 in
	 * %rdx, and subtracting that from (x ^ (x + M)) | M yields zero only
	 * when the carry was set and the value was all ones.
	 *
	 * %r9 starts at the value stored at .amd64cache1 (presumably a
	 * cache size published through cache.h -- TODO confirm) and drops
	 * by 32 per pass.  mov and lea leave the flags untouched, so the
	 * closing jbe still sees the flags of that sub and is taken only
	 * when %r9 was <= 32 (unsigned) before the subtraction.
	 *
	 * NOTE(review): each pass scans eight words (64 bytes) although the
	 * counter step and the section comment say 32, and for any starting
	 * value above 32 the loop falls through to the prefetch loop after
	 * one pass.  Neither affects the returned length; confirm whether
	 * both are intended before changing them.
	 */
LABEL(32):				/* 32-byte */
	mov	_sref_(.amd64cache1), %r9

	.p2align 4

LABEL(32loop):
	/* word 1 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 2 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 3 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 4 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 5 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 6 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 7 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 8 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	sub	$32, %r9		/* sets the flags consumed by jbe */

	mov	8 (%rsi), %rax		/* mov does not alter flags */
	lea	8 (%rsi), %rsi		/* lea (not add) keeps them intact */

	jbe	LABEL(32loop)

LABEL(32after):

LABEL(pretry):

	/*
	 * Unbounded loop: the same eight single-branch word tests per pass,
	 * plus one non-temporal prefetch 512 bytes ahead of the cursor.
	 * The only way out is a branch to the tail.
	 */
LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

LABEL(preloop):
	/* word 1 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 2 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 3 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 4 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 5 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 6 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 7 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	/* word 8 of 8 */
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	prefetchnta 512 (%rsi)		/* 3DNow: use prefetch */

	mov	8 (%rsi), %rax
	add	$8, %rsi

	jmp	LABEL(preloop)

	.p2align 4

LABEL(preafter):

LABEL(tailtry):

	/*
	 * Tail: %rax holds the word at (%rsi), which the word test flagged
	 * as containing a zero byte.  Check the four low bytes from the
	 * lowest address upwards, advancing the cursor past each non-zero
	 * byte; if none is zero, shift the upper half down and repeat.
	 */
LABEL(tail):				/* 4-byte tail */

LABEL(tailloop):
	test	%al, %al		/* byte 0 */
	jz	LABEL(exit)

	inc	%rsi

	test	%ah, %ah		/* byte 1 */
	jz	LABEL(exit)

	inc	%rsi

	test	$0x00ff0000, %eax	/* byte 2 */
	jz	LABEL(exit)

	inc	%rsi

	test	$0xff000000, %eax	/* byte 3 */
	jz	LABEL(exit)

	inc	%rsi

	shr	$32, %rax		/* bring bytes 4-7 into %eax */
	jmp	LABEL(tailloop)

LABEL(tailafter):

	.p2align 4

LABEL(exit):
	lea	(%rdi, %rsi), %rax	/* length = cursor + (-s) */
	ret

	SET_SIZE(strlen)