/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the
 * following conditions are met:
 *
 * + Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the
 *   following disclaimer.
 *
 * + Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the
 *   following disclaimer in the documentation and/or other
 *   materials provided with the distribution.
 *
 * + Neither the name of Advanced Micro Devices, Inc. nor the
 *   names of its contributors may be used to endorse or
 *   promote products derived from this software without
 *   specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES,
 * INC. OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is licensee's responsibility to comply with any export
 * regulations applicable in licensee's jurisdiction.
*/

	.file	"strlen.s"

#include "SYS.h"
#include "cache.h"

#define LABEL(s) .strlen/**/s

/*
 * size_t strlen(const char *s)
 *
 * Syntax: AT&T, run through the C preprocessor.
 *
 * In:     %rdi = s
 * Out:    %rax = number of bytes preceding the first zero byte of s
 * Writes: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags.
 *
 * Register roles
 *   %rsi  scan cursor (starts at s)
 *   %rdi  -s, so that the exit path can form cursor - s with one lea
 *   %rax  the 8-byte word most recently loaded from (%rsi)
 *   %rcx  constant 0xfefefefefefefeff (== -0x0101010101010101)
 *   %r8   scratch; also the negative byte counter of the alignment loop
 *   %rdx  0 or -1, the carry captured by sbb in the branch-free steps
 *   %r9   counter loaded through _sref_(.amd64cache1)
 *
 * Outline
 *   1. Scan byte by byte until %rsi is 8-byte aligned.
 *   2. Scan one aligned 8-byte word at a time, in three unrolled phases
 *      (7 words, then a counted 8-word loop, then an 8-word loop that
 *      also issues a prefetch).
 *   3. Once a word is known to hold a zero byte, find it in the tail.
 *
 * Zero-byte test on a word w
 *   t = w + 0xfefefefefefefeff, i.e. w - 0x0101010101010101 mod 2^64.
 *   When w has no zero byte, every byte produces a carry into the next
 *   one, including a carry out of bit 63.  The first byte that fails to
 *   carry is a zero byte.
 *     - the carry out of bit 63 is the CPU carry flag;
 *     - the carries into bits 8, 16, ..., 56 are the corresponding bits
 *       of (t ^ w); or-ing with %rcx sets every other bit, so the value
 *       is all ones (and becomes 0 when incremented) exactly when all
 *       of those carries happened.
 *
 * All 8-byte loads are from 8-byte aligned addresses.
 */

	ENTRY(strlen)			/* (const char *s) */

	mov	%rdi, %rsi		/* %rsi = scan cursor */
	neg	%rdi			/* %rdi = -s, consumed at exit */

LABEL(aligntry):
	mov	%rsi, %r8
	and	$7, %r8d		/* misalignment of s within 8 bytes */
	jz	LABEL(alignafter)	/* already aligned */

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* %r8 = -(bytes up to next boundary) */

	.p2align 4

LABEL(alignloop):
	cmpb	$0, (%rsi)
	je	LABEL(exit)		/* terminator before the boundary */

	inc	%rsi
	inc	%r8			/* counts up to zero */
	jnz	LABEL(alignloop)

	.p2align 4

LABEL(alignafter):

LABEL(56try):

LABEL(56):				/* 56-byte */
	mov	(%rsi), %rax		/* first aligned word */
	mov	$0xfefefefefefefeff, %rcx

	/*
	 * Seven unrolled steps using two branches each:
	 *   jnc - no carry out of bit 63, so the word holds a zero byte
	 *   jnz - some carry between bytes is missing, same conclusion
	 * lea advances the cursor without touching the flags.
	 */
LABEL(56loop):
	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	jnc	LABEL(tail)

	xor	%rax, %r8
	or	%rcx, %r8
	inc	%r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	lea	8 (%rsi), %rsi

LABEL(56after):

	/*
	 * Counted loop.  Each pass tests eight words (64 bytes) while the
	 * counter in %r9 is stepped down by 32.
	 *
	 * From here on each step folds both checks into one branch:
	 * sbb leaves %rdx = -carry, and "sub %rdx, %r8" then adds the carry
	 * to %r8.  The result is zero only when the carry out of bit 63
	 * was set and %r8 was all ones, i.e. only when the word has no
	 * zero byte.
	 *
	 * .amd64cache1 and _sref_ are defined outside this file; presumably
	 * a cache size - confirm in cache.h.
	 */
LABEL(32):				/* 64 bytes per pass, counter -= 32 */
	mov	_sref_(.amd64cache1), %r9

	.p2align 4

LABEL(32loop):
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	sub	$32, %r9		/* sets the flags tested by jbe below */

	mov	8 (%rsi), %rax		/* mov and lea leave the flags alone */
	lea	8 (%rsi), %rsi

	/*
	 * NOTE(review): jbe is taken only when %r9 was <= 32 (unsigned)
	 * before the subtraction, so for larger counter values this loop
	 * body runs once and control falls through to the prefetch loop.
	 * The result is the same either way; confirm whether "ja" was
	 * intended.
	 */
	jbe	LABEL(32loop)

LABEL(32after):

LABEL(pretry):

LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

	/*
	 * Open-ended loop: eight words (64 bytes) per pass plus one
	 * prefetch 512 bytes ahead of the cursor.  It is left only through
	 * the branches to the tail.
	 */
LABEL(preloop):
	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	mov	8 (%rsi), %rax
	add	$8, %rsi

	mov	%rcx, %r8
	add	%rax, %r8
	sbb	%rdx, %rdx

	xor	%rax, %r8
	or	%rcx, %r8
	sub	%rdx, %r8
	jnz	LABEL(tail)

	prefetchnta 512 (%rsi)		/* 3DNow: use prefetch */

	mov	8 (%rsi), %rax
	add	$8, %rsi

	jmp	LABEL(preloop)

	.p2align 4

LABEL(preafter):

LABEL(tailtry):

	/*
	 * %rax holds a word containing a zero byte and %rsi addresses its
	 * lowest byte.  Examine four bytes per pass, lowest address first,
	 * advancing %rsi past every non-zero byte; then shift the upper
	 * four bytes down and repeat.
	 */
LABEL(tail):				/* 4-byte tail */

LABEL(tailloop):
	test	%al, %al		/* byte 0 */
	jz	LABEL(exit)

	inc	%rsi

	test	%ah, %ah		/* byte 1 */
	jz	LABEL(exit)

	inc	%rsi

	test	$0x00ff0000, %eax	/* byte 2 */
	jz	LABEL(exit)

	inc	%rsi

	test	$0xff000000, %eax	/* byte 3 */
	jz	LABEL(exit)

	inc	%rsi

	shr	$32, %rax		/* bring bytes 4..7 into %eax */
	jmp	LABEL(tailloop)

LABEL(tailafter):

	.p2align 4

LABEL(exit):
	lea	(%rdi, %rsi), %rax	/* length = cursor + (-s) */
	ret

	SET_SIZE(strlen)