/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the
 * following conditions are met:
 *
 * + Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the
 * following disclaimer.
 *
 * + Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the
 * following disclaimer in the documentation and/or other
 * materials provided with the distribution.
 *
 * + Neither the name of Advanced Micro Devices, Inc. nor the
 * names of its contributors may be used to endorse or
 * promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL ADVANCED MICRO DEVICES,
 * INC. OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * It is licensee's responsibility to comply with any export
 * regulations applicable in licensee's jurisdiction.
 */

	.ident	"%Z%%M% %I% %E% SMI"

	.file	"%M%"

#include "SYS.h"
#include "cache.h"

#define LABEL(s) .strlen/**/s

/*
 * Zero-byte probe building blocks.
 *
 * %rcx holds 0xfefefefefefefeff, i.e. -0x0101010101010101, so adding it
 * to the word in %rax subtracts one from every byte of that word.  The
 * word contains a zero byte exactly when one of those byte subtractions
 * has to borrow.  A borrow out of the top byte shows up as a clear carry
 * flag after the add.  A borrow between bytes shows up after the xor/or
 * as a value in %r8 that is not all ones.
 *
 * ZBYTE_JCC tests the two conditions with two conditional jumps.
 * ZBYTE_SBB folds the carry into %r8 (sbb leaves 0 or -1 in %rdx, which
 * is then subtracted), so a single jnz covers both conditions.
 * Both jump to LABEL(tail) with %rsi still addressing the word in %rax.
 *
 * NEXT_LEA and NEXT_ADD load the following word and advance the cursor.
 * They differ only in the instruction used for the advance: lea leaves
 * the flags alone, add rewrites them.
 */
#define	ZBYTE_JCC			\
	mov	%rcx, %r8;		\
	add	%rax, %r8;		\
	jnc	LABEL(tail);		\
	xor	%rax, %r8;		\
	or	%rcx, %r8;		\
	inc	%r8;			\
	jnz	LABEL(tail)

#define	ZBYTE_SBB			\
	mov	%rcx, %r8;		\
	add	%rax, %r8;		\
	sbb	%rdx, %rdx;		\
	xor	%rax, %r8;		\
	or	%rcx, %r8;		\
	sub	%rdx, %r8;		\
	jnz	LABEL(tail)

#define	NEXT_LEA			\
	mov	8(%rsi), %rax;		\
	lea	8(%rsi), %rsi

#define	NEXT_ADD			\
	mov	8(%rsi), %rax;		\
	add	$8, %rsi

/*
 * strlen(const char *s)
 *
 * Takes the string address in %rdi and returns the number of bytes that
 * precede the first zero byte in %rax.
 *
 * Register roles, as used by the code below:
 *	%rdi	-s (the argument is negated on entry)
 *	%rsi	scan cursor; %rdi + %rsi at exit is the length
 *	%rax	the 8-byte word most recently loaded from (%rsi)
 *	%rcx	the probe constant 0xfefefefefefefeff
 *	%r8	alignment counter, then probe scratch
 *	%rdx	0 or -1 produced by sbb in the branch-free probe
 *	%r9	counter seeded from .amd64cache1
 *
 * The routine makes no calls and does not touch the stack.  Flags and
 * every register listed above are overwritten.
 */
	ENTRY(strlen)			/* (const char *s) */

	mov	%rdi, %rsi		/* cursor = s */
	neg	%rdi			/* keep -s for the final subtraction */

LABEL(aligntry):
	mov	%rsi, %r8
	and	$7, %r8d		/* misalignment within an 8-byte word */
	jz	LABEL(alignafter)	/* already aligned: skip the byte loop */

LABEL(align):				/* 8-byte align */
	sub	$8, %r8			/* minus the bytes left to the boundary */

	.p2align 4

	/* Byte-at-a-time scan until the cursor is 8-byte aligned. */
LABEL(alignloop):
	cmpb	$0, (%rsi)
	je	LABEL(exit)

	inc	%rsi
	inc	%r8			/* counts up towards zero */
	jnz	LABEL(alignloop)

	.p2align 4

LABEL(alignafter):

LABEL(56try):

	/*
	 * Straight-line probe of the first seven aligned words using the
	 * two-branch form.  Despite its name, nothing branches back to
	 * LABEL(56loop).
	 */
LABEL(56):				/* 56-byte */
	mov	(%rsi), %rax
	mov	$0xfefefefefefefeff, %rcx

LABEL(56loop):
	ZBYTE_JCC
	NEXT_LEA

	ZBYTE_JCC
	NEXT_LEA

	ZBYTE_JCC
	NEXT_LEA

	ZBYTE_JCC
	NEXT_LEA

	ZBYTE_JCC
	NEXT_LEA

	ZBYTE_JCC
	NEXT_LEA

	ZBYTE_JCC
	NEXT_LEA

LABEL(56after):

	/*
	 * Eight branch-free probes per pass, with a counter in %r9.
	 * .amd64cache1 is presumably a cache-size value made available
	 * through cache.h -- TODO confirm.
	 */
LABEL(32):				/* 32-byte */
	mov	_sref_(.amd64cache1), %r9

	.p2align 4

LABEL(32loop):
	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB

	sub	$32, %r9

	/*
	 * mov and lea leave the flags untouched, so the jbe below still
	 * sees the result of the sub above.
	 *
	 * NOTE(review): jbe is taken only when %r9 was <= 32 before the
	 * subtraction.  With a larger starting value the pass above runs
	 * once and control falls through to the prefetch loop.  Confirm
	 * this is the intended policy.  It affects only which loop does
	 * the scanning, not the result.
	 */
	NEXT_LEA

	jbe	LABEL(32loop)

LABEL(32after):

LABEL(pretry):

	/*
	 * Same eight-probe pass, repeated without a counter and with a
	 * prefetch issued once per pass.  The only way out is a probe
	 * jumping to LABEL(tail).
	 */
LABEL(pre):				/* 64-byte prefetch */

	.p2align 4

LABEL(preloop):
	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB
	NEXT_ADD

	ZBYTE_SBB

	prefetchnta 512(%rsi)		/* 3DNow: use prefetch */

	NEXT_ADD

	jmp	LABEL(preloop)

	.p2align 4

LABEL(preafter):

LABEL(tailtry):

	/*
	 * Reached only from a probe that flagged the word in %rax, with
	 * %rsi addressing the first byte of that word.  The low four
	 * bytes are tested in address order (%al is the byte at the
	 * lowest address).  If none is zero, the high half is shifted
	 * down and the same four tests are repeated.
	 */
LABEL(tail):				/* 4-byte tail */

LABEL(tailloop):
	test	%al, %al
	jz	LABEL(exit)

	inc	%rsi

	test	%ah, %ah
	jz	LABEL(exit)

	inc	%rsi

	test	$0x00ff0000, %eax
	jz	LABEL(exit)

	inc	%rsi

	test	$0xff000000, %eax
	jz	LABEL(exit)

	inc	%rsi

	shr	$32, %rax
	jmp	LABEL(tailloop)

LABEL(tailafter):

	.p2align 4

	/* %rsi addresses the terminating zero byte; %rdi holds -s. */
LABEL(exit):
	lea	(%rdi, %rsi), %rax	/* length = cursor - s */
	ret

	SET_SIZE(strlen)