xref: /titanic_44/usr/src/lib/libc/amd64/gen/strlen.s (revision 09f67678c27dda8a89f87f1f408a87dd49ceb0e1)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright (c) 2002 Advanced Micro Devices, Inc.
29 *
30 * All rights reserved.
31 *
32 * Redistribution and  use in source and binary  forms, with or
33 * without  modification,  are   permitted  provided  that  the
34 * following conditions are met:
35 *
36 * + Redistributions  of source  code  must  retain  the  above
37 *   copyright  notice,   this  list  of   conditions  and  the
38 *   following disclaimer.
39 *
40 * + Redistributions  in binary  form must reproduce  the above
41 *   copyright  notice,   this  list  of   conditions  and  the
42 *   following  disclaimer in  the  documentation and/or  other
43 *   materials provided with the distribution.
44 *
45 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
46 *   names  of  its contributors  may  be  used  to endorse  or
47 *   promote  products  derived   from  this  software  without
48 *   specific prior written permission.
49 *
50 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
51 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
52 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
53 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
54 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
55 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
56 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
57 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
58 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
59 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
60 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
61 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
62 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 *
65 * It is  licensee's responsibility  to comply with  any export
66 * regulations applicable in licensee's jurisdiction.
67 */
68
69	.ident	"%Z%%M%	%I%	%E% SMI"
70
71	.file	"%M%"
72
73#include "SYS.h"
74#include "cache.h"
75
/*
 * LABEL(s) forms a label name for this routine by placing ".strlen" in
 * front of the suffix s.  The empty comment between the two pieces is
 * there to join them into one token.
 * NOTE(review): this relies on the preprocessor deleting comments inside
 * macro bodies without leaving a space (traditional-mode behaviour) --
 * confirm against the build's preprocessor flags.
 */
76#define LABEL(s) .strlen/**/s
77
/*
 * strlen(const char *s)
 *
 * In:   %rdi = s
 * Out:  %rax = number of bytes that precede the first zero byte of s
 * Uses: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags; the stack
 *       is not touched.
 *
 * Register roles:
 *   %rsi  scan pointer
 *   %rdi  -s, so that %rdi + %rsi is the length at exit
 *   %rax  the 8 bytes currently being examined
 *   %rcx  the constant 0xfefefefefefefeff (== -0x0101010101010101)
 *   %r8   scratch
 *   %rdx  carry of the add, widened to 0 / -1 (32-byte and prefetch loops)
 *   %r9   value loaded from .amd64cache1, counted down in the 32-byte loop
 *
 * Phases:
 *   1. Byte-at-a-time scan until %rsi is 8-byte aligned.
 *   2. Seven unrolled 8-byte checks (56 bytes).
 *   3. A loop of eight 8-byte checks per pass, controlled by %r9.
 *   4. The same loop with a prefetchnta 512 bytes ahead, run until a zero
 *      byte is found.
 *   5. Tail: locate the zero byte inside the 8 bytes held in %rax.
 *
 * Zero-byte test on a word w held in %rax:
 *   w + %rcx equals w - 0x0101010101010101.  The carry out of that add,
 *   and the carries into the lowest bit of bytes 1..7 (exactly the bit
 *   positions that are clear in %rcx), are all 1 if and only if no byte
 *   of w is zero.  (w + %rcx) ^ w exposes those seven carries, and OR-ing
 *   with %rcx sets every other bit, so the result is all ones exactly
 *   when the seven carries are all 1.
 */
78	ENTRY(strlen)                /* (const char *s) */
79
80        mov     %rdi, %rsi              /* %rsi = scan pointer, starts at s */
81        neg     %rdi                    /* %rdi = -s; added to %rsi at exit */
82
83LABEL(aligntry):
84        mov     %rsi , %r8
85        and     $7, %r8d                /* %r8 = s & 7 (upper bits cleared) */
86	jz	LABEL(alignafter)	/* already 8-byte aligned */
87
88LABEL(align):                            /* 8-byte align */
89        sub     $8, %r8                 /* %r8 = (s & 7) - 8: minus the bytes left to alignment */
90
91        .p2align 4
92
/* Check one byte per pass until %r8 counts up to zero. */
93LABEL(alignloop):
94        cmpb    $0, (%rsi)
95        je      LABEL(exit)             /* terminator found before alignment */
96
97        inc     %rsi
98        inc     %r8
99        jnz     LABEL(alignloop)
100
101        .p2align 4
102
103LABEL(alignafter):
104
105LABEL(56try):
106
/*
 * From here on %rsi is 8-byte aligned and every load is a whole aligned
 * 8-byte word.  The first word is loaded here; each later check is followed
 * by the load of the next word and an advance of %rsi by 8.
 */
107LABEL(56):                               /* 56-byte */
108        mov     (%rsi), %rax
109        mov     $0xfefefefefefefeff, %rcx
110
/* Seven unrolled checks; any word containing a zero byte goes to tail. */
111LABEL(56loop):
112        mov     %rcx, %r8
113        add     %rax, %r8               /* %r8 = word - 0x0101010101010101 */
114        jnc     LABEL(tail)             /* no carry out: some byte is zero */
115
116        xor     %rax, %r8               /* expose the carries between bytes */
117        or      %rcx, %r8               /* force all other bits to 1 */
118        inc     %r8                     /* all ones becomes 0 */
119        jnz     LABEL(tail)             /* a carry was missing: zero byte present */
120
121        mov     8 (%rsi), %rax          /* next word */
122        lea     8 (%rsi), %rsi
123
124        mov     %rcx, %r8
125        add     %rax, %r8
126        jnc     LABEL(tail)
127
128        xor     %rax, %r8
129        or      %rcx, %r8
130        inc     %r8
131        jnz     LABEL(tail)
132
133        mov     8 (%rsi), %rax
134        lea     8 (%rsi), %rsi
135
136        mov     %rcx, %r8
137        add     %rax, %r8
138        jnc     LABEL(tail)
139
140        xor     %rax, %r8
141        or      %rcx, %r8
142        inc     %r8
143        jnz     LABEL(tail)
144
145        mov     8 (%rsi), %rax
146        lea     8 (%rsi), %rsi
147
148        mov     %rcx, %r8
149        add     %rax, %r8
150        jnc     LABEL(tail)
151
152        xor     %rax, %r8
153        or      %rcx, %r8
154        inc     %r8
155        jnz     LABEL(tail)
156
157        mov     8 (%rsi), %rax
158        lea     8 (%rsi), %rsi
159
160        mov     %rcx, %r8
161        add     %rax, %r8
162        jnc     LABEL(tail)
163
164        xor     %rax, %r8
165        or      %rcx, %r8
166        inc     %r8
167        jnz     LABEL(tail)
168
169        mov     8 (%rsi), %rax
170        lea     8 (%rsi), %rsi
171
172        mov     %rcx, %r8
173        add     %rax, %r8
174        jnc     LABEL(tail)
175
176        xor     %rax, %r8
177        or      %rcx, %r8
178        inc     %r8
179        jnz     LABEL(tail)
180
181        mov     8 (%rsi), %rax
182        lea     8 (%rsi), %rsi
183
184        mov     %rcx, %r8
185        add     %rax, %r8
186        jnc     LABEL(tail)
187
188        xor     %rax, %r8
189        or      %rcx, %r8
190        inc     %r8
191        jnz     LABEL(tail)
192
193        mov     8 (%rsi), %rax          /* eighth word; checked by the loop below */
194        lea     8 (%rsi), %rsi
195
196LABEL(56after):
197
/*
 * NOTE(review): _sref_ and .amd64cache1 are not defined in this file;
 * presumably they come from cache.h and describe a cache size -- confirm.
 */
198LABEL(32):                               /* 32-byte */
199        mov     _sref_(.amd64cache1), %r9
200
201        .p2align 4
202
/*
 * Eight checks per pass.  This is the same test as above with one branch
 * per word instead of two: sbb turns the carry of the add into 0 or -1 in
 * %rdx, and subtracting %rdx adds that carry back, so %r8 ends up zero only
 * when the OR result was all ones and the add carried out.
 */
203LABEL(32loop):
204        mov     %rcx, %r8
205        add     %rax, %r8               /* %r8 = word - 0x0101010101010101 */
206        sbb     %rdx, %rdx              /* %rdx = carry ? -1 : 0 */
207
208        xor     %rax, %r8
209        or      %rcx, %r8
210        sub     %rdx, %r8               /* %r8 += carry */
211        jnz     LABEL(tail)             /* non-zero: zero byte present */
212
213        mov     8 (%rsi), %rax          /* next word */
214        add     $8, %rsi
215
216        mov     %rcx, %r8
217        add     %rax, %r8
218        sbb     %rdx, %rdx
219
220        xor     %rax, %r8
221        or      %rcx, %r8
222        sub     %rdx, %r8
223        jnz     LABEL(tail)
224
225        mov     8 (%rsi), %rax
226        add     $8, %rsi
227
228        mov     %rcx, %r8
229        add     %rax, %r8
230        sbb     %rdx, %rdx
231
232        xor     %rax, %r8
233        or      %rcx, %r8
234        sub     %rdx, %r8
235        jnz     LABEL(tail)
236
237        mov     8 (%rsi), %rax
238        add     $8, %rsi
239
240        mov     %rcx, %r8
241        add     %rax, %r8
242        sbb     %rdx, %rdx
243
244        xor     %rax, %r8
245        or      %rcx, %r8
246        sub     %rdx, %r8
247        jnz     LABEL(tail)
248
249        mov     8 (%rsi), %rax
250        add     $8, %rsi
251
252        mov     %rcx, %r8
253        add     %rax, %r8
254        sbb     %rdx, %rdx
255
256        xor     %rax, %r8
257        or      %rcx, %r8
258        sub     %rdx, %r8
259        jnz     LABEL(tail)
260
261        mov     8 (%rsi), %rax
262        add     $8, %rsi
263
264        mov     %rcx, %r8
265        add     %rax, %r8
266        sbb     %rdx, %rdx
267
268        xor     %rax, %r8
269        or      %rcx, %r8
270        sub     %rdx, %r8
271        jnz     LABEL(tail)
272
273        mov     8 (%rsi), %rax
274        add     $8, %rsi
275
276        mov     %rcx, %r8
277        add     %rax, %r8
278        sbb     %rdx, %rdx
279
280        xor     %rax, %r8
281        or      %rcx, %r8
282        sub     %rdx, %r8
283        jnz     LABEL(tail)
284
285        mov     8 (%rsi), %rax
286        add     $8, %rsi
287
288        mov     %rcx, %r8
289        add     %rax, %r8
290        sbb     %rdx, %rdx
291
292        xor     %rax, %r8
293        or      %rcx, %r8
294        sub     %rdx, %r8
295        jnz     LABEL(tail)
296
/*
 * The flags set by this sub are consumed by the jbe below; the mov and lea
 * in between leave the flags alone (which is why lea, not add, advances
 * %rsi here).
 * NOTE(review): jbe is taken only when the subtraction borrows or gives
 * zero, i.e. when %r9 was <= 32 beforehand; for a larger %r9 the loop body
 * runs once and control falls into the prefetch loop.  Each pass also
 * examines 64 bytes while %r9 drops by 32.  The result is unaffected, since
 * both loops perform the same checks -- confirm the intended condition.
 */
297        sub     $32, %r9
298
299        mov     8 (%rsi), %rax
300        lea     8 (%rsi), %rsi
301
302        jbe     LABEL(32loop)
303
304LABEL(32after):
305
306LABEL(pretry):
307
308LABEL(pre):                              /* 64-byte prefetch */
309
310        .p2align 4
311
/*
 * Same eight checks per pass as the 32-byte loop, plus one prefetchnta per
 * pass.  The loop has no counter: it is left only through a jump to tail.
 */
312LABEL(preloop):
313        mov     %rcx, %r8
314        add     %rax, %r8
315        sbb     %rdx, %rdx
316
317        xor     %rax, %r8
318        or      %rcx, %r8
319        sub     %rdx, %r8
320        jnz     LABEL(tail)
321
322        mov     8 (%rsi), %rax
323        add     $8, %rsi
324
325        mov     %rcx, %r8
326        add     %rax, %r8
327        sbb     %rdx, %rdx
328
329        xor     %rax, %r8
330        or      %rcx, %r8
331        sub     %rdx, %r8
332        jnz     LABEL(tail)
333
334        mov     8 (%rsi), %rax
335        add     $8, %rsi
336
337        mov     %rcx, %r8
338        add     %rax, %r8
339        sbb     %rdx, %rdx
340
341        xor     %rax, %r8
342        or      %rcx, %r8
343        sub     %rdx, %r8
344        jnz     LABEL(tail)
345
346        mov     8 (%rsi), %rax
347        add     $8, %rsi
348
349        mov     %rcx, %r8
350        add     %rax, %r8
351        sbb     %rdx, %rdx
352
353        xor     %rax, %r8
354        or      %rcx, %r8
355        sub     %rdx, %r8
356        jnz     LABEL(tail)
357
358        mov     8 (%rsi), %rax
359        add     $8, %rsi
360
361        mov     %rcx, %r8
362        add     %rax, %r8
363        sbb     %rdx, %rdx
364
365        xor     %rax, %r8
366        or      %rcx, %r8
367        sub     %rdx, %r8
368        jnz     LABEL(tail)
369
370        mov     8 (%rsi), %rax
371        add     $8, %rsi
372
373        mov     %rcx, %r8
374        add     %rax, %r8
375        sbb     %rdx, %rdx
376
377        xor     %rax, %r8
378        or      %rcx, %r8
379        sub     %rdx, %r8
380        jnz     LABEL(tail)
381
382        mov     8 (%rsi), %rax
383        add     $8, %rsi
384
385        mov     %rcx, %r8
386        add     %rax, %r8
387        sbb     %rdx, %rdx
388
389        xor     %rax, %r8
390        or      %rcx, %r8
391        sub     %rdx, %r8
392        jnz     LABEL(tail)
393
394        mov     8 (%rsi), %rax
395        add     $8, %rsi
396
397        mov     %rcx, %r8
398        add     %rax, %r8
399        sbb     %rdx, %rdx
400
401        xor     %rax, %r8
402        or      %rcx, %r8
403        sub     %rdx, %r8
404        jnz     LABEL(tail)
405
406        prefetchnta 512 (%rsi)	/* 3DNow: use prefetch */
407
408        mov     8 (%rsi), %rax
409        add     $8, %rsi
410
411        jmp     LABEL(preloop)
412
413        .p2align 4
414
415LABEL(preafter):
416
417LABEL(tailtry):
418
/*
 * On entry %rax holds 8 bytes known to contain a zero byte and %rsi is the
 * address those bytes were loaded from.  The bytes are examined from the
 * lowest address upwards (lowest-order byte of %rax first), four per pass;
 * %rsi is advanced past each non-zero byte.  After four non-zero bytes the
 * upper half of %rax is shifted down and the pass repeats.
 */
419LABEL(tail):                             /* 4-byte tail */
420
421LABEL(tailloop):
422        test    %al, %al                /* byte 0 */
423        jz      LABEL(exit)
424
425        inc     %rsi
426
427        test    %ah, %ah                /* byte 1 */
428        jz      LABEL(exit)
429
430        inc     %rsi
431
432        test    $0x00ff0000, %eax       /* byte 2 */
433        jz      LABEL(exit)
434
435        inc     %rsi
436
437        test    $0xff000000, %eax       /* byte 3 */
438        jz      LABEL(exit)
439
440        inc     %rsi
441
442        shr     $32, %rax               /* bring bytes 4..7 down to 0..3 */
443        jmp     LABEL(tailloop)
444
445LABEL(tailafter):
446
447        .p2align 4
448
/* %rsi points at the terminating zero byte; %rdi still holds -s. */
449LABEL(exit):
450        lea     (%rdi, %rsi), %rax      /* length = %rsi - s */
451        ret
452
453	SET_SIZE(strlen)
454