xref: /titanic_41/usr/src/lib/libc/amd64/gen/strlen.s (revision dfb96a4f56fb431b915bc67e5d9d5c8d4f4f6679)
1/*
2 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
3 * Use is subject to license terms.
4 */
5
6/*
7 * Copyright (c) 2002 Advanced Micro Devices, Inc.
8 *
9 * All rights reserved.
10 *
11 * Redistribution and  use in source and binary  forms, with or
12 * without  modification,  are   permitted  provided  that  the
13 * following conditions are met:
14 *
15 * + Redistributions  of source  code  must  retain  the  above
16 *   copyright  notice,   this  list  of   conditions  and  the
17 *   following disclaimer.
18 *
19 * + Redistributions  in binary  form must reproduce  the above
20 *   copyright  notice,   this  list  of   conditions  and  the
21 *   following  disclaimer in  the  documentation and/or  other
22 *   materials provided with the distribution.
23 *
24 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
25 *   names  of  its contributors  may  be  used  to endorse  or
26 *   promote  products  derived   from  this  software  without
27 *   specific prior written permission.
28 *
29 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
30 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
32 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
33 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
34 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
35 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
36 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
37 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
38 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
39 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
40 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
41 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
42 * POSSIBILITY OF SUCH DAMAGE.
43 *
44 * It is  licensee's responsibility  to comply with  any export
45 * regulations applicable in licensee's jurisdiction.
46 */
47
48	.ident	"%Z%%M%	%I%	%E% SMI"
49
50	.file	"%M%"
51
52#include "SYS.h"
53#include "cache.h"
54
/*
 * LABEL(s) expands to the text ".strlen" followed directly by s, so every
 * label built with it in this file shares the ".strlen" prefix.  The empty
 * comment between the two pieces is what joins them.
 * NOTE(review): this relies on the preprocessor deleting that comment
 * without leaving a space behind (traditional cpp behaviour) -- confirm
 * against the build flags.
 */
55#define LABEL(s) .strlen/**/s
56
/*
 * strlen(const char *s)
 *
 * Scans forward from s for the first zero byte and returns, in %rax, the
 * number of bytes that precede it.
 *
 * In:       %rdi = s
 * Out:      %rax = (address of the zero byte) - s
 * Written:  %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags
 *
 * Register roles:
 *   %rdi  -s (negated once on entry) so that the exit path can form the
 *         length with a single lea
 *   %rsi  scan pointer
 *   %rax  the 8-byte word most recently loaded from (%rsi)
 *   %rcx  the constant 0xfefefefefefefeff used by the zero-byte test
 *   %r8   scratch: alignment counter first, then the zero-byte test value
 *   %rdx  0 or -1, captured from the carry flag in the two later loops
 *   %r9   counter for the 32loop stage
 *
 * Once %rsi is 8-byte aligned the string is read a whole word at a time,
 * so bytes that follow the terminator inside the same aligned word are
 * read as well.
 */
57	ENTRY(strlen)                /* (const char *s) */
58
59        mov     %rdi, %rsi              /* %rsi = scan pointer, starts at s */
60        neg     %rdi                    /* %rdi = -s, consumed at exit */
61
62LABEL(aligntry):
63        mov     %rsi , %r8
64        and     $7, %r8d                /* %r8 = s & 7 (upper bits cleared) */
65	jz	LABEL(alignafter)       /* already 8-byte aligned */
66
67LABEL(align):                            /* 8-byte align */
68        sub     $8, %r8                 /* %r8 = -(bytes up to next boundary) */
69
70        .p2align 4
71
/*
 * Byte-at-a-time scan until %rsi reaches an 8-byte boundary.  %r8 counts
 * up from its negative start value and hits zero exactly at the boundary.
 */
72LABEL(alignloop):
73        cmpb    $0, (%rsi)
74        je      LABEL(exit)             /* terminator found before alignment */
75
76        inc     %rsi
77        inc     %r8
78        jnz     LABEL(alignloop)
79
80        .p2align 4
81
82LABEL(alignafter):
83
84LABEL(56try):
85
/*
 * Word-at-a-time stages.  %rsi is 8-byte aligned from here on.
 *
 * Zero-byte test, with %rcx = 0xfefefefefefefeff (which equals
 * -0x0101010101010101 modulo 2^64):
 *
 *   %r8  = %rax + %rcx   no carry out of bit 63 -> go to tail
 *   %r8 ^= %rax          bit 0 of each of bytes 1..7 now holds the carry
 *                        that entered that byte
 *   %r8 |= %rcx          sets every bit except those seven carry bits
 *   %r8 += 1             result is 0 only if all seven carries were 1
 *
 * A zero byte never produces a carry, and a word with no zero byte
 * produces all eight, so any missing carry sends control to tail, which
 * then locates the zero byte inside %rax.
 *
 * This first stage tests seven words (56 bytes) using two branches per
 * word, then loads the eighth word and falls through to the next stage.
 */
86LABEL(56):                               /* 56-byte */
87        mov     (%rsi), %rax
88        mov     $0xfefefefefefefeff, %rcx
89
90LABEL(56loop):
91        mov     %rcx, %r8               /* word 1 of 7 */
92        add     %rax, %r8
93        jnc     LABEL(tail)
94
95        xor     %rax, %r8
96        or      %rcx, %r8
97        inc     %r8
98        jnz     LABEL(tail)
99
100        mov     8 (%rsi), %rax
101        lea     8 (%rsi), %rsi
102
103        mov     %rcx, %r8               /* word 2 of 7 */
104        add     %rax, %r8
105        jnc     LABEL(tail)
106
107        xor     %rax, %r8
108        or      %rcx, %r8
109        inc     %r8
110        jnz     LABEL(tail)
111
112        mov     8 (%rsi), %rax
113        lea     8 (%rsi), %rsi
114
115        mov     %rcx, %r8               /* word 3 of 7 */
116        add     %rax, %r8
117        jnc     LABEL(tail)
118
119        xor     %rax, %r8
120        or      %rcx, %r8
121        inc     %r8
122        jnz     LABEL(tail)
123
124        mov     8 (%rsi), %rax
125        lea     8 (%rsi), %rsi
126
127        mov     %rcx, %r8               /* word 4 of 7 */
128        add     %rax, %r8
129        jnc     LABEL(tail)
130
131        xor     %rax, %r8
132        or      %rcx, %r8
133        inc     %r8
134        jnz     LABEL(tail)
135
136        mov     8 (%rsi), %rax
137        lea     8 (%rsi), %rsi
138
139        mov     %rcx, %r8               /* word 5 of 7 */
140        add     %rax, %r8
141        jnc     LABEL(tail)
142
143        xor     %rax, %r8
144        or      %rcx, %r8
145        inc     %r8
146        jnz     LABEL(tail)
147
148        mov     8 (%rsi), %rax
149        lea     8 (%rsi), %rsi
150
151        mov     %rcx, %r8               /* word 6 of 7 */
152        add     %rax, %r8
153        jnc     LABEL(tail)
154
155        xor     %rax, %r8
156        or      %rcx, %r8
157        inc     %r8
158        jnz     LABEL(tail)
159
160        mov     8 (%rsi), %rax
161        lea     8 (%rsi), %rsi
162
163        mov     %rcx, %r8               /* word 7 of 7 */
164        add     %rax, %r8
165        jnc     LABEL(tail)
166
167        xor     %rax, %r8
168        or      %rcx, %r8
169        inc     %r8
170        jnz     LABEL(tail)
171
172        mov     8 (%rsi), %rax          /* preload the word the next stage tests first */
173        lea     8 (%rsi), %rsi
174
175LABEL(56after):
176
/*
 * Second stage.  Same zero-byte test, but the two branches per word are
 * merged into one: sbb captures the carry as %rdx = 0 or -1, and
 * "sub %rdx, %r8" then adds 1 only when the carry was set, so %r8 ends up
 * 0 only when both conditions of the test hold.
 *
 * %r9 is loaded through _sref_(.amd64cache1); both names are defined
 * outside this file (presumably a cache size made available by cache.h --
 * TODO confirm).
 */
177LABEL(32):                               /* 32-byte */
178        mov     _sref_(.amd64cache1), %r9
179
180        .p2align 4
181
182LABEL(32loop):
183        mov     %rcx, %r8               /* word 1 of 8 */
184        add     %rax, %r8
185        sbb     %rdx, %rdx              /* %rdx = -1 if carry, else 0 */
186
187        xor     %rax, %r8
188        or      %rcx, %r8
189        sub     %rdx, %r8               /* add the carry back in */
190        jnz     LABEL(tail)
191
192        mov     8 (%rsi), %rax
193        add     $8, %rsi
194
195        mov     %rcx, %r8               /* word 2 of 8 */
196        add     %rax, %r8
197        sbb     %rdx, %rdx
198
199        xor     %rax, %r8
200        or      %rcx, %r8
201        sub     %rdx, %r8
202        jnz     LABEL(tail)
203
204        mov     8 (%rsi), %rax
205        add     $8, %rsi
206
207        mov     %rcx, %r8               /* word 3 of 8 */
208        add     %rax, %r8
209        sbb     %rdx, %rdx
210
211        xor     %rax, %r8
212        or      %rcx, %r8
213        sub     %rdx, %r8
214        jnz     LABEL(tail)
215
216        mov     8 (%rsi), %rax
217        add     $8, %rsi
218
219        mov     %rcx, %r8               /* word 4 of 8 */
220        add     %rax, %r8
221        sbb     %rdx, %rdx
222
223        xor     %rax, %r8
224        or      %rcx, %r8
225        sub     %rdx, %r8
226        jnz     LABEL(tail)
227
228        mov     8 (%rsi), %rax
229        add     $8, %rsi
230
231        mov     %rcx, %r8               /* word 5 of 8 */
232        add     %rax, %r8
233        sbb     %rdx, %rdx
234
235        xor     %rax, %r8
236        or      %rcx, %r8
237        sub     %rdx, %r8
238        jnz     LABEL(tail)
239
240        mov     8 (%rsi), %rax
241        add     $8, %rsi
242
243        mov     %rcx, %r8               /* word 6 of 8 */
244        add     %rax, %r8
245        sbb     %rdx, %rdx
246
247        xor     %rax, %r8
248        or      %rcx, %r8
249        sub     %rdx, %r8
250        jnz     LABEL(tail)
251
252        mov     8 (%rsi), %rax
253        add     $8, %rsi
254
255        mov     %rcx, %r8               /* word 7 of 8 */
256        add     %rax, %r8
257        sbb     %rdx, %rdx
258
259        xor     %rax, %r8
260        or      %rcx, %r8
261        sub     %rdx, %r8
262        jnz     LABEL(tail)
263
264        mov     8 (%rsi), %rax
265        add     $8, %rsi
266
267        mov     %rcx, %r8               /* word 8 of 8 */
268        add     %rax, %r8
269        sbb     %rdx, %rdx
270
271        xor     %rax, %r8
272        or      %rcx, %r8
273        sub     %rdx, %r8
274        jnz     LABEL(tail)
275
/*
 * Loop control.  mov and lea do not modify the flags, so the jbe below
 * acts on the result of this sub: the loop repeats only when %r9 was
 * <= 32 (unsigned) before the subtraction; otherwise control falls
 * through to the prefetching loop.
 * NOTE(review): one pass examines eight words (64 bytes) while the
 * counter drops by 32, and the stage is labelled "32-byte" -- confirm the
 * intended trip count and branch sense.
 */
276        sub     $32, %r9
277
278        mov     8 (%rsi), %rax
279        lea     8 (%rsi), %rsi          /* lea, not add: keeps the flags from sub */
280
281        jbe     LABEL(32loop)
282
283LABEL(32after):
284
285LABEL(pretry):
286
/*
 * Final stage.  The same eight-word pass, repeated with no counter; each
 * pass issues one prefetchnta for the address 512 bytes beyond %rsi.
 * The only way out of this loop is a jnz to tail.
 */
287LABEL(pre):                              /* 64-byte prefetch */
288
289        .p2align 4
290
291LABEL(preloop):
292        mov     %rcx, %r8               /* word 1 of 8 */
293        add     %rax, %r8
294        sbb     %rdx, %rdx
295
296        xor     %rax, %r8
297        or      %rcx, %r8
298        sub     %rdx, %r8
299        jnz     LABEL(tail)
300
301        mov     8 (%rsi), %rax
302        add     $8, %rsi
303
304        mov     %rcx, %r8               /* word 2 of 8 */
305        add     %rax, %r8
306        sbb     %rdx, %rdx
307
308        xor     %rax, %r8
309        or      %rcx, %r8
310        sub     %rdx, %r8
311        jnz     LABEL(tail)
312
313        mov     8 (%rsi), %rax
314        add     $8, %rsi
315
316        mov     %rcx, %r8               /* word 3 of 8 */
317        add     %rax, %r8
318        sbb     %rdx, %rdx
319
320        xor     %rax, %r8
321        or      %rcx, %r8
322        sub     %rdx, %r8
323        jnz     LABEL(tail)
324
325        mov     8 (%rsi), %rax
326        add     $8, %rsi
327
328        mov     %rcx, %r8               /* word 4 of 8 */
329        add     %rax, %r8
330        sbb     %rdx, %rdx
331
332        xor     %rax, %r8
333        or      %rcx, %r8
334        sub     %rdx, %r8
335        jnz     LABEL(tail)
336
337        mov     8 (%rsi), %rax
338        add     $8, %rsi
339
340        mov     %rcx, %r8               /* word 5 of 8 */
341        add     %rax, %r8
342        sbb     %rdx, %rdx
343
344        xor     %rax, %r8
345        or      %rcx, %r8
346        sub     %rdx, %r8
347        jnz     LABEL(tail)
348
349        mov     8 (%rsi), %rax
350        add     $8, %rsi
351
352        mov     %rcx, %r8               /* word 6 of 8 */
353        add     %rax, %r8
354        sbb     %rdx, %rdx
355
356        xor     %rax, %r8
357        or      %rcx, %r8
358        sub     %rdx, %r8
359        jnz     LABEL(tail)
360
361        mov     8 (%rsi), %rax
362        add     $8, %rsi
363
364        mov     %rcx, %r8               /* word 7 of 8 */
365        add     %rax, %r8
366        sbb     %rdx, %rdx
367
368        xor     %rax, %r8
369        or      %rcx, %r8
370        sub     %rdx, %r8
371        jnz     LABEL(tail)
372
373        mov     8 (%rsi), %rax
374        add     $8, %rsi
375
376        mov     %rcx, %r8               /* word 8 of 8 */
377        add     %rax, %r8
378        sbb     %rdx, %rdx
379
380        xor     %rax, %r8
381        or      %rcx, %r8
382        sub     %rdx, %r8
383        jnz     LABEL(tail)
384
385        prefetchnta 512 (%rsi)	/* 3DNow: use prefetch */
386
387        mov     8 (%rsi), %rax
388        add     $8, %rsi
389
390        jmp     LABEL(preloop)
391
392        .p2align 4
393
394LABEL(preafter):
395
396LABEL(tailtry):
397
/*
 * Tail: %rax holds the word loaded from (%rsi), and the test that jumped
 * here indicates it contains a zero byte.  Check the four low bytes in
 * memory order, advancing %rsi past each non-zero one; if none is zero,
 * shift the upper four bytes down and repeat.
 */
398LABEL(tail):                             /* 4-byte tail */
399
400LABEL(tailloop):
401        test    %al, %al                /* byte 0 */
402        jz      LABEL(exit)
403
404        inc     %rsi
405
406        test    %ah, %ah                /* byte 1 */
407        jz      LABEL(exit)
408
409        inc     %rsi
410
411        test    $0x00ff0000, %eax       /* byte 2 */
412        jz      LABEL(exit)
413
414        inc     %rsi
415
416        test    $0xff000000, %eax       /* byte 3 */
417        jz      LABEL(exit)
418
419        inc     %rsi
420
421        shr     $32, %rax               /* bring bytes 4..7 into %eax */
422        jmp     LABEL(tailloop)
423
424LABEL(tailafter):
425
426        .p2align 4
427
/*
 * Exit: %rsi points at the zero byte and %rdi still holds -s, so their
 * sum is the length.
 */
428LABEL(exit):
429        lea     (%rdi, %rsi), %rax      /* %rax = %rsi - s */
430        ret
431
432	SET_SIZE(strlen)
433