xref: /titanic_50/usr/src/lib/libc/amd64/gen/strlen.s (revision 8e7248e505faa19396d4e853604e3fa7cd2cb3b5)
1/*
2 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
3 * Use is subject to license terms.
4 */
5
6/*
7 * Copyright (c) 2002 Advanced Micro Devices, Inc.
8 *
9 * All rights reserved.
10 *
11 * Redistribution and  use in source and binary  forms, with or
12 * without  modification,  are   permitted  provided  that  the
13 * following conditions are met:
14 *
15 * + Redistributions  of source  code  must  retain  the  above
16 *   copyright  notice,   this  list  of   conditions  and  the
17 *   following disclaimer.
18 *
19 * + Redistributions  in binary  form must reproduce  the above
20 *   copyright  notice,   this  list  of   conditions  and  the
21 *   following  disclaimer in  the  documentation and/or  other
22 *   materials provided with the distribution.
23 *
24 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
25 *   names  of  its contributors  may  be  used  to endorse  or
26 *   promote  products  derived   from  this  software  without
27 *   specific prior written permission.
28 *
29 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
30 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
31 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
32 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
33 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
34 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
35 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
36 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
37 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
38 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
39 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
40 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
41 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
42 * POSSIBILITY OF SUCH DAMAGE.
43 *
44 * It is  licensee's responsibility  to comply with  any export
45 * regulations applicable in licensee's jurisdiction.
46 */
47
48	.file	"strlen.s"
49
50#include "SYS.h"
51#include "cache.h"
52
/*
 * LABEL(s) forms a label name private to this routine by prefixing s
 * with ".strlen" (for example LABEL(exit) names .strlenexit).  The empty
 * comment between the two pieces is the traditional-cpp pasting idiom:
 * the pieces only join when the preprocessor removes the comment without
 * leaving a space behind.
 */
53#define LABEL(s) .strlen/**/s
54
/*
 * strlen(const char *s)
 *
 * Returns in %rax the number of bytes that precede the first zero byte
 * of the string whose address is passed in %rdi.
 *
 * Register roles in this routine:
 *   %rsi  running pointer into the string
 *   %rdi  negated start address, so %rdi + %rsi at exit is the length
 *   %rax  the 8-byte word currently being examined (length on return)
 *   %rcx  the constant 0xfefefefefefefeff, i.e. -0x0101010101010101
 *   %r8   scratch: alignment counter first, zero-byte test result later
 *   %rdx  scratch: 0 or -1 copied out of the carry flag
 *   %r9   counter loaded from .amd64cache1
 * No stack is used and no other register is written.
 *
 * Phases:
 *   1. compare single bytes until %rsi is 8-byte aligned
 *   2. seven straight-line word tests (56 bytes) using branches on carry
 *   3. "32loop": eight word tests per pass, carry folded in with sbb/sub
 *   4. "preloop": the same eight tests plus a prefetchnta 512 bytes ahead
 *   5. "tail": find the zero byte inside the word held in %rax
 *
 * Word test: adding %rcx subtracts 1 from every byte of the word.  If the
 * addition produces no carry, the word is below 0x0101010101010101 and so
 * contains a zero byte.  Otherwise the sum is xor-ed with the original
 * word and or-ed with %rcx, which leaves only the lowest bit of bytes
 * 1..7 undecided; those bits are all set exactly when no borrow crossed a
 * byte boundary, i.e. when no byte was zero.  In that case the value is
 * all ones and adding 1 gives zero; any non-zero result means a zero byte
 * is present and control goes to the tail.
 *
 * Word loads are only issued once %rsi is 8-byte aligned, so each load
 * stays inside one aligned 8-byte unit even though it may read bytes
 * beyond the terminator.
 */
55	ENTRY(strlen)                /* (const char *s) */
56
57        mov     %rdi, %rsi
58        neg     %rdi
59
/*
 * Phase 1: %r8 = s & 7 (the 32-bit and also clears the upper half).
 * Already aligned strings skip straight to the word tests.
 */
60LABEL(aligntry):
61        mov     %rsi , %r8
62        and     $7, %r8d
63	jz	LABEL(alignafter)
64
/*
 * %r8 becomes minus the number of bytes left before the next 8-byte
 * boundary; the loop below counts it up to zero.
 */
65LABEL(align):                            /* 8-byte align */
66        sub     $8, %r8
67
68        .p2align 4
69
70LABEL(alignloop):
71        cmpb    $0, (%rsi)
72        je      LABEL(exit)
73
74        inc     %rsi
75        inc     %r8
76        jnz     LABEL(alignloop)
77
/*
 * Execution falls through this alignment padding into alignafter when
 * the alignment loop finishes without finding the terminator.
 */
78        .p2align 4
79
80LABEL(alignafter):
81
82LABEL(56try):
83
/*
 * Phase 2: load the first aligned word and the test constant.  Seven
 * word tests follow in a straight line; despite its name, no branch in
 * this routine targets LABEL(56loop).  On every path to the tail, %rsi
 * holds the address the word in %rax was loaded from.
 */
84LABEL(56):                               /* 56-byte */
85        mov     (%rsi), %rax
86        mov     $0xfefefefefefefeff, %rcx
87
88LABEL(56loop):
/* word test 1 of 7: no carry => zero byte present */
89        mov     %rcx, %r8
90        add     %rax, %r8
91        jnc     LABEL(tail)
92
/* all ones + 1 == 0 only when no byte of %rax is zero */
93        xor     %rax, %r8
94        or      %rcx, %r8
95        inc     %r8
96        jnz     LABEL(tail)
97
/* fetch the next word; lea advances %rsi without touching the flags */
98        mov     8 (%rsi), %rax
99        lea     8 (%rsi), %rsi
100
/* word test 2 of 7 */
101        mov     %rcx, %r8
102        add     %rax, %r8
103        jnc     LABEL(tail)
104
105        xor     %rax, %r8
106        or      %rcx, %r8
107        inc     %r8
108        jnz     LABEL(tail)
109
110        mov     8 (%rsi), %rax
111        lea     8 (%rsi), %rsi
112
/* word test 3 of 7 */
113        mov     %rcx, %r8
114        add     %rax, %r8
115        jnc     LABEL(tail)
116
117        xor     %rax, %r8
118        or      %rcx, %r8
119        inc     %r8
120        jnz     LABEL(tail)
121
122        mov     8 (%rsi), %rax
123        lea     8 (%rsi), %rsi
124
/* word test 4 of 7 */
125        mov     %rcx, %r8
126        add     %rax, %r8
127        jnc     LABEL(tail)
128
129        xor     %rax, %r8
130        or      %rcx, %r8
131        inc     %r8
132        jnz     LABEL(tail)
133
134        mov     8 (%rsi), %rax
135        lea     8 (%rsi), %rsi
136
/* word test 5 of 7 */
137        mov     %rcx, %r8
138        add     %rax, %r8
139        jnc     LABEL(tail)
140
141        xor     %rax, %r8
142        or      %rcx, %r8
143        inc     %r8
144        jnz     LABEL(tail)
145
146        mov     8 (%rsi), %rax
147        lea     8 (%rsi), %rsi
148
/* word test 6 of 7 */
149        mov     %rcx, %r8
150        add     %rax, %r8
151        jnc     LABEL(tail)
152
153        xor     %rax, %r8
154        or      %rcx, %r8
155        inc     %r8
156        jnz     LABEL(tail)
157
158        mov     8 (%rsi), %rax
159        lea     8 (%rsi), %rsi
160
/* word test 7 of 7 */
161        mov     %rcx, %r8
162        add     %rax, %r8
163        jnc     LABEL(tail)
164
165        xor     %rax, %r8
166        or      %rcx, %r8
167        inc     %r8
168        jnz     LABEL(tail)
169
/* the word loaded here is the first one examined by 32loop */
170        mov     8 (%rsi), %rax
171        lea     8 (%rsi), %rsi
172
173LABEL(56after):
174
/*
 * Phase 3: %r9 is loaded from .amd64cache1 and used as a countdown.
 * Presumably this is a cache-size value supplied via cache.h, and
 * _sref_ is the addressing macro defined there -- confirm against
 * that header.
 */
175LABEL(32):                               /* 32-byte */
176        mov     _sref_(.amd64cache1), %r9
177
178        .p2align 4
179
180LABEL(32loop):
/*
 * Branch-light form of the word test, used eight times per pass:
 * sbb sets %rdx to -CF (0 or -1) and the final sub adds CF back in.
 * With a carry and no zero byte the result is all ones + 1 == 0.
 * Without a carry nothing is added, and the or with %rcx guarantees a
 * non-zero result, so one jnz covers both "zero byte found" cases.
 */
181        mov     %rcx, %r8
182        add     %rax, %r8
183        sbb     %rdx, %rdx
184
185        xor     %rax, %r8
186        or      %rcx, %r8
187        sub     %rdx, %r8
188        jnz     LABEL(tail)
189
190        mov     8 (%rsi), %rax
191        add     $8, %rsi
192
/* word test 2 of 8 */
193        mov     %rcx, %r8
194        add     %rax, %r8
195        sbb     %rdx, %rdx
196
197        xor     %rax, %r8
198        or      %rcx, %r8
199        sub     %rdx, %r8
200        jnz     LABEL(tail)
201
202        mov     8 (%rsi), %rax
203        add     $8, %rsi
204
/* word test 3 of 8 */
205        mov     %rcx, %r8
206        add     %rax, %r8
207        sbb     %rdx, %rdx
208
209        xor     %rax, %r8
210        or      %rcx, %r8
211        sub     %rdx, %r8
212        jnz     LABEL(tail)
213
214        mov     8 (%rsi), %rax
215        add     $8, %rsi
216
/* word test 4 of 8 */
217        mov     %rcx, %r8
218        add     %rax, %r8
219        sbb     %rdx, %rdx
220
221        xor     %rax, %r8
222        or      %rcx, %r8
223        sub     %rdx, %r8
224        jnz     LABEL(tail)
225
226        mov     8 (%rsi), %rax
227        add     $8, %rsi
228
/* word test 5 of 8 */
229        mov     %rcx, %r8
230        add     %rax, %r8
231        sbb     %rdx, %rdx
232
233        xor     %rax, %r8
234        or      %rcx, %r8
235        sub     %rdx, %r8
236        jnz     LABEL(tail)
237
238        mov     8 (%rsi), %rax
239        add     $8, %rsi
240
/* word test 6 of 8 */
241        mov     %rcx, %r8
242        add     %rax, %r8
243        sbb     %rdx, %rdx
244
245        xor     %rax, %r8
246        or      %rcx, %r8
247        sub     %rdx, %r8
248        jnz     LABEL(tail)
249
250        mov     8 (%rsi), %rax
251        add     $8, %rsi
252
/* word test 7 of 8 */
253        mov     %rcx, %r8
254        add     %rax, %r8
255        sbb     %rdx, %rdx
256
257        xor     %rax, %r8
258        or      %rcx, %r8
259        sub     %rdx, %r8
260        jnz     LABEL(tail)
261
262        mov     8 (%rsi), %rax
263        add     $8, %rsi
264
/* word test 8 of 8 */
265        mov     %rcx, %r8
266        add     %rax, %r8
267        sbb     %rdx, %rdx
268
269        xor     %rax, %r8
270        or      %rcx, %r8
271        sub     %rdx, %r8
272        jnz     LABEL(tail)
273
/*
 * The flags set by this sub are the ones tested by the jbe below: the
 * mov and lea in between leave the flags alone, which is why lea rather
 * than add advances %rsi here.  jbe is taken when the subtraction
 * borrowed or produced zero, i.e. when %r9 was <= 32 beforehand.
 *
 * NOTE(review): each pass examines 64 bytes but the counter drops by 32,
 * and the loop repeats only once the counter is already exhausted, so
 * for any counter value above 32 this loop runs a single pass before
 * falling into preloop.  Only the choice of loop is affected, never the
 * returned length.  Confirm whether "continue while the counter lasts"
 * was the intent before changing anything.
 */
274        sub     $32, %r9
275
276        mov     8 (%rsi), %rax
277        lea     8 (%rsi), %rsi
278
279        jbe     LABEL(32loop)
280
281LABEL(32after):
282
283LABEL(pretry):
284
/*
 * Phase 4: identical eight word tests per pass, followed by a
 * prefetchnta of the address 512 bytes past %rsi.  The loop has no
 * counter; the only way out is a jnz to the tail.
 */
285LABEL(pre):                              /* 64-byte prefetch */
286
287        .p2align 4
288
289LABEL(preloop):
/* word test 1 of 8 */
290        mov     %rcx, %r8
291        add     %rax, %r8
292        sbb     %rdx, %rdx
293
294        xor     %rax, %r8
295        or      %rcx, %r8
296        sub     %rdx, %r8
297        jnz     LABEL(tail)
298
299        mov     8 (%rsi), %rax
300        add     $8, %rsi
301
/* word test 2 of 8 */
302        mov     %rcx, %r8
303        add     %rax, %r8
304        sbb     %rdx, %rdx
305
306        xor     %rax, %r8
307        or      %rcx, %r8
308        sub     %rdx, %r8
309        jnz     LABEL(tail)
310
311        mov     8 (%rsi), %rax
312        add     $8, %rsi
313
/* word test 3 of 8 */
314        mov     %rcx, %r8
315        add     %rax, %r8
316        sbb     %rdx, %rdx
317
318        xor     %rax, %r8
319        or      %rcx, %r8
320        sub     %rdx, %r8
321        jnz     LABEL(tail)
322
323        mov     8 (%rsi), %rax
324        add     $8, %rsi
325
/* word test 4 of 8 */
326        mov     %rcx, %r8
327        add     %rax, %r8
328        sbb     %rdx, %rdx
329
330        xor     %rax, %r8
331        or      %rcx, %r8
332        sub     %rdx, %r8
333        jnz     LABEL(tail)
334
335        mov     8 (%rsi), %rax
336        add     $8, %rsi
337
/* word test 5 of 8 */
338        mov     %rcx, %r8
339        add     %rax, %r8
340        sbb     %rdx, %rdx
341
342        xor     %rax, %r8
343        or      %rcx, %r8
344        sub     %rdx, %r8
345        jnz     LABEL(tail)
346
347        mov     8 (%rsi), %rax
348        add     $8, %rsi
349
/* word test 6 of 8 */
350        mov     %rcx, %r8
351        add     %rax, %r8
352        sbb     %rdx, %rdx
353
354        xor     %rax, %r8
355        or      %rcx, %r8
356        sub     %rdx, %r8
357        jnz     LABEL(tail)
358
359        mov     8 (%rsi), %rax
360        add     $8, %rsi
361
/* word test 7 of 8 */
362        mov     %rcx, %r8
363        add     %rax, %r8
364        sbb     %rdx, %rdx
365
366        xor     %rax, %r8
367        or      %rcx, %r8
368        sub     %rdx, %r8
369        jnz     LABEL(tail)
370
371        mov     8 (%rsi), %rax
372        add     $8, %rsi
373
/* word test 8 of 8 */
374        mov     %rcx, %r8
375        add     %rax, %r8
376        sbb     %rdx, %rdx
377
378        xor     %rax, %r8
379        or      %rcx, %r8
380        sub     %rdx, %r8
381        jnz     LABEL(tail)
382
/* hint only: request the line 512 bytes ahead before the next pass */
383        prefetchnta 512 (%rsi)	/* 3DNow: use prefetch */
384
385        mov     8 (%rsi), %rax
386        add     $8, %rsi
387
388        jmp     LABEL(preloop)
389
390        .p2align 4
391
392LABEL(preafter):
393
394LABEL(tailtry):
395
/*
 * Phase 5: every branch to here has established that %rax contains a
 * zero byte and that %rsi is the address %rax was loaded from.  The
 * bytes are checked from the lowest address upward (%al first), with
 * %rsi advanced past each non-zero byte.  After four bytes the upper
 * half of %rax is shifted down and the same four checks repeat, so at
 * most two passes are needed.
 */
396LABEL(tail):                             /* 4-byte tail */
397
398LABEL(tailloop):
399        test    %al, %al
400        jz      LABEL(exit)
401
402        inc     %rsi
403
404        test    %ah, %ah
405        jz      LABEL(exit)
406
407        inc     %rsi
408
409        test    $0x00ff0000, %eax
410        jz      LABEL(exit)
411
412        inc     %rsi
413
414        test    $0xff000000, %eax
415        jz      LABEL(exit)
416
417        inc     %rsi
418
419        shr     $32, %rax
420        jmp     LABEL(tailloop)
421
422LABEL(tailafter):
423
424        .p2align 4
425
/*
 * %rsi points at the terminating zero byte and %rdi holds -s, so their
 * sum is the string length.
 */
426LABEL(exit):
427        lea     (%rdi, %rsi), %rax
428        ret
429
430	SET_SIZE(strlen)
431
431