/* xref: /freebsd/lib/libc/aarch64/string/strcmp.S (revision 5e7d93a604400ca3c9db3be1df82ce963527740c) */
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Getz Mikalsen <getz@FreeBSD.org>
 */

#include <machine/asm.h>
#include <machine/param.h>

	/*
	 * strcmp is emitted as a weak symbol whose value is set to
	 * __strcmp, the routine defined below.
	 */
	.weak	strcmp
	.set	strcmp, __strcmp
	.text

/*
 * int strcmp(const char *s1, const char *s2)
 *
 * In:	x0 = s1, x1 = s2
 * Out:	w0 = (byte of s1) - (byte of s2) at the first position where the
 *	strings differ or where they both end; both bytes are loaded
 *	zero-extended with ldrb, so the result is 0 for equal strings.
 * Uses: x2-x15, v0-v6 and the condition flags.  The body never touches sp.
 *
 * Method
 *  1. Head: compare the first 16 bytes starting at s1 and s2.  When a
 *     16-byte load from either pointer would change address bit
 *     PAGE_SHIFT, the enclosing 16-byte aligned chunks are loaded
 *     instead; a string that ends inside its aligned chunk is then
 *     shifted into place with TBL and the shift_data table, so no load
 *     runs past the end of a string into the following page.
 *  2. Body: walk the string with the larger offset within its 16-byte
 *     chunk using aligned addresses, and read the matching bytes of the
 *     other string at a constant displacement.  The aligned chunk of the
 *     other string is loaded as well and checked for NUL before the
 *     result of the comparison is trusted.  If s1's offset is smaller
 *     than s2's, the same code runs with the roles exchanged (.Lswapped)
 *     and the final subtraction is reversed.
 *
 * Masks
 *  "cmeq" produces 0xff/0x00 per byte; "shrn #4" narrows that to four
 *  bits per byte, giving a 64-bit mask that is moved to a general
 *  register.  rbit + clz counts trailing zero bits, and ">> 2" turns the
 *  bit index into the byte index of the first flagged byte.
 */
ENTRY(__strcmp)

	bic	x8, x0, #0xf			// x8 = s1 rounded down to 16 bytes
	and	x9, x0, #0xf			// x9 = offset of s1 within its chunk
	bic	x10, x1, #0xf			// x10 = s2 rounded down to 16 bytes
	and	x11, x1, #0xf			// x11 = offset of s2 within its chunk

	mov	x13, #-1			// all-ones, shifted below to build head masks

	/*
	 * Check if either string is located at end of page to avoid crossing
	 * into unmapped page. If so, we load 16 bytes from the nearest
	 * alignment boundary and shift based on the offset.
	 */

	add	x3, x0, #16			// end of head
	add	x4, x1, #16
	eor	x3, x3, x0
	eor	x4, x4, x1			// bits that changed
	orr	x3, x3, x4			// in either str1 or str2
	tbz	w3, #PAGE_SHIFT, .Lbegin	// page bit unchanged: load heads directly

	ldr	q0, [x8]			// load aligned head
	ldr	q2, [x10]

	/*
	 * x3/x4 = all-ones shifted left by four bits per offset byte: they
	 * select the mask nibbles of the bytes at or after the start of
	 * s1/s2 inside the aligned chunk.
	 */
	lsl	x14, x9, #2
	lsl	x15, x11, #2
	lsl	x3, x13, x14			// string head
	lsl	x4, x13, x15

	cmeq	v5.16b, v0.16b, #0		// NUL bytes in the aligned chunk of s1
	cmeq	v6.16b, v2.16b, #0		// NUL bytes in the aligned chunk of s2

	shrn	v5.8b, v5.8h, #4
	shrn	v6.8b, v6.8h, #4
	fmov	x5, d5				// x5 = NUL mask for s1's chunk
	fmov	x6, d6				// x6 = NUL mask for s2's chunk

	adrp	x2, shift_data
	add	x2, x2, :lo12:shift_data	// x2 = &shift_data

	/* heads may cross page boundary, avoid unmapped loads */
	tst	x5, x3				// does s1 end inside its aligned chunk?
	b.eq	0f				// no: the direct load below is used

	/*
	 * s1 ends inside the chunk already loaded.  The 16 table bytes at
	 * shift_data + offset are offset..15 followed by 0xff entries; TBL
	 * moves the string bytes down to index 0 and yields zero for the
	 * out-of-range 0xff indices.
	 */
	ldr	q4, [x2, x9]			// load permutation table
	tbl	v0.16b, {v0.16b}, v4.16b

	b	1f
	.p2align 4
0:
	ldr	q0, [x0]			// load true head
1:
	tst	x6, x4				// does s2 end inside its aligned chunk?
	b.eq	0f				// no: load s2's head at the next "0:"

	ldr	q4, [x2, x11]			// same shuffle for s2, result in v4
	tbl	v4.16b, {v2.16b}, v4.16b

	b 1f					// both heads ready, skip the loads

	.p2align 4
.Lbegin:
	ldr	q0, [x0]			// load true heads
0:
	ldr	q4, [x1]
1:
	/* v0 = head of s1, v4 = head of s2 */

	cmeq	v2.16b, v0.16b, #0		// NUL byte present?
	cmeq	v4.16b, v0.16b, v4.16b		// which bytes match?

	orn	v2.16b, v2.16b, v4.16b		// mismatch or NUL byte?

	shrn	v2.8b, v2.8h, #4
	fmov	x5, d2

	cbnz	x5, .Lhead_mismatch

	/*
	 * Heads are equal and contain no NUL.  Continue with the aligned
	 * chunks that follow them.
	 */
	ldr	q2, [x8, #16]			// load second chunk
	ldr	q3, [x10, #16]
	subs	x9, x9, x11			// is a&0xf >= b&0xf
	b.lo	.Lswapped			// if not swap operands
	sub	x12, x10, x9
	ldr	q0, [x12, #16]!			// s2 bytes matching s1's second chunk
						// (x12 is written back but not read again)
	sub	x10, x10, x8			// x10 = distance between the aligned bases
	sub	x11, x10, x9			// x11 = displacement from an s1 chunk to
						// the matching bytes of s2

	cmeq	v1.16b, v3.16b, #0		// NUL in s2's aligned chunk?
	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?
	add	x8, x8, #16			// x8 = chunk just compared
	shrn	v1.8b, v1.8h, #4
	fmov	x6, d1				// x6 = NUL mask
	shrn	v0.8b, v0.8h, #4
	fmov	x5, d0				// x5 = match mask (not yet inverted)
	cbnz	x6, .Lnulfound
	mvn	x5, x5				// x5 = mismatch mask
	cbnz	x5, .Lmismatch
	add	x8, x8, #16			// advance aligned pointers

	/*
	 * During the main loop, the layout of the two strings is something like:
	 *
	 *          v ------1------ v ------2------ v
	 *      X0:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
	 *      X1: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
	 *
	 * where v indicates the alignment boundaries and corresponding chunks
	 * of the strings have the same letters.  Chunk A has been checked in
	 * the previous iteration.  This iteration, we first check that string
	 * X1 doesn't end within region 2, then we compare chunk B between the
	 * two strings.  As X1 is known not to hold a NUL byte in regions 1
	 * and 2 at this point, this also ensures that x0 has not ended yet.
	 *
	 * The loop body is unrolled twice; x8 advances by 16 per half.
	 */
	.p2align 4
0:
	ldr	q0, [x8, x11]			// s2 bytes matching the s1 chunk
	ldr	q1, [x8, x10]			// aligned chunk of s2
	ldr	q2, [x8]			// aligned chunk of s1

	cmeq	v1.16b, v1.16b, #0		// end of string?
	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?

	shrn	v1.8b, v1.8h, #4
	fmov	x6, d1
	shrn	v0.8b, v0.8h, #4
	fmov	x5, d0
	cbnz	x6, .Lnulfound
	mvn	x5, x5				// any mismatches?
	cbnz	x5, .Lmismatch

	add	x8, x8, #16

	/* second half of the unrolled iteration */
	ldr	q0, [x8, x11]
	ldr	q1, [x8, x10]
	ldr	q2, [x8]

	add	x8, x8, #16			// incremented early; undone on exit
	cmeq	v1.16b, v1.16b, #0
	cmeq	v0.16b, v0.16b, v2.16b

	shrn	v1.8b, v1.8h, #4
	fmov	x6, d1
	shrn	v0.8b, v0.8h, #4
	fmov	x5, d0
	cbnz	x6, .Lnulfound2
	mvn	x5, x5
	cbz	x5, 0b

	sub	x8, x8, #16			// roll back second increment

	/*
	 * x5 = mask whose lowest set nibble marks the deciding byte,
	 * x8 = s1 chunk that was compared, x8 + x11 = matching s2 bytes.
	 */
.Lmismatch:
	rbit	x2, x5
	clz	x2, x2				// index of mismatch
	lsr	x2, x2, #2
	add	x11, x8, x11

	ldrb	w4, [x8, x2]
	ldrb	w5, [x11, x2]
	sub	w0, w4, w5			// byte difference
	ret

	.p2align 4
.Lnulfound2:
	sub	x8, x8, #16			// roll back second increment

	/*
	 * A NUL was seen in s2's aligned chunk.  x6 = NUL mask indexed by
	 * position in that aligned chunk, x5 = match mask (not inverted) of
	 * the compared window, which starts x9 bytes before the aligned
	 * chunk.
	 */
.Lnulfound:
	mov	x7, x9
	mov	x4, x6				// keep the unshifted NUL mask

	ubfiz	x7, x7, #2, #4			// x7 = (x7 & 0xf) << 2
	lsl	x6, x6, x7			// adjust NUL mask to indices
	orn	x5, x6, x5			// NUL or mismatch inside the window?
	cbnz	x5, .Lmismatch

	/*
	 * The compared window is equal and the NUL lies beyond it.
	 * Compare s2's aligned chunk with the corresponding part
	 * of the other string until the NUL byte.
	 */
	ldr	q0, [x8, x9]			// s1 bytes matching s2's aligned chunk
	ldr	q1, [x8, x10]			// s2's aligned chunk

	cmeq	v1.16b, v0.16b, v1.16b
	shrn	v1.8b, v1.8h, #4
	fmov	x5, d1

	orn	x5, x4, x5			// NUL (unshifted mask) or mismatch

	rbit	x2, x5
	clz	x2, x2
	lsr	x5, x2, #2			// x5 = index of the deciding byte

	add	x10, x10, x8			// restore x10 pointer
	add	x8, x8, x9			// point to corresponding chunk

	ldrb	w4, [x8, x5]
	ldrb	w5, [x10, x5]
	sub	w0, w4, w5
	ret

	/*
	 * The heads differ or contain a NUL: x5 flags the first such byte,
	 * which is read relative to the original pointers.
	 */
	.p2align 4
.Lhead_mismatch:
	rbit	x2, x5
	clz	x2, x2				// index of mismatch
	lsr	x2, x2, #2
	ldrb	w4, [x0, x2]
	ldrb	w5, [x1, x2]
	sub	w0, w4, w5
	ret

	/*
	 * If (a&0xf) < (b&0xf), we do the same thing but with swapped
	 * operands.  I found that this performs slightly better than
	 * using conditional moves to do the swap branchless.
	 *
	 * On entry x9 = (a&0xf) - (b&0xf), which is negative here;
	 * q2/q3 hold the second aligned chunks of s1/s2.
	 */
	.p2align 4
.Lswapped:
	add	x12, x8, x9
	ldr	q0, [x12, #16]!			// s1 bytes matching s2's second chunk
						// (x12 is written back but not read again)
	sub	x8, x8, x10			// x8 = distance between the aligned bases
	add	x11, x8, x9			// x11 = displacement from an s2 chunk to
						// the matching bytes of s1
	neg	x9, x9				// x9 = (b&0xf) - (a&0xf), now positive

	cmeq	v1.16b, v2.16b, #0		// NUL in s1's aligned chunk?
	cmeq	v0.16b, v0.16b, v3.16b		// do the chunks match?
	add	x10, x10, #16			// x10 = chunk just compared
	shrn	v1.8b, v1.8h, #4
	fmov	x6, d1
	shrn	v0.8b, v0.8h, #4
	fmov	x5, d0
	cbnz	x6, .Lnulfounds
	mvn	x5, x5
	cbnz	x5, .Lmismatchs
	add	x10, x10, #16

	/*
	 * During the main loop, the layout of the two strings is something like:
	 *
	 *          v ------1------ v ------2------ v
	 *      X1:    AAAAAAAAAAAAABBBBBBBBBBBBBBBB...
	 *      X0: AAAAAAAAAAAAABBBBBBBBBBBBBBBBCCC...
	 *
	 * where v indicates the alignment boundaries and corresponding chunks
	 * of the strings have the same letters.  Chunk A has been checked in
	 * the previous iteration.  This iteration, we first check that string
	 * X0 doesn't end within region 2, then we compare chunk B between the
	 * two strings.  As X0 is known not to hold a NUL byte in regions 1
	 * and 2 at this point, this also ensures that X1 has not ended yet.
	 */
	.p2align 4
0:
	ldr	q0, [x10, x11]			// s1 bytes matching the s2 chunk
	ldr	q1, [x10, x8]			// aligned chunk of s1
	ldr	q2, [x10]			// aligned chunk of s2

	cmeq	v1.16b, v1.16b, #0		// end of string?
	cmeq	v0.16b, v0.16b, v2.16b		// do the chunks match?

	shrn	v1.8b, v1.8h, #4
	fmov	x6, d1
	shrn	v0.8b, v0.8h, #4
	fmov	x5, d0
	cbnz	x6, .Lnulfounds
	mvn	x5, x5				// any mismatches?
	cbnz	x5, .Lmismatchs

	add	x10, x10, #16

	/* second half of the unrolled iteration */
	ldr	q0, [x10, x11]
	ldr	q1, [x10, x8]
	ldr	q2, [x10]

	add	x10, x10, #16			// incremented early; undone on exit
	cmeq	v1.16b, v1.16b, #0
	cmeq	v0.16b, v0.16b, v2.16b

	shrn	v1.8b, v1.8h, #4
	fmov	x6, d1
	shrn	v0.8b, v0.8h, #4
	fmov	x5, d0
	cbnz	x6, .Lnulfound2s
	mvn	x5, x5
	cbz	x5, 0b

	sub	x10, x10, #16			// roll back second increment

	/*
	 * As .Lmismatch with the roles exchanged: x10 addresses s2 and
	 * x10 + x11 addresses s1, so the subtraction is reversed to keep
	 * the result equal to (s1 byte) - (s2 byte).
	 */
.Lmismatchs:
	rbit	x2, x5
	clz	x2, x2				// index of mismatch
	lsr	x2, x2, #2
	add	x11, x10, x11

	ldrb	w4, [x10, x2]			// byte of s2
	ldrb	w5, [x11, x2]			// byte of s1
	sub	w0, w5, w4
	ret

	.p2align 4
.Lnulfound2s:
	sub	x10, x10, #16			// roll back second increment
	/* as .Lnulfound with the roles of the two strings exchanged */
.Lnulfounds:
	mov	x7, x9
	mov	x4, x6				// keep the unshifted NUL mask

	ubfiz	x7, x7, #2, #4			// x7 = (x7 & 0xf) << 2
	lsl	x6, x6, x7			// adjust NUL mask to indices
	orn	x5, x6, x5			// NUL or mismatch inside the window?
	cbnz	x5, .Lmismatchs

	ldr	q0, [x10, x9]			// s2 bytes matching s1's aligned chunk
	ldr	q1, [x10, x8]			// s1's aligned chunk

	cmeq	v1.16b, v0.16b, v1.16b
	shrn	v1.8b, v1.8h, #4
	fmov	x5, d1

	orn	x5, x4, x5			// NUL (unshifted mask) or mismatch

	rbit	x2, x5
	clz	x2, x2
	lsr	x5, x2, #2			// x5 = index of the deciding byte

	add	x11, x10, x8			// x11 = s1's aligned chunk
	add	x10, x10, x9			// x10 = matching bytes of s2

	ldrb	w4, [x10, x5]			// byte of s2
	ldrb	w5, [x11, x5]			// byte of s1
	sub	w0, w5, w4
	ret

END(__strcmp)

345*5e7d93a6SGetz Mikalsen	.section .rodata
346*5e7d93a6SGetz Mikalsen	.p2align 4
347*5e7d93a6SGetz Mikalsenshift_data:
348*5e7d93a6SGetz Mikalsen	.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
349*5e7d93a6SGetz Mikalsen	.fill 16, 1, -1
350*5e7d93a6SGetz Mikalsen	.size shift_data, .-shift_data
351