xref: /linux/arch/powerpc/lib/memcmp_64.S (revision e0bf6c5ca2d3281f231c5f0c9bf145e9513644de)
/*
 * Author: Anton Blanchard <anton@au.ibm.com>
 * Copyright 2015 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/ppc_asm.h>

/* Index registers used for the offset loads in the unrolled 32B loop. */
#define off8	r6
#define off16	r7
#define off24	r8

/*
 * Data registers holding the 8-byte words being compared.
 * rA-rC (r9-r11) are ABI-volatile; rD-rH (r27-r31) are non-volatile
 * and are saved/restored around the .Llong path that uses them.
 */
#define rA	r9
#define rB	r10
#define rC	r11
#define rD	r27
#define rE	r28
#define rF	r29
#define rG	r30
#define rH	r31

#ifdef __LITTLE_ENDIAN__
/*
 * Byte-reversed load: presents the data in big-endian word order so a
 * single unsigned cmpld yields memcmp()'s lexicographic byte ordering.
 */
#define LD	ldbrx
#else
#define LD	ldx
#endif
30
/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *
 * In:	r3 = s1, r4 = s2, r5 = n
 * Out:	r3 = 0 if equal; 1 / -1 giving the sign of the first difference
 *	(NOTE(review): byte-difference callers only test the sign, so the
 *	long path returning exactly +/-1 is compatible with the short
 *	path's subtraction result)
 *
 * Strategy: byte-at-a-time loop for unaligned or short (< 32B) buffers;
 * otherwise a software-pipelined loop comparing 32B per iteration using
 * four condition-register fields (cr0/cr1/cr6/cr7), one per 8B word.
 */
_GLOBAL(memcmp)
	cmpdi	cr1,r5,0		/* cr1: n == 0? */

	/* Use the short loop if both strings are not 8B aligned */
	or	r6,r3,r4
	andi.	r6,r6,7			/* cr0: (s1 | s2) & 7 */

	/* Use the short loop if length is less than 32B */
	cmpdi	cr6,r5,31

	beq	cr1,.Lzero		/* n == 0 -> return 0 */
	bne	.Lshort			/* unaligned -> byte loop */
	bgt	cr6,.Llong		/* aligned and n > 31 -> word loop */

.Lshort:
	mtctr	r5			/* ctr = bytes remaining */

	/*
	 * Four byte-compares per pass; bdz between each so ctr (which
	 * counts single bytes) can expire mid-group.
	 */
1:	lbz	rA,0(r3)
	lbz	rB,0(r4)
	subf.	rC,rB,rA		/* rC = *s1 - *s2 */
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,1(r3)
	lbz	rB,1(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,2(r3)
	lbz	rB,2(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,3(r3)
	lbz	rB,3(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero

	addi	r3,r3,4
	addi	r4,r4,4

	bdnz	1b

.Lzero:
	li	r3,0			/* buffers equal */
	blr

.Lnon_zero:
	mr	r3,rC			/* sign of byte difference */
	blr

.Llong:
	li	off8,8
	li	off16,16
	li	off24,24

	/*
	 * Save the non-volatile registers we use (r27-r31) below r1.
	 * NOTE(review): relies on the ppc64 ELF ABI stack red zone being
	 * available below the stack pointer — confirm for this build env.
	 */
	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)

	srdi	r0,r5,5		/* r0 = number of whole 32B chunks */
	mtctr	r0
	andi.	r5,r5,31	/* r5 = leftover bytes for the tail */

	/* Prologue of the pipelined loop: issue the first 32B of loads. */
	LD	rA,0,r3
	LD	rB,0,r4

	LD	rC,off8,r3
	LD	rD,off8,r4

	LD	rE,off16,r3
	LD	rF,off16,r4

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB	/* first compare overlaps the last loads */

	addi	r3,r3,32
	addi	r4,r4,32

	bdz	.Lfirst32	/* only one 32B chunk: drain and exit */

	/*
	 * Second iteration: loads for chunk 2 are interleaved with the
	 * compares of chunk 1 so compares never wait on load latency.
	 * Each cr field is tested one iteration after it is set.
	 */
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdz	.Lsecond32

	.balign	16

	/* Steady state: 32B of loads + 4 compares + 4 branches per pass. */
1:	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdnz	1b

.Lsecond32:
	/* Drain: finish the compares still in flight for the last chunk. */
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

.Ltail:
	/* All full 32B chunks matched: restore non-volatiles. */
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)

	/* Handle the 0-31 leftover bytes with the byte loop. */
	cmpdi	r5,0
	beq	.Lzero
	b	.Lshort

.Lfirst32:
	/* ctr expired after the very first chunk: compare it and tail out. */
	cmpld	cr1,rC,rD
	cmpld	cr6,rE,rF
	cmpld	cr7,rG,rH

	bne	cr0,.LcmpAB
	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	b	.Ltail

/*
 * A mismatching word pair was found; the unsigned compare (cmpld of the
 * big-endian-ordered words) already encodes the lexicographic result in
 * the corresponding cr field — return +1 or -1 accordingly.
 */
.LcmpAB:
	li	r3,1
	bgt	cr0,.Lout
	li	r3,-1
	b	.Lout

.LcmpCD:
	li	r3,1
	bgt	cr1,.Lout
	li	r3,-1
	b	.Lout

.LcmpEF:
	li	r3,1
	bgt	cr6,.Lout
	li	r3,-1
	b	.Lout

.LcmpGH:
	li	r3,1
	bgt	cr7,.Lout
	li	r3,-1

.Lout:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)
	blr
234