xref: /linux/tools/testing/selftests/powerpc/stringloops/memcmp_64.S (revision e58e871becec2d3b04ed91c0c16fe8deac9c9dfa)
1/*
2 * Author: Anton Blanchard <anton@au.ibm.com>
3 * Copyright 2015 IBM Corporation.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10#include <asm/ppc_asm.h>
11#include <asm/export.h>
12
/*
 * Index registers for LD. memcmp's long path loads them with 8, 16 and 24
 * so the four doublewords of a 32-byte block can be fetched without
 * updating the base pointers in between.
 */
13#define off8	r6
14#define off16	r7
15#define off24	r8
16
/*
 * Data registers. In the long path rA/rB, rC/rD, rE/rF and rG/rH hold the
 * doubleword pairs (first buffer, second buffer) that are compared with
 * cmpld. The byte loop reuses rA/rB for the two bytes and rC for their
 * difference. rD-rH map to r27-r31, which memcmp saves below the stack
 * pointer before use and reloads before returning.
 */
17#define rA	r9
18#define rB	r10
19#define rC	r11
20#define rD	r27
21#define rE	r28
22#define rF	r29
23#define rG	r30
24#define rH	r31
25
/*
 * LD is the indexed doubleword load used by the long path. Little-endian
 * builds use the byte-reversed form (ldbrx) so that the byte at the lowest
 * address lands in the most significant position of the register, which
 * makes the unsigned doubleword compare order the data the same way a
 * byte-by-byte compare would.
 */
26#ifdef __LITTLE_ENDIAN__
27#define LD	ldbrx
28#else
29#define LD	ldx
30#endif
31
/*
 * memcmp: compare r5 bytes starting at r3 with r5 bytes starting at r4.
 *
 * In:   r3 = first buffer, r4 = second buffer, r5 = length in bytes.
 * Out:  r3 = 0 if every compared byte is equal.
 *       Byte loop mismatch:    r3 = (byte from first) - (byte from second).
 *       32-byte loop mismatch: r3 = 1 if the doubleword from the first
 *       buffer is greater (unsigned) than the one from the second, else -1.
 * Uses: r0, r3-r11, ctr, cr0, cr1, cr6, cr7. The long path also uses
 *       r27-r31, which it stores at -8..-40(r1) and reloads before
 *       returning; r1 itself is never modified.
 */
32_GLOBAL(memcmp)
	/* cr1 records whether the length is zero; it is tested below */
33	cmpdi	cr1,r5,0
34
35	/* Use the short loop if either string is not 8B aligned */
36	or	r6,r3,r4
37	andi.	r6,r6,7
38
39	/* Use the short loop if length is less than 32B */
40	cmpdi	cr6,r5,31
41
	/*
	 * Dispatch: a zero length returns 0; cr0 (set by the andi. above)
	 * sends unaligned buffers to the byte loop; lengths above 31 take the
	 * 32-byte loop; everything else falls through to the byte loop.
	 */
42	beq	cr1,.Lzero
43	bne	.Lshort
44	bgt	cr6,.Llong
45
	/*
	 * Byte loop, unrolled four times. ctr holds the number of bytes still
	 * to compare. Each bdz decrements ctr and returns 0 once it reaches
	 * zero; the bdnz at the bottom does the decrement for the fourth byte
	 * and loops while bytes remain. On a mismatch rC holds the difference
	 * of the two bytes (first buffer minus second buffer).
	 */
46.Lshort:
47	mtctr	r5
48
491:	lbz	rA,0(r3)
50	lbz	rB,0(r4)
51	subf.	rC,rB,rA
52	bne	.Lnon_zero
53	bdz	.Lzero
54
55	lbz	rA,1(r3)
56	lbz	rB,1(r4)
57	subf.	rC,rB,rA
58	bne	.Lnon_zero
59	bdz	.Lzero
60
61	lbz	rA,2(r3)
62	lbz	rB,2(r4)
63	subf.	rC,rB,rA
64	bne	.Lnon_zero
65	bdz	.Lzero
66
67	lbz	rA,3(r3)
68	lbz	rB,3(r4)
69	subf.	rC,rB,rA
70	bne	.Lnon_zero
71
	/* Four bytes matched: step both pointers to the next group */
72	addi	r3,r3,4
73	addi	r4,r4,4
74
75	bdnz	1b
76
	/* Every compared byte was equal */
77.Lzero:
78	li	r3,0
79	blr
80
	/* Return the byte difference left in rC by the byte loop */
81.Lnon_zero:
82	mr	r3,rC
83	blr
84
	/*
	 * 32-bytes-per-iteration path. Reached only when both pointers are
	 * 8B aligned and the length is at least 32.
	 */
85.Llong:
	/* Offsets of the second, third and fourth doubleword in a block */
86	li	off8,8
87	li	off16,16
88	li	off24,24
89
	/* Save r27-r31 (rD-rH) below the stack pointer before using them */
90	std	r31,-8(r1)
91	std	r30,-16(r1)
92	std	r29,-24(r1)
93	std	r28,-32(r1)
94	std	r27,-40(r1)
95
	/*
	 * ctr = number of whole 32B blocks, r5 = leftover bytes (0..31).
	 * The cr0 result of this andi. is not used; cr0 is rewritten by the
	 * cmpld below before anything branches on it.
	 */
96	srdi	r0,r5,5
97	mtctr	r0
98	andi.	r5,r5,31
99
	/* Load the first 32B block from each buffer */
100	LD	rA,0,r3
101	LD	rB,0,r4
102
103	LD	rC,off8,r3
104	LD	rD,off8,r4
105
106	LD	rE,off16,r3
107	LD	rF,off16,r4
108
109	LD	rG,off24,r3
110	LD	rH,off24,r4
111	cmpld	cr0,rA,rB
112
113	addi	r3,r3,32
114	addi	r4,r4,32
115
	/* Only one block: finish its compares at .Lfirst32 */
116	bdz	.Lfirst32
117
	/*
	 * Load the second block while comparing the rest of the first. Each
	 * cmpld reads a register pair just before the following LD overwrites
	 * it with data from the next block.
	 */
118	LD	rA,0,r3
119	LD	rB,0,r4
120	cmpld	cr1,rC,rD
121
122	LD	rC,off8,r3
123	LD	rD,off8,r4
124	cmpld	cr6,rE,rF
125
126	LD	rE,off16,r3
127	LD	rF,off16,r4
128	cmpld	cr7,rG,rH
129	bne	cr0,.LcmpAB
130
131	LD	rG,off24,r3
132	LD	rH,off24,r4
133	cmpld	cr0,rA,rB
134	bne	cr1,.LcmpCD
135
136	addi	r3,r3,32
137	addi	r4,r4,32
138
	/* No further blocks to load: drain the pending compares */
139	bdz	.Lsecond32
140
	/* Align the entry of the main loop to a 16-byte boundary */
141	.balign	16
142
	/*
	 * Steady state: load the next block while comparing the current one.
	 * A branch on a CR field always comes before the cmpld that next
	 * rewrites that field, so the .Lcmp* handlers still see the result
	 * that caused the branch.
	 */
1431:	LD	rA,0,r3
144	LD	rB,0,r4
145	cmpld	cr1,rC,rD
146	bne	cr6,.LcmpEF
147
148	LD	rC,off8,r3
149	LD	rD,off8,r4
150	cmpld	cr6,rE,rF
151	bne	cr7,.LcmpGH
152
153	LD	rE,off16,r3
154	LD	rF,off16,r4
155	cmpld	cr7,rG,rH
156	bne	cr0,.LcmpAB
157
158	LD	rG,off24,r3
159	LD	rH,off24,r4
160	cmpld	cr0,rA,rB
161	bne	cr1,.LcmpCD
162
163	addi	r3,r3,32
164	addi	r4,r4,32
165
166	bdnz	1b
167
	/*
	 * The last block is already loaded. cr6/cr7 still hold results from
	 * the previous block and cr0 holds the A/B result of the last block:
	 * test those, compare the remaining pairs, and test them as well.
	 */
168.Lsecond32:
169	cmpld	cr1,rC,rD
170	bne	cr6,.LcmpEF
171
172	cmpld	cr6,rE,rF
173	bne	cr7,.LcmpGH
174
175	cmpld	cr7,rG,rH
176	bne	cr0,.LcmpAB
177
178	bne	cr1,.LcmpCD
179	bne	cr6,.LcmpEF
180	bne	cr7,.LcmpGH
181
	/*
	 * All whole blocks matched. Restore r27-r31, then hand any leftover
	 * bytes (r5, with r3/r4 already advanced past the blocks) to the byte
	 * loop, or return 0 if there are none.
	 */
182.Ltail:
183	ld	r31,-8(r1)
184	ld	r30,-16(r1)
185	ld	r29,-24(r1)
186	ld	r28,-32(r1)
187	ld	r27,-40(r1)
188
189	cmpdi	r5,0
190	beq	.Lzero
191	b	.Lshort
192
	/*
	 * Single-block case: cr0 already holds the A/B result, so compare the
	 * other three pairs and test all four results in order.
	 */
193.Lfirst32:
194	cmpld	cr1,rC,rD
195	cmpld	cr6,rE,rF
196	cmpld	cr7,rG,rH
197
198	bne	cr0,.LcmpAB
199	bne	cr1,.LcmpCD
200	bne	cr6,.LcmpEF
201	bne	cr7,.LcmpGH
202
203	b	.Ltail
204
	/*
	 * Mismatch handlers, one per CR field. Each returns 1 when its cmpld
	 * found the doubleword from the first buffer greater (unsigned) than
	 * the one from the second, and -1 otherwise.
	 */
205.LcmpAB:
206	li	r3,1
207	bgt	cr0,.Lout
208	li	r3,-1
209	b	.Lout
210
211.LcmpCD:
212	li	r3,1
213	bgt	cr1,.Lout
214	li	r3,-1
215	b	.Lout
216
217.LcmpEF:
218	li	r3,1
219	bgt	cr6,.Lout
220	li	r3,-1
221	b	.Lout
222
	/* The less-than case of this handler falls through into .Lout */
223.LcmpGH:
224	li	r3,1
225	bgt	cr7,.Lout
226	li	r3,-1
227
	/* Restore r27-r31 and return with the result already in r3 */
228.Lout:
229	ld	r31,-8(r1)
230	ld	r30,-16(r1)
231	ld	r29,-24(r1)
232	ld	r28,-32(r1)
233	ld	r27,-40(r1)
234	blr
235EXPORT_SYMBOL(memcmp)
236