xref: /linux/arch/powerpc/lib/string_64.S (revision 4d5e3b06e1fc1428be14cd4ebe3b37c1bb34f95d)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */

9#include <asm/ppc_asm.h>
10#include <asm/linkage.h>
11#include <asm/asm-offsets.h>
12#include <asm/export.h>
13
/*
 * TOC entry holding the address of ppc64_caches.  __arch_clear_user
 * loads it TOC-relative through the local label PPC64_CACHES.  The
 * assembler is switched back to .text afterwards.
 */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches
	.section	".text"

/**
 * __arch_clear_user: - Zero a block of memory in user space, with less checking.
 * @to:   Destination address, in user space.
 * @n:    Number of bytes to zero.
 *
 * Zero a block of memory in user space.  Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */

/*
 * err1: used as "err1; <insn>".  Defines local label 100 immediately in
 * front of the instruction that follows on the same source line and hands
 * that label, paired with .Ldo_err1, to EX_TABLE.
 *
 * NOTE(review): EX_TABLE is defined outside this file; presumably it
 * records an exception-table entry so that a fault on the tagged
 * instruction resumes at .Ldo_err1 -- confirm against its definition.
 */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

/*
 * err2: used as "err2; <insn>".  Defines local label 200 immediately in
 * front of the instruction that follows on the same source line and hands
 * that label, paired with .Ldo_err2, to EX_TABLE.
 *
 * NOTE(review): EX_TABLE is defined outside this file; presumably it
 * records an exception-table entry so that a fault on the tagged
 * instruction resumes at .Ldo_err2 -- confirm against its definition.
 */
	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

/*
 * err3: used as "err3; <insn>".  Defines local label 300 immediately in
 * front of the instruction that follows on the same source line and hands
 * that label, paired with .Ldo_err3, to EX_TABLE.
 *
 * NOTE(review): EX_TABLE is defined outside this file; presumably it
 * records an exception-table entry so that a fault on the tagged
 * instruction resumes at .Ldo_err3 -- confirm against its definition.
 */
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

/*
 * Fixup chain named by the err1/err2/err3 macros.  The three labels fall
 * through into one another.
 *
 * Register state expected on entry (set up by __arch_clear_user):
 *   r0 = 0, the value being stored
 *   r3 = current store address (.Ldo_err2) -- .Ldo_err1 first reloads it
 *        from r8
 *   r4 = number of bytes still to be cleared starting at that address
 *   r8 = restart address saved before a run of err1-tagged stores
 *
 * Result: r3 = number of bytes that were not cleared.
 */

/* err1 target: rewind to the saved restart address, then retry bytewise. */
.Ldo_err1:
	mr	r3,r8

/*
 * err2 target: retry the remaining r4 bytes one byte at a time.
 * NOTE(review): bdnz relies on CTR (= r4) being non-zero on entry; every
 * err1/err2-tagged store in __arch_clear_user appears to execute with
 * r4 >= 1 -- confirm if new tagged stores are added.
 */
.Ldo_err2:
	mtctr	r4
1:
err3;	stb	r0,0(r3)	/* a fault here goes to .Ldo_err3 */
	addi	r3,r3,1
	addi	r4,r4,-1	/* one more byte cleared */
	bdnz	1b
	/* Loop ran to completion: r4 is now 0, so the result below is 0. */

/* err3 target: give up and report what is left. */
.Ldo_err3:
	mr	r3,r4		/* return value = bytes not cleared */
	blr

/*
 * __arch_clear_user(to, n)
 *
 * Register use, as established by the code below:
 *   r3   in: destination address, advanced as bytes are cleared;
 *        out: return value (0 on the normal exit path)
 *   r4   in: byte count; tracks the bytes still to be cleared
 *   r0   constant zero, the value stored
 *   r8   restart address consumed by the err1 fixup (.Ldo_err1); it is
 *        refreshed before each run of err1-tagged stores
 *   cr7  low four bits of a count (mtocrf 0x01), tested bit by bit to
 *        select 8/4/2/1-byte stores
 *   r2   TOC pointer, used to reach PPC64_CACHES
 *   r5, r6, r7, r9, r10, ctr, cr0, cr1: scratch
 *
 * Every store to the destination is tagged with err1 or err2 so that a
 * fault is redirected to the fixup code at .Ldo_err1/.Ldo_err2, which
 * returns the number of bytes not cleared instead of 0.
 *
 * Stages: align to 8 bytes -> choose by remaining size:
 *   < 32 bytes   .Lshort_clear
 *   <= 512 bytes .Lmedium_clear (32-byte chunks), then .Lshort_clear
 *   > 512 bytes  .Llong_clear (dcbz per cache block), then medium/short
 */
_GLOBAL_TOC(__arch_clear_user)
	cmpdi	r4,32
	neg	r6,r3		/* low bits of -to = bytes up to the next boundary */
	li	r0,0
	blt	.Lshort_clear	/* fewer than 32 bytes: skip the alignment pass */
	mr	r8,r3		/* err1 restart address = original destination */
	mtocrf	0x01,r6		/* cr7 = low 4 bits of -to */
	clrldi	r6,r6,(64-3)	/* r6 = bytes needed to reach 8-byte alignment */

	/*
	 * Get the destination 8 byte aligned.  r4 is left untouched until
	 * label 3, so an err1 fault restarts from r8 with the full count.
	 */
	bf	cr7*4+3,1f
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r4,r4,r6	/* account for the alignment bytes just cleared */

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear	/* more than 512 bytes remain */

	/*
	 * Reached by fall-through or by the two branches in .Llong_clear;
	 * on each of those paths r4 >= 32, so the chunk count is non-zero.
	 */
.Lmedium_clear:
	srdi	r6,r4,5		/* number of whole 32-byte chunks */
	mtctr	r6

	/* Do 32 byte chunks */
4:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32	/* r3/r4 stay in step for the err2 fixup */
	bdnz	4b

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	6f
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/* Up to 15 bytes to go */
6:	mr	r8,r3		/* err1 restart address for the tail stores */
	clrldi	r4,r4,(64-4)	/* keep only the low 4 bits of the count */
	mtocrf	0x01,r4		/* cr7 = remaining count, one bit per store size */
	bf	cr7*4+0,7f
err1;	std	r0,0(r3)
	addi	r3,r3,8

7:	bf	cr7*4+1,8f
err1;	stw	r0,0(r3)
	addi	r3,r3,4

8:	bf	cr7*4+2,9f
err1;	sth	r0,0(r3)
	addi	r3,r3,2

9:	bf	cr7*4+3,10f
err1;	stb	r0,0(r3)

10:	li	r3,0		/* everything cleared: return 0 */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)	/* r5 = value of the ppc64_caches TOC entry */

	/*
	 * cr7 still holds the low 4 bits of -to from function entry; bit 0
	 * (value 8) set means one more doubleword is needed before the
	 * destination is 16-byte aligned.
	 */
	bf	cr7*4+0,11f
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cache block aligned */
11:	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)	/* r7 = shift count used below */
	lwz	r9,DCACHEL1BLOCKSIZE(r5)	/* r9 = block size (dcbz stride) */

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.
	 */
	sldi	r10,r9,2	/* r10 = 4 * block size */
	cmpd	r4,r10
	blt	.Lmedium_clear	/* fewer than 4 blocks left: use plain stores */

	neg	r6,r3
	addi	r10,r9,-1	/* r10 = block size - 1, used as a mask */
	and.	r5,r6,r10	/* r5 = bytes up to the next block boundary */
	beq	13f		/* already block aligned */

	srdi	r6,r5,4		/* number of 16-byte steps to the boundary */
	mtctr	r6
	mr	r8,r3		/* err1 restart address; r4 is adjusted after the loop */
12:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	12b

	sub	r4,r4,r5

13:	srd	r6,r4,r7	/* r6 = r4 >> r7: number of whole blocks */
	mtctr	r6
	mr	r8,r3		/* err1 restart address; r4 is adjusted after the loop */
14:
err1;	dcbz	0,r3		/* zero one whole block at r3 */
	add	r3,r3,r9
	bdnz	14b

	and	r4,r4,r10	/* bytes left over after the whole blocks */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
EXPORT_SYMBOL(__arch_clear_user)
