/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non access miss handler
 *       can be used.
 */
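/*
 * For illustration (not additional code): with base register
 * modification, a single instruction such as
 *
 *	fdc,m	%r23(%r26)
 *
 * flushes the cache line addressed by %r26 and then advances %r26 by
 * the stride held in %r23.  The flush loops below use this ",m" form
 * so a page can be walked without separate address-increment
 * instructions.
 */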

#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>

	.section .text.hot
	.align	16

ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
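	/* The five nops below appear to pad the shadow of the rsm so
	 * that no interruption-sensitive instruction executes before
	 * the PSW change takes effect (sketch of intent; see the PA 2.0
	 * Arch. sections cited above). */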
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:	load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */

fitdone:

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_tlb_all_local)

	.import cache_info, data

ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice		%r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)


	.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	syncdma
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)

/* Macros to serialize TLB purge operations on SMP.  */

	.macro	tlb_lock	la,flags,tmp
#ifdef CONFIG_SMP
98:
#if __PA_LDCW_ALIGNMENT > 4
	load32		pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
	depi		0,31,__PA_LDCW_ALIGN_ORDER, \la
#else
	load32		pa_tlb_lock, \la
#endif
	rsm		PSW_SM_I,\flags		/* disable interrupts while holding the lock */
1:	LDCW		0(\la),\tmp		/* ldcw zeroes the word; nonzero result means we got it */
	cmpib,<>,n	0,\tmp,3f
2:	ldw		0(\la),\tmp		/* spin with plain loads to avoid */
	cmpb,<>		%r0,\tmp,1b		/* hammering the line with ldcw */
	nop
	b,n		2b
3:
99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
	.endm

	.macro	tlb_unlock	la,flags,tmp
#ifdef CONFIG_SMP
98:	ldi		1,\tmp
	sync					/* order purges before the release */
	stw		\tmp,0(\la)		/* store 1 to mark the lock free */
	mtsm		\flags			/* restore the saved I-bit */
99:	ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
	.endm
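
/* Typical use of the two macros above, as in the tmpalias flushes
 * below: bracket the purge so only one CPU issues pdtlb/pitlb at a
 * time, e.g.
 *
 *	tlb_lock	%r20,%r21,%r22
 *	pdtlb		%r0(%r28)
 *	tlb_unlock	%r20,%r21,%r22
 *
 * On PA 2.0 (CONFIG_PA20) the callers use the local form (pdtlb,l)
 * instead and take no lock.
 */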

/* Clear page using kernel mapping.  */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32-bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)

/* Copy page using kernel mapping.  */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64-bit register values on interrupt, we can't
	 * use ldd/std on a 32-bit kernel.
	 */
	ldw		0(%r25), %r19
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)

/*
 * NOTE: Code in clear_user_page has a hard coded dependency on the
 *       maximum alias boundary being 4 MB. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical page (shifted for tlb insert) of "to" translation
 *          %r23 physical page (shifted for tlb insert) of "from" translation
 */
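
/* Sketch of the flow described above (descriptive only):
 *
 *   1. clear_user_page_asm/copy_user_page_asm form a virtual address
 *      within TMPALIAS_MAP_START and leave the preshifted physical
 *      page number(s) in %r26 (and %r23 for the `from' page).
 *   2. The first access through that address misses; the dtlb miss
 *      handler recognizes the temp alias range and builds the
 *      translation from those registers rather than walking the
 *      kernel page table.
 */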

	/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
	#define PAGE_ADD_SHIFT	(PAGE_SHIFT-12)
	.macro		convert_phys_for_tlb_insert20	phys
	extrd,u		\phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
#if _PAGE_SIZE_ENCODING_DEFAULT
	depdi		_PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
#endif
	.endm
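	/* Descriptive example of the macro above (not additional code):
	 * with 4k pages, PAGE_ADD_SHIFT is 0 and the extrd,u extracts
	 * the physical page number, leaving it preshifted as the
	 * idtlbt/iitlbt operand expects; if a non-zero default page
	 * size encoding is configured, depdi then deposits that
	 * encoding into the low-order bits. */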

	/*
	 * copy_user_page_asm() performs a page copy using mappings
	 * equivalent to the user page mappings.  It can be used to
	 * implement copy_user_page() but unfortunately both the `from'
	 * and `to' pages need to be flushed through mappings equivalent
	 * to the user mappings after the copy because the kernel accesses
	 * the `from' page through the kmap kernel mapping and the `to'
	 * page needs to be flushed since code can be copied.  As a
	 * result, this implementation is less efficient than the simpler
	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
	 * the flushes needed for the kernel mapping.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 *
	 */

ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
	depd		%r24, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
	tlb_lock	%r20,%r21,%r22
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	tlb_unlock	%r20,%r21,%r22
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64-bit register values on interrupt, we can't
	 * use ldd/std on a 32-bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r29), %r29
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)

ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
	tlb_lock	%r20,%r21,%r22
0:	pdtlb		%r0(%r28)
	tlb_unlock	%r20,%r21,%r22
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)

ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
	tlb_lock	%r20,%r21,%r22
0:	pdtlb		%r0(%r28)
	tlb_unlock	%r20,%r21,%r22
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

1:	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)

ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
	tlb_lock	%r20,%r21,%r22
0:	pdtlb		%r0(%r28)
	tlb_unlock	%r20,%r21,%r22
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

1:	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)

ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
#endif

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l		%r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
	tlb_lock	%r20,%r21,%r22
0:	pdtlb		%r0(%r28)
1:	pitlb		%r0(%sr4,%r28)
	tlb_unlock	%r20,%r21,%r22
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)

ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)

ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)

ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)

ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)

ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	syncdma
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)

ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)

ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)

ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)

	__INIT

	/* align should cover use of rfi in disable_sr_hashing_asm and
	 * srdis_done.
	 */
	.align	256
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * Switch to real mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:	cmpib,=,n	SRHASH_PCXST, %r26, srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26, srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26, srdis_pa20
	b,n		srdis_done

srdis_pcxs:

	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */

	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:

	/* Disable Space Register Hashing for PCXL */

	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi		0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:

	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */

	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */

srdis_done:
	/* Switch back to virtual mode */
	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

2:	bv		%r0(%r2)
	nop
ENDPROC_CFI(disable_sr_hashing_asm)

	.end