xref: /linux/arch/parisc/kernel/pacache.S (revision e9f0878c4b2004ac19581274c1ae4c61ae3ca70e)
1/*
2 *  PARISC TLB and cache flushing support
3 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
4 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
5 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
6 *
7 *    This program is free software; you can redistribute it and/or modify
8 *    it under the terms of the GNU General Public License as published by
9 *    the Free Software Foundation; either version 2, or (at your option)
10 *    any later version.
11 *
12 *    This program is distributed in the hope that it will be useful,
13 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 *    GNU General Public License for more details.
16 *
17 *    You should have received a copy of the GNU General Public License
18 *    along with this program; if not, write to the Free Software
19 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20 */
21
22/*
23 * NOTE: fdc, fic, and pdc instructions that use base register modification
24 *       should only use index and base registers that are not shadowed,
25 *       so that the fast path emulation in the non-access miss handler
26 *       can be used.
27 */
28
29#ifdef CONFIG_64BIT
30	.level	2.0w
31#else
32	.level	2.0
33#endif
34
35#include <asm/psw.h>
36#include <asm/assembly.h>
37#include <asm/pgtable.h>
38#include <asm/cache.h>
39#include <asm/ldcw.h>
40#include <linux/linkage.h>
41#include <linux/init.h>
42
43	.section .text.hot
44	.align	16
45
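/*
 * flush_tlb_all_local purges every entry in this CPU's instruction and
 * data TLBs.  The geometry it walks (space-id base/stride/count, offset
 * base/stride/count and an inner loop count) comes from cache_info,
 * which is filled in from firmware (PDC) at boot.  A rough C-level
 * sketch of the many-loop case, with purely illustrative names:
 *
 *	for (s = 0; s < sid_count; s++)
 *		for (o = 0; o < off_count; o++)
 *			for (i = 0; i < loop; i++)
 *				purge_entry(sid_base + s * sid_stride,
 *					    off_base + o * off_stride);
 *
 * where purge_entry() stands in for one pitlbe/pdtlbe.  The whole walk
 * runs in real mode with interrupts off, for the reasons given below.
 */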
46ENTRY_CFI(flush_tlb_all_local)
47	/*
48	 * The pitlbe and pdtlbe instructions should only be used to
49	 * flush the entire tlb. Also, there needs to be no intervening
50	 * tlb operations, e.g. tlb misses, so the operation needs
51	 * to happen in real mode with all interruptions disabled.
52	 */
53
54	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
55	rsm		PSW_SM_I, %r19		/* save I-bit state */
56	load32		PA(1f), %r1
57	nop
58	nop
59	nop
60	nop
61	nop
62
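	/*
	 * The switch to real mode is done with an rfi rather than a branch:
	 * the interruption instruction address queues (IIASQ in %cr17,
	 * IIAOQ in %cr18) are loaded with the physical address of 1f, the
	 * real-mode PSW is placed in %ipsw, and rfi "returns" there with
	 * that PSW in effect.  The nops above appear to be part of the
	 * pcxt_ssm_bug workaround noted at the start of this sequence.
	 */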
63	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
64	mtctl		%r0, %cr17		/* Clear IIASQ tail */
65	mtctl		%r0, %cr17		/* Clear IIASQ head */
66	mtctl		%r1, %cr18		/* IIAOQ head */
67	ldo		4(%r1), %r1
68	mtctl		%r1, %cr18		/* IIAOQ tail */
69	load32		REAL_MODE_PSW, %r1
70	mtctl           %r1, %ipsw
71	rfi
72	nop
73
741:      load32		PA(cache_info), %r1
75
76	/* Flush Instruction Tlb */
77
78	LDREG		ITLB_SID_BASE(%r1), %r20
79	LDREG		ITLB_SID_STRIDE(%r1), %r21
80	LDREG		ITLB_SID_COUNT(%r1), %r22
81	LDREG		ITLB_OFF_BASE(%r1), %arg0
82	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
83	LDREG		ITLB_OFF_COUNT(%r1), %arg2
84	LDREG		ITLB_LOOP(%r1), %arg3
85
86	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
87	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
88	copy		%arg0, %r28		/* Init base addr */
89
90fitmanyloop:					/* Loop if LOOP >= 2 */
91	mtsp		%r20, %sr1
92	add		%r21, %r20, %r20	/* increment space */
93	copy		%arg2, %r29		/* Init middle loop count */
94
95fitmanymiddle:					/* Loop if LOOP >= 2 */
96	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
97	pitlbe		%r0(%sr1, %r28)
98	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
99	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
100	copy		%arg3, %r31		/* Re-init inner loop count */
101
102	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
103	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */
104
105fitoneloop:					/* Loop if LOOP = 1 */
106	mtsp		%r20, %sr1
107	copy		%arg0, %r28		/* init base addr */
108	copy		%arg2, %r29		/* init middle loop count */
109
110fitonemiddle:					/* Loop if LOOP = 1 */
111	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
112	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */
113
114	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
115	add		%r21, %r20, %r20		/* increment space */
116
117fitdone:
118
119	/* Flush Data Tlb */
120
121	LDREG		DTLB_SID_BASE(%r1), %r20
122	LDREG		DTLB_SID_STRIDE(%r1), %r21
123	LDREG		DTLB_SID_COUNT(%r1), %r22
124	LDREG		DTLB_OFF_BASE(%r1), %arg0
125	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
126	LDREG		DTLB_OFF_COUNT(%r1), %arg2
127	LDREG		DTLB_LOOP(%r1), %arg3
128
129	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
130	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
131	copy		%arg0, %r28		/* Init base addr */
132
133fdtmanyloop:					/* Loop if LOOP >= 2 */
134	mtsp		%r20, %sr1
135	add		%r21, %r20, %r20	/* increment space */
136	copy		%arg2, %r29		/* Init middle loop count */
137
138fdtmanymiddle:					/* Loop if LOOP >= 2 */
139	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
140	pdtlbe		%r0(%sr1, %r28)
141	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
142	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
143	copy		%arg3, %r31		/* Re-init inner loop count */
144
145	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
146	addib,COND(<=),n	-1, %r22, fdtdone	/* Outer loop count decr */
147
148fdtoneloop:					/* Loop if LOOP = 1 */
149	mtsp		%r20, %sr1
150	copy		%arg0, %r28		/* init base addr */
151	copy		%arg2, %r29		/* init middle loop count */
152
153fdtonemiddle:					/* Loop if LOOP = 1 */
154	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
155	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */
156
157	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
158	add		%r21, %r20, %r20	/* increment space */
159
160
161fdtdone:
162	/*
163	 * Switch back to virtual mode
164	 */
165	/* pcxt_ssm_bug */
166	rsm		PSW_SM_I, %r0
167	load32		2f, %r1
168	nop
169	nop
170	nop
171	nop
172	nop
173
174	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
175	mtctl		%r0, %cr17		/* Clear IIASQ tail */
176	mtctl		%r0, %cr17		/* Clear IIASQ head */
177	mtctl		%r1, %cr18		/* IIAOQ head */
178	ldo		4(%r1), %r1
179	mtctl		%r1, %cr18		/* IIAOQ tail */
180	load32		KERNEL_PSW, %r1
181	or		%r1, %r19, %r1	/* I-bit to state on entry */
182	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
183	rfi
184	nop
185
1862:      bv		%r0(%r2)
187	nop
188ENDPROC_CFI(flush_tlb_all_local)
189
190	.import cache_info,data
191
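/*
 * flush_instruction_cache_local and flush_data_cache_local below flush
 * the entire local I- or D-cache by index, using the fice/fdce "entry"
 * forms of the flush instructions.  Base address, stride, count and
 * inner loop count again come from cache_info (populated from PDC),
 * %sr1 is cleared (mtsp %r0, %sr1) for the index-form accesses, and
 * interrupts stay off from the start of the loop until the final sync.
 */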
192ENTRY_CFI(flush_instruction_cache_local)
193	load32		cache_info, %r1
194
195	/* Flush Instruction Cache */
196
197	LDREG		ICACHE_BASE(%r1), %arg0
198	LDREG		ICACHE_STRIDE(%r1), %arg1
199	LDREG		ICACHE_COUNT(%r1), %arg2
200	LDREG		ICACHE_LOOP(%r1), %arg3
201	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
202	mtsp		%r0, %sr1
203	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
204	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */
205
206fimanyloop:					/* Loop if LOOP >= 2 */
207	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
208	fice            %r0(%sr1, %arg0)
209	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
210	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
211	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */
212
213fioneloop:					/* Loop if LOOP = 1 */
214	/* Some implementations may flush with a single fice instruction */
215	cmpib,COND(>>=),n	15, %arg2, fioneloop2
216
217fioneloop1:
218	fice,m		%arg1(%sr1, %arg0)
219	fice,m		%arg1(%sr1, %arg0)
220	fice,m		%arg1(%sr1, %arg0)
221	fice,m		%arg1(%sr1, %arg0)
222	fice,m		%arg1(%sr1, %arg0)
223	fice,m		%arg1(%sr1, %arg0)
224	fice,m		%arg1(%sr1, %arg0)
225	fice,m		%arg1(%sr1, %arg0)
226	fice,m		%arg1(%sr1, %arg0)
227	fice,m		%arg1(%sr1, %arg0)
228	fice,m		%arg1(%sr1, %arg0)
229	fice,m		%arg1(%sr1, %arg0)
230	fice,m		%arg1(%sr1, %arg0)
231	fice,m		%arg1(%sr1, %arg0)
232	fice,m		%arg1(%sr1, %arg0)
233	addib,COND(>)	-16, %arg2, fioneloop1
234	fice,m		%arg1(%sr1, %arg0)
235
236	/* Check if done */
237	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */
238
239fioneloop2:
240	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
241	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */
242
243fisync:
244	sync
245	mtsm		%r22			/* restore I-bit */
246	bv		%r0(%r2)
247	nop
248ENDPROC_CFI(flush_instruction_cache_local)
249
250
251	.import cache_info, data
252ENTRY_CFI(flush_data_cache_local)
253	load32		cache_info, %r1
254
255	/* Flush Data Cache */
256
257	LDREG		DCACHE_BASE(%r1), %arg0
258	LDREG		DCACHE_STRIDE(%r1), %arg1
259	LDREG		DCACHE_COUNT(%r1), %arg2
260	LDREG		DCACHE_LOOP(%r1), %arg3
261	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
262	mtsp		%r0, %sr1
263	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
264	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */
265
266fdmanyloop:					/* Loop if LOOP >= 2 */
267	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
268	fdce		%r0(%sr1, %arg0)
269	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
270	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
271	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */
272
273fdoneloop:					/* Loop if LOOP = 1 */
274	/* Some implementations may flush with a single fdce instruction */
275	cmpib,COND(>>=),n	15, %arg2, fdoneloop2
276
277fdoneloop1:
278	fdce,m		%arg1(%sr1, %arg0)
279	fdce,m		%arg1(%sr1, %arg0)
280	fdce,m		%arg1(%sr1, %arg0)
281	fdce,m		%arg1(%sr1, %arg0)
282	fdce,m		%arg1(%sr1, %arg0)
283	fdce,m		%arg1(%sr1, %arg0)
284	fdce,m		%arg1(%sr1, %arg0)
285	fdce,m		%arg1(%sr1, %arg0)
286	fdce,m		%arg1(%sr1, %arg0)
287	fdce,m		%arg1(%sr1, %arg0)
288	fdce,m		%arg1(%sr1, %arg0)
289	fdce,m		%arg1(%sr1, %arg0)
290	fdce,m		%arg1(%sr1, %arg0)
291	fdce,m		%arg1(%sr1, %arg0)
292	fdce,m		%arg1(%sr1, %arg0)
293	addib,COND(>)	-16, %arg2, fdoneloop1
294	fdce,m		%arg1(%sr1, %arg0)
295
296	/* Check if done */
297	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */
298
299fdoneloop2:
300	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
301	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */
302
303fdsync:
304	syncdma
305	sync
306	mtsm		%r22			/* restore I-bit */
307	bv		%r0(%r2)
308	nop
309ENDPROC_CFI(flush_data_cache_local)
310
311/* Macros to serialize TLB purge operations on SMP.  */
312
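/*
 * The only atomic memory operation on PA-RISC is ldcw (load and clear
 * word), so the lock uses the usual parisc convention: the lock word is
 * non-zero when free and zero when held.  tlb_lock disables interrupts
 * (saving the old I-bit in \flags) and spins with ldcw until it reads a
 * non-zero value; tlb_unlock stores 1 back after a sync and restores the
 * interrupt state.  ldcw wants a 16-byte aligned operand (at least on
 * PA2.0), hence the __PA_LDCW_ALIGNMENT fixup.  Roughly, as a sketch in
 * C, with pa_tlb_lock treated as a plain word here:
 *
 *	local_irq_save(flags);
 *	while (__ldcw(&pa_tlb_lock) == 0)
 *		while (pa_tlb_lock == 0)
 *			continue;
 *	... TLB purge ...
 *	pa_tlb_lock = 1;	(preceded by a sync)
 *	local_irq_restore(flags);
 */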
313	.macro	tlb_lock	la,flags,tmp
314#ifdef CONFIG_SMP
315#if __PA_LDCW_ALIGNMENT > 4
316	load32		pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
317	depi		0,31,__PA_LDCW_ALIGN_ORDER, \la
318#else
319	load32		pa_tlb_lock, \la
320#endif
321	rsm		PSW_SM_I,\flags
3221:	LDCW		0(\la),\tmp
323	cmpib,<>,n	0,\tmp,3f
3242:	ldw		0(\la),\tmp
325	cmpb,<>		%r0,\tmp,1b
326	nop
327	b,n		2b
3283:
329#endif
330	.endm
331
332	.macro	tlb_unlock	la,flags,tmp
333#ifdef CONFIG_SMP
334	ldi		1,\tmp
335	sync
336	stw		\tmp,0(\la)
337	mtsm		\flags
338#endif
339	.endm
340
341/* Clear page using kernel mapping.  */
342
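/*
 * clear_page_asm zeroes one page through the kernel mapping; %r26 (arg0)
 * is the page's kernel virtual address, so this is in effect
 * memset(page, 0, PAGE_SIZE).  The loop is unrolled to clear 128 bytes
 * per iteration with std on 64-bit kernels and 64 bytes with stw on
 * 32-bit kernels.  Assumed C-level prototype:
 *
 *	void clear_page_asm(void *page);
 */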
343ENTRY_CFI(clear_page_asm)
344#ifdef CONFIG_64BIT
345
346	/* Unroll the loop.  */
347	ldi		(PAGE_SIZE / 128), %r1
348
3491:
350	std		%r0, 0(%r26)
351	std		%r0, 8(%r26)
352	std		%r0, 16(%r26)
353	std		%r0, 24(%r26)
354	std		%r0, 32(%r26)
355	std		%r0, 40(%r26)
356	std		%r0, 48(%r26)
357	std		%r0, 56(%r26)
358	std		%r0, 64(%r26)
359	std		%r0, 72(%r26)
360	std		%r0, 80(%r26)
361	std		%r0, 88(%r26)
362	std		%r0, 96(%r26)
363	std		%r0, 104(%r26)
364	std		%r0, 112(%r26)
365	std		%r0, 120(%r26)
366
367	/* Note reverse branch hint for addib is taken.  */
368	addib,COND(>),n	-1, %r1, 1b
369	ldo		128(%r26), %r26
370
371#else
372
373	/*
374	 * Note that until (if) we start saving the full 64-bit register
375	 * values on interrupt, we can't use std on a 32 bit kernel.
376	 */
377	ldi		(PAGE_SIZE / 64), %r1
378
3791:
380	stw		%r0, 0(%r26)
381	stw		%r0, 4(%r26)
382	stw		%r0, 8(%r26)
383	stw		%r0, 12(%r26)
384	stw		%r0, 16(%r26)
385	stw		%r0, 20(%r26)
386	stw		%r0, 24(%r26)
387	stw		%r0, 28(%r26)
388	stw		%r0, 32(%r26)
389	stw		%r0, 36(%r26)
390	stw		%r0, 40(%r26)
391	stw		%r0, 44(%r26)
392	stw		%r0, 48(%r26)
393	stw		%r0, 52(%r26)
394	stw		%r0, 56(%r26)
395	stw		%r0, 60(%r26)
396
397	addib,COND(>),n	-1, %r1, 1b
398	ldo		64(%r26), %r26
399#endif
400	bv		%r0(%r2)
401	nop
402ENDPROC_CFI(clear_page_asm)
403
404/* Copy page using kernel mapping.  */
405
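/*
 * copy_page_asm copies one page through the kernel mapping, roughly
 * memcpy(to, from, PAGE_SIZE) with %r26 = to and %r25 = from (the usual
 * parisc argument registers).  The 64-bit body interleaves ldd/std pairs
 * to suit the PA8x00 issue rules described below; the 32-bit body is
 * scheduled for ldw/stw pairing on PCXL/PCXL2 instead.  Assumed C-level
 * prototype:
 *
 *	void copy_page_asm(void *to, void *from);
 */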
406ENTRY_CFI(copy_page_asm)
407#ifdef CONFIG_64BIT
408	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
409	 * Unroll the loop by hand and arrange insn appropriately.
410	 * Prefetch doesn't improve performance on rp3440.
411	 * GCC probably can do this just as well...
412	 */
413
414	ldi		(PAGE_SIZE / 128), %r1
415
4161:	ldd		0(%r25), %r19
417	ldd		8(%r25), %r20
418
419	ldd		16(%r25), %r21
420	ldd		24(%r25), %r22
421	std		%r19, 0(%r26)
422	std		%r20, 8(%r26)
423
424	ldd		32(%r25), %r19
425	ldd		40(%r25), %r20
426	std		%r21, 16(%r26)
427	std		%r22, 24(%r26)
428
429	ldd		48(%r25), %r21
430	ldd		56(%r25), %r22
431	std		%r19, 32(%r26)
432	std		%r20, 40(%r26)
433
434	ldd		64(%r25), %r19
435	ldd		72(%r25), %r20
436	std		%r21, 48(%r26)
437	std		%r22, 56(%r26)
438
439	ldd		80(%r25), %r21
440	ldd		88(%r25), %r22
441	std		%r19, 64(%r26)
442	std		%r20, 72(%r26)
443
444	ldd		 96(%r25), %r19
445	ldd		104(%r25), %r20
446	std		%r21, 80(%r26)
447	std		%r22, 88(%r26)
448
449	ldd		112(%r25), %r21
450	ldd		120(%r25), %r22
451	ldo		128(%r25), %r25
452	std		%r19, 96(%r26)
453	std		%r20, 104(%r26)
454
455	std		%r21, 112(%r26)
456	std		%r22, 120(%r26)
457
458	/* Note reverse branch hint for addib is taken.  */
459	addib,COND(>),n	-1, %r1, 1b
460	ldo		128(%r26), %r26
461
462#else
463
464	/*
465	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
466	 * bundles (very restricted rules for bundling).
467	 * Note that until (if) we start saving
468	 * the full 64 bit register values on interrupt, we can't
469	 * use ldd/std on a 32 bit kernel.
470	 */
471	ldw		0(%r25), %r19
472	ldi		(PAGE_SIZE / 64), %r1
473
4741:
475	ldw		4(%r25), %r20
476	ldw		8(%r25), %r21
477	ldw		12(%r25), %r22
478	stw		%r19, 0(%r26)
479	stw		%r20, 4(%r26)
480	stw		%r21, 8(%r26)
481	stw		%r22, 12(%r26)
482	ldw		16(%r25), %r19
483	ldw		20(%r25), %r20
484	ldw		24(%r25), %r21
485	ldw		28(%r25), %r22
486	stw		%r19, 16(%r26)
487	stw		%r20, 20(%r26)
488	stw		%r21, 24(%r26)
489	stw		%r22, 28(%r26)
490	ldw		32(%r25), %r19
491	ldw		36(%r25), %r20
492	ldw		40(%r25), %r21
493	ldw		44(%r25), %r22
494	stw		%r19, 32(%r26)
495	stw		%r20, 36(%r26)
496	stw		%r21, 40(%r26)
497	stw		%r22, 44(%r26)
498	ldw		48(%r25), %r19
499	ldw		52(%r25), %r20
500	ldw		56(%r25), %r21
501	ldw		60(%r25), %r22
502	stw		%r19, 48(%r26)
503	stw		%r20, 52(%r26)
504	ldo		64(%r25), %r25
505	stw		%r21, 56(%r26)
506	stw		%r22, 60(%r26)
507	ldo		64(%r26), %r26
508	addib,COND(>),n	-1, %r1, 1b
509	ldw		0(%r25), %r19
510#endif
511	bv		%r0(%r2)
512	nop
513ENDPROC_CFI(copy_page_asm)
514
515/*
516 * NOTE: Code in clear_user_page has a hard coded dependency on the
517 *       maximum alias boundary being 4 Mb. We've been assured by the
518 *       parisc chip designers that there will not ever be a parisc
519 *       chip with a larger alias boundary (Never say never :-) ).
520 *
521 *       Subtle: the dtlb miss handlers support the temp alias region by
522 *       "knowing" that if a dtlb miss happens within the temp alias
523 *       region it must have occurred while in clear_user_page. Since
524 *       this routine makes use of processor local translations, we
525 *       don't want to insert them into the kernel page table. Instead,
526 *       we load up some general registers (they need to be registers
527 *       which aren't shadowed) with the physical page numbers (preshifted
528 *       for tlb insertion) needed to insert the translations. When we
529 *       miss on the translation, the dtlb miss handler inserts the
530 *       translation into the tlb using these values:
531 *
532 *          %r26 physical page (shifted for tlb insert) of "to" translation
533 *          %r23 physical page (shifted for tlb insert) of "from" translation
534 */
535
536        /* Drop prot bits and convert to page addr for iitlbt and idtlbt */
537        #define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
538        .macro          convert_phys_for_tlb_insert20  phys
539        extrd,u         \phys, 56-PAGE_ADD_SHIFT, 32-PAGE_ADD_SHIFT, \phys
540#if _PAGE_SIZE_ENCODING_DEFAULT
541        depdi           _PAGE_SIZE_ENCODING_DEFAULT, 63, (63-58), \phys
542#endif
543	.endm
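	/*
	 * convert_phys_for_tlb_insert20 reshapes a physical address into
	 * the page-frame format that the PA2.0 iitlbt/idtlbt insert
	 * instructions expect, and deposits the default page-size encoding
	 * when one is configured.  The dtlb miss handler uses the values
	 * prepared this way (in %r26/%r23, see the note above) to insert
	 * translations for the tmpalias region on the fly.
	 */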
544
545	/*
546	 * copy_user_page_asm() performs a page copy using mappings
547	 * equivalent to the user page mappings.  It can be used to
548	 * implement copy_user_page() but unfortunately both the `from'
549	 * and `to' pages need to be flushed through mappings equivalent
550	 * to the user mappings after the copy because the kernel accesses
551	 * the `from' page through the kmap kernel mapping and the `to'
552	 * page needs to be flushed since code can be copied.  As a
553	 * result, this implementation is less efficient than the simpler
554	 * copy using the kernel mapping.  It only needs the `from' page
555 * to be flushed via the user mapping.  The kunmap routines handle
556	 * the flushes needed for the kernel mapping.
557	 *
558	 * I'm still keeping this around because it may be possible to
559	 * use it if more information is passed into copy_user_page().
560	 * Have to do some measurements to see if it is worthwhile to
561	 * lobby for such a change.
562	 *
563	 */
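/*
 * As a rough sketch (64-bit case, illustrative names only), the alias
 * set-up performed below builds kernel-visible addresses that are
 * congruent with the user mapping, i.e. share its cache colour within
 * the 4 MB alias boundary:
 *
 *	to_alias   = TMPALIAS_MAP_START
 *			| (user_vaddr & (0x400000 - 1) & PAGE_MASK);
 *	from_alias = to_alias | 0x400000;
 *
 * so the source page sits in a second 4 MB window next to the
 * destination.  Stale translations for both aliases are then purged so
 * the dtlb miss handler can insert fresh ones from %r26/%r23 as
 * described above.
 */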
564
565ENTRY_CFI(copy_user_page_asm)
566	/* Convert virtual `to' and `from' addresses to physical addresses.
567	   Move `from' physical address to non shadowed register.  */
568	ldil		L%(__PAGE_OFFSET), %r1
569	sub		%r26, %r1, %r26
570	sub		%r25, %r1, %r23
571
572	ldil		L%(TMPALIAS_MAP_START), %r28
573#ifdef CONFIG_64BIT
574#if (TMPALIAS_MAP_START >= 0x80000000)
575	depdi		0, 31,32, %r28		/* clear any sign extension */
576#endif
577	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
578	convert_phys_for_tlb_insert20 %r23	/* convert phys addr to tlb insert format */
579	depd		%r24,63,22, %r28	/* Form aliased virtual address 'to' */
580	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
581	copy		%r28, %r29
582	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
583#else
584	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
585	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
586	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
587	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
588	copy		%r28, %r29
589	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
590#endif
591
592	/* Purge any old translations */
593
594#ifdef CONFIG_PA20
595	pdtlb,l		%r0(%r28)
596	pdtlb,l		%r0(%r29)
597#else
598	tlb_lock	%r20,%r21,%r22
599	pdtlb		%r0(%r28)
600	pdtlb		%r0(%r29)
601	tlb_unlock	%r20,%r21,%r22
602#endif
603
604#ifdef CONFIG_64BIT
605	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
606	 * Unroll the loop by hand and arrange insn appropriately.
607	 * GCC probably can do this just as well.
608	 */
609
610	ldd		0(%r29), %r19
611	ldi		(PAGE_SIZE / 128), %r1
612
6131:	ldd		8(%r29), %r20
614
615	ldd		16(%r29), %r21
616	ldd		24(%r29), %r22
617	std		%r19, 0(%r28)
618	std		%r20, 8(%r28)
619
620	ldd		32(%r29), %r19
621	ldd		40(%r29), %r20
622	std		%r21, 16(%r28)
623	std		%r22, 24(%r28)
624
625	ldd		48(%r29), %r21
626	ldd		56(%r29), %r22
627	std		%r19, 32(%r28)
628	std		%r20, 40(%r28)
629
630	ldd		64(%r29), %r19
631	ldd		72(%r29), %r20
632	std		%r21, 48(%r28)
633	std		%r22, 56(%r28)
634
635	ldd		80(%r29), %r21
636	ldd		88(%r29), %r22
637	std		%r19, 64(%r28)
638	std		%r20, 72(%r28)
639
640	ldd		 96(%r29), %r19
641	ldd		104(%r29), %r20
642	std		%r21, 80(%r28)
643	std		%r22, 88(%r28)
644
645	ldd		112(%r29), %r21
646	ldd		120(%r29), %r22
647	std		%r19, 96(%r28)
648	std		%r20, 104(%r28)
649
650	ldo		128(%r29), %r29
651	std		%r21, 112(%r28)
652	std		%r22, 120(%r28)
653	ldo		128(%r28), %r28
654
655	/* conditional branches nullify on forward taken branch, and on
656	 * non-taken backward branch. Note that .+4 is a backwards branch.
657	 * The ldd should only get executed if the branch is taken.
658	 */
659	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
660	ldd		0(%r29), %r19		/* start next loads */
661
662#else
663	ldi		(PAGE_SIZE / 64), %r1
664
665	/*
666	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
667	 * bundles (very restricted rules for bundling). It probably
668	 * does OK on PCXU and better, but we could do better with
669	 * ldd/std instructions. Note that until (if) we start saving
670	 * the full 64 bit register values on interrupt, we can't
671	 * use ldd/std on a 32 bit kernel.
672	 */
673
6741:	ldw		0(%r29), %r19
675	ldw		4(%r29), %r20
676	ldw		8(%r29), %r21
677	ldw		12(%r29), %r22
678	stw		%r19, 0(%r28)
679	stw		%r20, 4(%r28)
680	stw		%r21, 8(%r28)
681	stw		%r22, 12(%r28)
682	ldw		16(%r29), %r19
683	ldw		20(%r29), %r20
684	ldw		24(%r29), %r21
685	ldw		28(%r29), %r22
686	stw		%r19, 16(%r28)
687	stw		%r20, 20(%r28)
688	stw		%r21, 24(%r28)
689	stw		%r22, 28(%r28)
690	ldw		32(%r29), %r19
691	ldw		36(%r29), %r20
692	ldw		40(%r29), %r21
693	ldw		44(%r29), %r22
694	stw		%r19, 32(%r28)
695	stw		%r20, 36(%r28)
696	stw		%r21, 40(%r28)
697	stw		%r22, 44(%r28)
698	ldw		48(%r29), %r19
699	ldw		52(%r29), %r20
700	ldw		56(%r29), %r21
701	ldw		60(%r29), %r22
702	stw		%r19, 48(%r28)
703	stw		%r20, 52(%r28)
704	stw		%r21, 56(%r28)
705	stw		%r22, 60(%r28)
706	ldo		64(%r28), %r28
707
708	addib,COND(>)		-1, %r1,1b
709	ldo		64(%r29), %r29
710#endif
711
712	bv		%r0(%r2)
713	nop
714ENDPROC_CFI(copy_user_page_asm)
715
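/*
 * clear_user_page_asm is the tmpalias variant of clear_page_asm: %r26
 * arrives as the page's kernel virtual address (converted to a physical
 * address via tophys_r1) and %r25 is the user virtual address, which
 * selects the congruent alias the page is actually cleared through.
 * Assumed C-level prototype:
 *
 *	void clear_user_page_asm(void *page, unsigned long vaddr);
 */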
716ENTRY_CFI(clear_user_page_asm)
717	tophys_r1	%r26
718
719	ldil		L%(TMPALIAS_MAP_START), %r28
720#ifdef CONFIG_64BIT
721#if (TMPALIAS_MAP_START >= 0x80000000)
722	depdi		0, 31,32, %r28		/* clear any sign extension */
723#endif
724	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
725	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
726	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
727#else
728	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
729	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
730	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
731#endif
732
733	/* Purge any old translation */
734
735#ifdef CONFIG_PA20
736	pdtlb,l		%r0(%r28)
737#else
738	tlb_lock	%r20,%r21,%r22
739	pdtlb		%r0(%r28)
740	tlb_unlock	%r20,%r21,%r22
741#endif
742
743#ifdef CONFIG_64BIT
744	ldi		(PAGE_SIZE / 128), %r1
745
746	/* PREFETCH (Write) has not (yet) been proven to help here */
747	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */
748
7491:	std		%r0, 0(%r28)
750	std		%r0, 8(%r28)
751	std		%r0, 16(%r28)
752	std		%r0, 24(%r28)
753	std		%r0, 32(%r28)
754	std		%r0, 40(%r28)
755	std		%r0, 48(%r28)
756	std		%r0, 56(%r28)
757	std		%r0, 64(%r28)
758	std		%r0, 72(%r28)
759	std		%r0, 80(%r28)
760	std		%r0, 88(%r28)
761	std		%r0, 96(%r28)
762	std		%r0, 104(%r28)
763	std		%r0, 112(%r28)
764	std		%r0, 120(%r28)
765	addib,COND(>)		-1, %r1, 1b
766	ldo		128(%r28), %r28
767
768#else	/* ! CONFIG_64BIT */
769	ldi		(PAGE_SIZE / 64), %r1
770
7711:	stw		%r0, 0(%r28)
772	stw		%r0, 4(%r28)
773	stw		%r0, 8(%r28)
774	stw		%r0, 12(%r28)
775	stw		%r0, 16(%r28)
776	stw		%r0, 20(%r28)
777	stw		%r0, 24(%r28)
778	stw		%r0, 28(%r28)
779	stw		%r0, 32(%r28)
780	stw		%r0, 36(%r28)
781	stw		%r0, 40(%r28)
782	stw		%r0, 44(%r28)
783	stw		%r0, 48(%r28)
784	stw		%r0, 52(%r28)
785	stw		%r0, 56(%r28)
786	stw		%r0, 60(%r28)
787	addib,COND(>)		-1, %r1, 1b
788	ldo		64(%r28), %r28
789#endif	/* CONFIG_64BIT */
790
791	bv		%r0(%r2)
792	nop
793ENDPROC_CFI(clear_user_page_asm)
794
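/*
 * flush_dcache_page_asm writes back and invalidates one page of data
 * cache through a tmpalias mapping, so the flush happens at the cache
 * colour the user mapping uses.  %r26 is taken to be the page's physical
 * address and %r25 the user virtual address; assumed C-level prototype:
 *
 *	void flush_dcache_page_asm(unsigned long phys, unsigned long vaddr);
 *
 * The loop steps through the page one cache line (dcache_stride) at a
 * time, sixteen fdc's per iteration.
 */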
795ENTRY_CFI(flush_dcache_page_asm)
796	ldil		L%(TMPALIAS_MAP_START), %r28
797#ifdef CONFIG_64BIT
798#if (TMPALIAS_MAP_START >= 0x80000000)
799	depdi		0, 31,32, %r28		/* clear any sign extension */
800#endif
801	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
802	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
803	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
804#else
805	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
806	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
807	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
808#endif
809
810	/* Purge any old translation */
811
812#ifdef CONFIG_PA20
813	pdtlb,l		%r0(%r28)
814#else
815	tlb_lock	%r20,%r21,%r22
816	pdtlb		%r0(%r28)
817	tlb_unlock	%r20,%r21,%r22
818#endif
819
820	ldil		L%dcache_stride, %r1
821	ldw		R%dcache_stride(%r1), %r31
822
823#ifdef CONFIG_64BIT
824	depdi,z		1, 63-PAGE_SHIFT,1, %r25
825#else
826	depwi,z		1, 31-PAGE_SHIFT,1, %r25
827#endif
828	add		%r28, %r25, %r25
829	sub		%r25, %r31, %r25
830
831
8321:      fdc,m		%r31(%r28)
833	fdc,m		%r31(%r28)
834	fdc,m		%r31(%r28)
835	fdc,m		%r31(%r28)
836	fdc,m		%r31(%r28)
837	fdc,m		%r31(%r28)
838	fdc,m		%r31(%r28)
839	fdc,m		%r31(%r28)
840	fdc,m		%r31(%r28)
841	fdc,m		%r31(%r28)
842	fdc,m		%r31(%r28)
843	fdc,m		%r31(%r28)
844	fdc,m		%r31(%r28)
845	fdc,m		%r31(%r28)
846	fdc,m		%r31(%r28)
847	cmpb,COND(<<)	%r28, %r25,1b
848	fdc,m		%r31(%r28)
849
850	sync
851	bv		%r0(%r2)
852	nop
853ENDPROC_CFI(flush_dcache_page_asm)
854
855ENTRY_CFI(flush_icache_page_asm)
856	ldil		L%(TMPALIAS_MAP_START), %r28
857#ifdef CONFIG_64BIT
858#if (TMPALIAS_MAP_START >= 0x80000000)
859	depdi		0, 31,32, %r28		/* clear any sign extension */
860#endif
861	convert_phys_for_tlb_insert20 %r26	/* convert phys addr to tlb insert format */
862	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
863	depdi		0, 63,PAGE_SHIFT, %r28	/* Clear any offset bits */
864#else
865	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
866	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
867	depwi		0, 31,PAGE_SHIFT, %r28	/* Clear any offset bits */
868#endif
869
870	/* Purge any old translation.  Note that the FIC instruction
871	 * may use either the instruction or data TLB.  Given that we
872	 * have a flat address space, it's not clear which TLB will be
873	 * used.  So, we purge both entries.  */
874
875#ifdef CONFIG_PA20
876	pdtlb,l		%r0(%r28)
877	pitlb,l         %r0(%sr4,%r28)
878#else
879	tlb_lock        %r20,%r21,%r22
880	pdtlb		%r0(%r28)
881	pitlb           %r0(%sr4,%r28)
882	tlb_unlock      %r20,%r21,%r22
883#endif
884
885	ldil		L%icache_stride, %r1
886	ldw		R%icache_stride(%r1), %r31
887
888#ifdef CONFIG_64BIT
889	depdi,z		1, 63-PAGE_SHIFT,1, %r25
890#else
891	depwi,z		1, 31-PAGE_SHIFT,1, %r25
892#endif
893	add		%r28, %r25, %r25
894	sub		%r25, %r31, %r25
895
896
897	/* fic only has the type 26 form on PA1.1, requiring an
898	 * explicit space specification, so use %sr4 */
8991:      fic,m		%r31(%sr4,%r28)
900	fic,m		%r31(%sr4,%r28)
901	fic,m		%r31(%sr4,%r28)
902	fic,m		%r31(%sr4,%r28)
903	fic,m		%r31(%sr4,%r28)
904	fic,m		%r31(%sr4,%r28)
905	fic,m		%r31(%sr4,%r28)
906	fic,m		%r31(%sr4,%r28)
907	fic,m		%r31(%sr4,%r28)
908	fic,m		%r31(%sr4,%r28)
909	fic,m		%r31(%sr4,%r28)
910	fic,m		%r31(%sr4,%r28)
911	fic,m		%r31(%sr4,%r28)
912	fic,m		%r31(%sr4,%r28)
913	fic,m		%r31(%sr4,%r28)
914	cmpb,COND(<<)	%r28, %r25,1b
915	fic,m		%r31(%sr4,%r28)
916
917	sync
918	bv		%r0(%r2)
919	nop
920ENDPROC_CFI(flush_icache_page_asm)
921
922ENTRY_CFI(flush_kernel_dcache_page_asm)
923	ldil		L%dcache_stride, %r1
924	ldw		R%dcache_stride(%r1), %r23
925
926#ifdef CONFIG_64BIT
927	depdi,z		1, 63-PAGE_SHIFT,1, %r25
928#else
929	depwi,z		1, 31-PAGE_SHIFT,1, %r25
930#endif
931	add		%r26, %r25, %r25
932	sub		%r25, %r23, %r25
933
934
9351:      fdc,m		%r23(%r26)
936	fdc,m		%r23(%r26)
937	fdc,m		%r23(%r26)
938	fdc,m		%r23(%r26)
939	fdc,m		%r23(%r26)
940	fdc,m		%r23(%r26)
941	fdc,m		%r23(%r26)
942	fdc,m		%r23(%r26)
943	fdc,m		%r23(%r26)
944	fdc,m		%r23(%r26)
945	fdc,m		%r23(%r26)
946	fdc,m		%r23(%r26)
947	fdc,m		%r23(%r26)
948	fdc,m		%r23(%r26)
949	fdc,m		%r23(%r26)
950	cmpb,COND(<<)		%r26, %r25,1b
951	fdc,m		%r23(%r26)
952
953	sync
954	bv		%r0(%r2)
955	nop
956ENDPROC_CFI(flush_kernel_dcache_page_asm)
957
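/*
 * purge_kernel_dcache_page_asm is the counterpart of
 * flush_kernel_dcache_page_asm above.  fdc (flush) writes a dirty line
 * back to memory before invalidating it, while pdc (purge) invalidates
 * the line without writing it back, discarding any dirty data.  That is
 * the right choice when the page is about to be overwritten anyway,
 * e.g. by incoming DMA.  Both loops walk the page in dcache_stride
 * steps through the kernel mapping passed in %r26.
 */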
958ENTRY_CFI(purge_kernel_dcache_page_asm)
959	ldil		L%dcache_stride, %r1
960	ldw		R%dcache_stride(%r1), %r23
961
962#ifdef CONFIG_64BIT
963	depdi,z		1, 63-PAGE_SHIFT,1, %r25
964#else
965	depwi,z		1, 31-PAGE_SHIFT,1, %r25
966#endif
967	add		%r26, %r25, %r25
968	sub		%r25, %r23, %r25
969
9701:      pdc,m		%r23(%r26)
971	pdc,m		%r23(%r26)
972	pdc,m		%r23(%r26)
973	pdc,m		%r23(%r26)
974	pdc,m		%r23(%r26)
975	pdc,m		%r23(%r26)
976	pdc,m		%r23(%r26)
977	pdc,m		%r23(%r26)
978	pdc,m		%r23(%r26)
979	pdc,m		%r23(%r26)
980	pdc,m		%r23(%r26)
981	pdc,m		%r23(%r26)
982	pdc,m		%r23(%r26)
983	pdc,m		%r23(%r26)
984	pdc,m		%r23(%r26)
985	cmpb,COND(<<)		%r26, %r25, 1b
986	pdc,m		%r23(%r26)
987
988	sync
989	bv		%r0(%r2)
990	nop
991ENDPROC_CFI(purge_kernel_dcache_page_asm)
992
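/*
 * The *_range_asm routines below flush or purge an arbitrary range
 * rather than a whole page: %r26 is the start address, %r25 the end
 * address, and the user variants go through %sr3.  The sequence
 *
 *	ldo	-1(%r23), %r21
 *	ANDCM	%r26, %r21, %r26
 *
 * rounds the start down to a cache-line boundary (the stride is assumed
 * to be a power of two).  Each loop body is a single compare-and-branch
 * with the flush in the delay slot, so one line is flushed per
 * iteration until the address reaches the end.
 */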
993ENTRY_CFI(flush_user_dcache_range_asm)
994	ldil		L%dcache_stride, %r1
995	ldw		R%dcache_stride(%r1), %r23
996	ldo		-1(%r23), %r21
997	ANDCM		%r26, %r21, %r26
998
9991:      cmpb,COND(<<),n	%r26, %r25, 1b
1000	fdc,m		%r23(%sr3, %r26)
1001
1002	sync
1003	bv		%r0(%r2)
1004	nop
1005ENDPROC_CFI(flush_user_dcache_range_asm)
1006
1007ENTRY_CFI(flush_kernel_dcache_range_asm)
1008	ldil		L%dcache_stride, %r1
1009	ldw		R%dcache_stride(%r1), %r23
1010	ldo		-1(%r23), %r21
1011	ANDCM		%r26, %r21, %r26
1012
10131:      cmpb,COND(<<),n	%r26, %r25,1b
1014	fdc,m		%r23(%r26)
1015
1016	sync
1017	syncdma
1018	bv		%r0(%r2)
1019	nop
1020ENDPROC_CFI(flush_kernel_dcache_range_asm)
1021
1022ENTRY_CFI(purge_kernel_dcache_range_asm)
1023	ldil		L%dcache_stride, %r1
1024	ldw		R%dcache_stride(%r1), %r23
1025	ldo		-1(%r23), %r21
1026	ANDCM		%r26, %r21, %r26
1027
10281:      cmpb,COND(<<),n	%r26, %r25,1b
1029	pdc,m		%r23(%r26)
1030
1031	sync
1032	syncdma
1033	bv		%r0(%r2)
1034	nop
1035ENDPROC_CFI(purge_kernel_dcache_range_asm)
1036
1037ENTRY_CFI(flush_user_icache_range_asm)
1038	ldil		L%icache_stride, %r1
1039	ldw		R%icache_stride(%r1), %r23
1040	ldo		-1(%r23), %r21
1041	ANDCM		%r26, %r21, %r26
1042
10431:      cmpb,COND(<<),n	%r26, %r25,1b
1044	fic,m		%r23(%sr3, %r26)
1045
1046	sync
1047	bv		%r0(%r2)
1048	nop
1049ENDPROC_CFI(flush_user_icache_range_asm)
1050
1051ENTRY_CFI(flush_kernel_icache_page)
1052	ldil		L%icache_stride, %r1
1053	ldw		R%icache_stride(%r1), %r23
1054
1055#ifdef CONFIG_64BIT
1056	depdi,z		1, 63-PAGE_SHIFT,1, %r25
1057#else
1058	depwi,z		1, 31-PAGE_SHIFT,1, %r25
1059#endif
1060	add		%r26, %r25, %r25
1061	sub		%r25, %r23, %r25
1062
1063
10641:      fic,m		%r23(%sr4, %r26)
1065	fic,m		%r23(%sr4, %r26)
1066	fic,m		%r23(%sr4, %r26)
1067	fic,m		%r23(%sr4, %r26)
1068	fic,m		%r23(%sr4, %r26)
1069	fic,m		%r23(%sr4, %r26)
1070	fic,m		%r23(%sr4, %r26)
1071	fic,m		%r23(%sr4, %r26)
1072	fic,m		%r23(%sr4, %r26)
1073	fic,m		%r23(%sr4, %r26)
1074	fic,m		%r23(%sr4, %r26)
1075	fic,m		%r23(%sr4, %r26)
1076	fic,m		%r23(%sr4, %r26)
1077	fic,m		%r23(%sr4, %r26)
1078	fic,m		%r23(%sr4, %r26)
1079	cmpb,COND(<<)		%r26, %r25, 1b
1080	fic,m		%r23(%sr4, %r26)
1081
1082	sync
1083	bv		%r0(%r2)
1084	nop
1085ENDPROC_CFI(flush_kernel_icache_page)
1086
1087ENTRY_CFI(flush_kernel_icache_range_asm)
1088	ldil		L%icache_stride, %r1
1089	ldw		R%icache_stride(%r1), %r23
1090	ldo		-1(%r23), %r21
1091	ANDCM		%r26, %r21, %r26
1092
10931:      cmpb,COND(<<),n	%r26, %r25, 1b
1094	fic,m		%r23(%sr4, %r26)
1095
1096	sync
1097	bv		%r0(%r2)
1098	nop
1099ENDPROC_CFI(flush_kernel_icache_range_asm)
1100
1101	__INIT
1102
1103	/* align should cover use of rfi in disable_sr_hashing_asm and
1104	 * srdis_done.
1105	 */
1106	.align	256
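/*
 * disable_sr_hashing_asm drops into real mode (the same IIAOQ/rfi
 * sequence used in flush_tlb_all_local) and then clears the space
 * register hashing enable bits in the CPU's diagnose registers.  The
 * diagnose moves are hand-encoded as .word constants, presumably because
 * the assembler provides no mfdiag/mtdiag mnemonics; the intended
 * instruction is spelled out in the comment next to each one.  %r26
 * selects the CPU family (SRHASH_PCXST, SRHASH_PCXL or SRHASH_PA20).
 */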
1107ENTRY_CFI(disable_sr_hashing_asm)
1108	/*
1109	 * Switch to real mode
1110	 */
1111	/* pcxt_ssm_bug */
1112	rsm		PSW_SM_I, %r0
1113	load32		PA(1f), %r1
1114	nop
1115	nop
1116	nop
1117	nop
1118	nop
1119
1120	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1121	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1122	mtctl		%r0, %cr17		/* Clear IIASQ head */
1123	mtctl		%r1, %cr18		/* IIAOQ head */
1124	ldo		4(%r1), %r1
1125	mtctl		%r1, %cr18		/* IIAOQ tail */
1126	load32		REAL_MODE_PSW, %r1
1127	mtctl		%r1, %ipsw
1128	rfi
1129	nop
1130
11311:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
1132	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
1133	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
1134	b,n		srdis_done
1135
1136srdis_pcxs:
1137
1138	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
1139
1140	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
1141	.word		0x141c1a00		/* must issue twice */
1142	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
1143	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
1144	.word		0x141c1600		/* mtdiag %r28, %dr0 */
1145	.word		0x141c1600		/* must issue twice */
1146	b,n		srdis_done
1147
1148srdis_pcxl:
1149
1150	/* Disable Space Register Hashing for PCXL */
1151
1152	.word		0x141c0600		/* mfdiag %dr0, %r28 */
1153	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
1154	.word		0x141c0240		/* mtdiag %r28, %dr0 */
1155	b,n		srdis_done
1156
1157srdis_pa20:
1158
1159	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */
1160
1161	.word		0x144008bc		/* mfdiag %dr2, %r28 */
1162	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
1163	.word		0x145c1840		/* mtdiag %r28, %dr2 */
1164
1165
1166srdis_done:
1167	/* Switch back to virtual mode */
1168	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
1169	load32		2f, %r1
1170	nop
1171	nop
1172	nop
1173	nop
1174	nop
1175
1176	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1177	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1178	mtctl		%r0, %cr17		/* Clear IIASQ head */
1179	mtctl		%r1, %cr18		/* IIAOQ head */
1180	ldo		4(%r1), %r1
1181	mtctl		%r1, %cr18		/* IIAOQ tail */
1182	load32		KERNEL_PSW, %r1
1183	mtctl		%r1, %ipsw
1184	rfi
1185	nop
1186
11872:      bv		%r0(%r2)
1188	nop
1189ENDPROC_CFI(disable_sr_hashing_asm)
1190
1191	.end
1192