xref: /linux/arch/sparc/kernel/tsb.S (revision c8bfe3fad4f86a029da7157bae9699c816f0c309)
1/* SPDX-License-Identifier: GPL-2.0 */
2/* tsb.S: Sparc64 TSB table handling.
3 *
4 * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
5 */
6
7
8#include <asm/tsb.h>
9#include <asm/hypervisor.h>
10#include <asm/page.h>
11#include <asm/cpudata.h>
12#include <asm/mmu.h>
13
14	.text
15	.align	32
16
17	/* Invoked from TLB miss handler, we are in the
18	 * MMU global registers and they are setup like
19	 * this:
20	 *
21	 * %g1: TSB entry pointer
22	 * %g2:	available temporary
23	 * %g3:	FAULT_CODE_{D,I}TLB
24	 * %g4:	available temporary
25	 * %g5:	available temporary
26	 * %g6: TAG TARGET
27	 * %g7:	available temporary, will be loaded by us with
28	 *      the physical address base of the linux page
29	 *      tables for the current address space
30	 */
31tsb_miss_dtlb:
32	mov		TLB_TAG_ACCESS, %g4
33	ldxa		[%g4] ASI_DMMU, %g4
34	srlx		%g4, PAGE_SHIFT, %g4
35	ba,pt		%xcc, tsb_miss_page_table_walk
36	 sllx		%g4, PAGE_SHIFT, %g4
37
38tsb_miss_itlb:
39	mov		TLB_TAG_ACCESS, %g4
40	ldxa		[%g4] ASI_IMMU, %g4
41	srlx		%g4, PAGE_SHIFT, %g4
42	ba,pt		%xcc, tsb_miss_page_table_walk
43	 sllx		%g4, PAGE_SHIFT, %g4
44
45	/* At this point we have:
46	 * %g1 --	PAGE_SIZE TSB entry address
47	 * %g3 --	FAULT_CODE_{D,I}TLB
48	 * %g4 --	missing virtual address
49	 * %g6 --	TAG TARGET (vaddr >> 22)
50	 */
51tsb_miss_page_table_walk:
52	TRAP_LOAD_TRAP_BLOCK(%g7, %g5)
53
54	/* Before committing to a full page table walk,
55	 * check the huge page TSB.
56	 */
57#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
58
59661:	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
60	nop
61	.section	.sun4v_2insn_patch, "ax"
62	.word		661b
63	mov		SCRATCHPAD_UTSBREG2, %g5
64	ldxa		[%g5] ASI_SCRATCHPAD, %g5
65	.previous
66
67	cmp		%g5, -1
68	be,pt		%xcc, 80f
69	 nop
70
71	/* We need an aligned pair of registers containing 2 values
72	 * which can be easily rematerialized.  %g6 and %g7 foot the
73	 * bill just nicely.  We'll save %g6 away into %g2 for the
74	 * huge page TSB TAG comparison.
75	 *
76	 * Perform a huge page TSB lookup.
77	 */
78	mov		%g6, %g2
79	and		%g5, 0x7, %g6
80	mov		512, %g7
81	andn		%g5, 0x7, %g5
82	sllx		%g7, %g6, %g7
83	srlx		%g4, REAL_HPAGE_SHIFT, %g6
84	sub		%g7, 1, %g7
85	and		%g6, %g7, %g6
86	sllx		%g6, 4, %g6
87	add		%g5, %g6, %g5
88
89	TSB_LOAD_QUAD(%g5, %g6)
90	cmp		%g6, %g2
91	be,a,pt		%xcc, tsb_tlb_reload
92	 mov		%g7, %g5
93
94	/* No match, remember the huge page TSB entry address,
95	 * and restore %g6 and %g7.
96	 */
97	TRAP_LOAD_TRAP_BLOCK(%g7, %g6)
98	srlx		%g4, 22, %g6
9980:	stx		%g5, [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP]
100
101#endif
102
103	ldx		[%g7 + TRAP_PER_CPU_PGD_PADDR], %g7
104
105	/* At this point we have:
106	 * %g1 --	TSB entry address
107	 * %g3 --	FAULT_CODE_{D,I}TLB
108	 * %g4 --	missing virtual address
109	 * %g6 --	TAG TARGET (vaddr >> 22)
110	 * %g7 --	page table physical address
111	 *
112	 * We know that both the base PAGE_SIZE TSB and the HPAGE_SIZE
113	 * TSB both lack a matching entry.
114	 */
115tsb_miss_page_table_walk_sun4v_fastpath:
116	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
117
118	/* Valid PTE is now in %g5.  */
119
120#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
121	sethi		%uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
122	sllx		%g7, 32, %g7
123
124	andcc		%g5, %g7, %g0
125	be,pt		%xcc, 60f
126	 nop
127
128	/* It is a huge page, use huge page TSB entry address we
129	 * calculated above.  If the huge page TSB has not been
130	 * allocated, setup a trap stack and call hugetlb_setup()
131	 * to do so, then return from the trap to replay the TLB
132	 * miss.
133	 *
134	 * This is necessary to handle the case of transparent huge
135	 * pages where we don't really have a non-atomic context
136	 * in which to allocate the hugepage TSB hash table.  When
137	 * the 'mm' faults in the hugepage for the first time, we
138	 * thus handle it here.  This also makes sure that we can
139	 * allocate the TSB hash table on the correct NUMA node.
140	 */
141	TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
142	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g1
143	cmp		%g1, -1
144	bne,pt		%xcc, 60f
145	 nop
146
147661:	rdpr		%pstate, %g5
148	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
149	.section	.sun4v_2insn_patch, "ax"
150	.word		661b
151	SET_GL(1)
152	nop
153	.previous
154
155	rdpr	%tl, %g7
156	cmp	%g7, 1
157	bne,pn	%xcc, winfix_trampoline
158	 mov	%g3, %g4
159	ba,pt	%xcc, etrap
160	 rd	%pc, %g7
161	call	hugetlb_setup
162	 add	%sp, PTREGS_OFF, %o0
163	ba,pt	%xcc, rtrap
164	 nop
165
16660:
167#endif
168
169	/* At this point we have:
170	 * %g1 --	TSB entry address
171	 * %g3 --	FAULT_CODE_{D,I}TLB
172	 * %g5 --	valid PTE
173	 * %g6 --	TAG TARGET (vaddr >> 22)
174	 */
175tsb_reload:
176	TSB_LOCK_TAG(%g1, %g2, %g7)
177	TSB_WRITE(%g1, %g5, %g6)
178
179	/* Finally, load TLB and return from trap.  */
180tsb_tlb_reload:
181	cmp		%g3, FAULT_CODE_DTLB
182	bne,pn		%xcc, tsb_itlb_load
183	 nop
184
185tsb_dtlb_load:
186
187661:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN
188	retry
189	.section	.sun4v_2insn_patch, "ax"
190	.word		661b
191	nop
192	nop
193	.previous
194
195	/* For sun4v the ASI_DTLB_DATA_IN store and the retry
196	 * instruction get nop'd out and we get here to branch
197	 * to the sun4v tlb load code.  The registers are setup
198	 * as follows:
199	 *
200	 * %g4: vaddr
201	 * %g5: PTE
202	 * %g6:	TAG
203	 *
204	 * The sun4v TLB load wants the PTE in %g3 so we fix that
205	 * up here.
206	 */
207	ba,pt		%xcc, sun4v_dtlb_load
208	 mov		%g5, %g3
209
210tsb_itlb_load:
211	/* Executable bit must be set.  */
212661:	sethi		%hi(_PAGE_EXEC_4U), %g4
213	andcc		%g5, %g4, %g0
214	.section	.sun4v_2insn_patch, "ax"
215	.word		661b
216	andcc		%g5, _PAGE_EXEC_4V, %g0
217	nop
218	.previous
219
220	be,pn		%xcc, tsb_do_fault
221	 nop
222
223661:	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
224	retry
225	.section	.sun4v_2insn_patch, "ax"
226	.word		661b
227	nop
228	nop
229	.previous
230
231	/* For sun4v the ASI_ITLB_DATA_IN store and the retry
232	 * instruction get nop'd out and we get here to branch
233	 * to the sun4v tlb load code.  The registers are setup
234	 * as follows:
235	 *
236	 * %g4: vaddr
237	 * %g5: PTE
238	 * %g6:	TAG
239	 *
240	 * The sun4v TLB load wants the PTE in %g3 so we fix that
241	 * up here.
242	 */
243	ba,pt		%xcc, sun4v_itlb_load
244	 mov		%g5, %g3
245
246	/* No valid entry in the page tables, do full fault
247	 * processing.
248	 */
249
250	.globl		tsb_do_fault
251tsb_do_fault:
252	cmp		%g3, FAULT_CODE_DTLB
253
254661:	rdpr		%pstate, %g5
255	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
256	.section	.sun4v_2insn_patch, "ax"
257	.word		661b
258	SET_GL(1)
259	ldxa		[%g0] ASI_SCRATCHPAD, %g4
260	.previous
261
262	bne,pn		%xcc, tsb_do_itlb_fault
263	 nop
264
265tsb_do_dtlb_fault:
266	rdpr	%tl, %g3
267	cmp	%g3, 1
268
269661:	mov	TLB_TAG_ACCESS, %g4
270	ldxa	[%g4] ASI_DMMU, %g5
271	.section .sun4v_2insn_patch, "ax"
272	.word	661b
273	ldx	[%g4 + HV_FAULT_D_ADDR_OFFSET], %g5
274	nop
275	.previous
276
277	/* Clear context ID bits.  */
278	srlx		%g5, PAGE_SHIFT, %g5
279	sllx		%g5, PAGE_SHIFT, %g5
280
281	be,pt	%xcc, sparc64_realfault_common
282	 mov	FAULT_CODE_DTLB, %g4
283	ba,pt	%xcc, winfix_trampoline
284	 nop
285
286tsb_do_itlb_fault:
287	rdpr	%tpc, %g5
288	ba,pt	%xcc, sparc64_realfault_common
289	 mov	FAULT_CODE_ITLB, %g4
290
291	.globl	sparc64_realfault_common
292sparc64_realfault_common:
293	/* fault code in %g4, fault address in %g5, etrap will
294	 * preserve these two values in %l4 and %l5 respectively
295	 */
296	ba,pt	%xcc, etrap			! Save trap state
2971:	 rd	%pc, %g7			! ...
298	stb	%l4, [%g6 + TI_FAULT_CODE]	! Save fault code
299	stx	%l5, [%g6 + TI_FAULT_ADDR]	! Save fault address
300	call	do_sparc64_fault		! Call fault handler
301	 add	%sp, PTREGS_OFF, %o0		! Compute pt_regs arg
302	ba,pt	%xcc, rtrap			! Restore cpu state
303	 nop					! Delay slot (fill me)
304
305winfix_trampoline:
306	rdpr	%tpc, %g3			! Prepare winfixup TNPC
307	or	%g3, 0x7c, %g3			! Compute branch offset
308	wrpr	%g3, %tnpc			! Write it into TNPC
309	done					! Trap return
310
311	/* Insert an entry into the TSB.
312	 *
313	 * %o0: TSB entry pointer (virt or phys address)
314	 * %o1: tag
315	 * %o2:	pte
316	 */
317	.align	32
318	.globl	__tsb_insert
319__tsb_insert:
320	rdpr	%pstate, %o5
321	wrpr	%o5, PSTATE_IE, %pstate
322	TSB_LOCK_TAG(%o0, %g2, %g3)
323	TSB_WRITE(%o0, %o2, %o1)
324	wrpr	%o5, %pstate
325	retl
326	 nop
327	.size	__tsb_insert, .-__tsb_insert
328
329	/* Flush the given TSB entry if it has the matching
330	 * tag.
331	 *
332	 * %o0: TSB entry pointer (virt or phys address)
333	 * %o1:	tag
334	 */
335	.align	32
336	.globl	tsb_flush
337	.type	tsb_flush,#function
338tsb_flush:
339	sethi	%hi(TSB_TAG_LOCK_HIGH), %g2
3401:	TSB_LOAD_TAG(%o0, %g1)
341	srlx	%g1, 32, %o3
342	andcc	%o3, %g2, %g0
343	bne,pn	%icc, 1b
344	 nop
345	cmp	%g1, %o1
346	mov	1, %o3
347	bne,pt	%xcc, 2f
348	 sllx	%o3, TSB_TAG_INVALID_BIT, %o3
349	TSB_CAS_TAG(%o0, %g1, %o3)
350	cmp	%g1, %o3
351	bne,pn	%xcc, 1b
352	 nop
3532:	retl
354	 nop
355	.size	tsb_flush, .-tsb_flush
356
357	/* Reload MMU related context switch state at
358	 * schedule() time.
359	 *
360	 * %o0: page table physical address
361	 * %o1:	TSB base config pointer
362	 * %o2:	TSB huge config pointer, or NULL if none
363	 * %o3:	Hypervisor TSB descriptor physical address
364	 * %o4: Secondary context to load, if non-zero
365	 *
366	 * We have to run this whole thing with interrupts
367	 * disabled so that the current cpu doesn't change
368	 * due to preemption.
369	 */
370	.align	32
371	.globl	__tsb_context_switch
372	.type	__tsb_context_switch,#function
373__tsb_context_switch:
374	rdpr	%pstate, %g1
375	wrpr	%g1, PSTATE_IE, %pstate
376
377	brz,pn	%o4, 1f
378	 mov	SECONDARY_CONTEXT, %o5
379
380661:	stxa	%o4, [%o5] ASI_DMMU
381	.section .sun4v_1insn_patch, "ax"
382	.word	661b
383	stxa	%o4, [%o5] ASI_MMU
384	.previous
385	flush	%g6
386
3871:
388	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
389
390	stx	%o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
391
392	ldx	[%o1 + TSB_CONFIG_REG_VAL], %o0
393	brz,pt	%o2, 1f
394	 mov	-1, %g3
395
396	ldx	[%o2 + TSB_CONFIG_REG_VAL], %g3
397
3981:	stx	%g3, [%g2 + TRAP_PER_CPU_TSB_HUGE]
399
400	sethi	%hi(tlb_type), %g2
401	lduw	[%g2 + %lo(tlb_type)], %g2
402	cmp	%g2, 3
403	bne,pt	%icc, 50f
404	 nop
405
406	/* Hypervisor TSB switch. */
407	mov	SCRATCHPAD_UTSBREG1, %o5
408	stxa	%o0, [%o5] ASI_SCRATCHPAD
409	mov	SCRATCHPAD_UTSBREG2, %o5
410	stxa	%g3, [%o5] ASI_SCRATCHPAD
411
412	mov	2, %o0
413	cmp	%g3, -1
414	move	%xcc, 1, %o0
415
416	mov	HV_FAST_MMU_TSB_CTXNON0, %o5
417	mov	%o3, %o1
418	ta	HV_FAST_TRAP
419
420	/* Finish up.  */
421	ba,pt	%xcc, 9f
422	 nop
423
424	/* SUN4U TSB switch.  */
42550:	mov	TSB_REG, %o5
426	stxa	%o0, [%o5] ASI_DMMU
427	membar	#Sync
428	stxa	%o0, [%o5] ASI_IMMU
429	membar	#Sync
430
4312:	ldx	[%o1 + TSB_CONFIG_MAP_VADDR], %o4
432	brz	%o4, 9f
433	 ldx	[%o1 + TSB_CONFIG_MAP_PTE], %o5
434
435	sethi	%hi(sparc64_highest_unlocked_tlb_ent), %g2
436	mov	TLB_TAG_ACCESS, %g3
437	lduw	[%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2
438	stxa	%o4, [%g3] ASI_DMMU
439	membar	#Sync
440	sllx	%g2, 3, %g2
441	stxa	%o5, [%g2] ASI_DTLB_DATA_ACCESS
442	membar	#Sync
443
444	brz,pt	%o2, 9f
445	 nop
446
447	ldx	[%o2 + TSB_CONFIG_MAP_VADDR], %o4
448	ldx	[%o2 + TSB_CONFIG_MAP_PTE], %o5
449	mov	TLB_TAG_ACCESS, %g3
450	stxa	%o4, [%g3] ASI_DMMU
451	membar	#Sync
452	sub	%g2, (1 << 3), %g2
453	stxa	%o5, [%g2] ASI_DTLB_DATA_ACCESS
454	membar	#Sync
455
4569:
457	wrpr	%g1, %pstate
458
459	retl
460	 nop
461	.size	__tsb_context_switch, .-__tsb_context_switch
462
463#define TSB_PASS_BITS	((1 << TSB_TAG_LOCK_BIT) | \
464			 (1 << TSB_TAG_INVALID_BIT))
465
466	.align	32
467	.globl	copy_tsb
468	.type	copy_tsb,#function
469copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
470			 * %o2=new_tsb_base, %o3=new_tsb_size
471			 * %o4=page_size_shift
472			 */
473	sethi		%uhi(TSB_PASS_BITS), %g7
474	srlx		%o3, 4, %o3
475	add		%o0, %o1, %o1	/* end of old tsb */
476	sllx		%g7, 32, %g7
477	sub		%o3, 1, %o3	/* %o3 == new tsb hash mask */
478
479	mov		%o4, %g1	/* page_size_shift */
480
481661:	prefetcha	[%o0] ASI_N, #one_read
482	.section	.tsb_phys_patch, "ax"
483	.word		661b
484	prefetcha	[%o0] ASI_PHYS_USE_EC, #one_read
485	.previous
486
48790:	andcc		%o0, (64 - 1), %g0
488	bne		1f
489	 add		%o0, 64, %o5
490
491661:	prefetcha	[%o5] ASI_N, #one_read
492	.section	.tsb_phys_patch, "ax"
493	.word		661b
494	prefetcha	[%o5] ASI_PHYS_USE_EC, #one_read
495	.previous
496
4971:	TSB_LOAD_QUAD(%o0, %g2)		/* %g2/%g3 == TSB entry */
498	andcc		%g2, %g7, %g0	/* LOCK or INVALID set? */
499	bne,pn		%xcc, 80f	/* Skip it */
500	 sllx		%g2, 22, %o4	/* TAG --> VADDR */
501
502	/* This can definitely be computed faster... */
503	srlx		%o0, 4, %o5	/* Build index */
504	and		%o5, 511, %o5	/* Mask index */
505	sllx		%o5, %g1, %o5	/* Put into vaddr position */
506	or		%o4, %o5, %o4	/* Full VADDR. */
507	srlx		%o4, %g1, %o4	/* Shift down to create index */
508	and		%o4, %o3, %o4	/* Mask with new_tsb_nents-1 */
509	sllx		%o4, 4, %o4	/* Shift back up into tsb ent offset */
510	TSB_STORE(%o2 + %o4, %g2)	/* Store TAG */
511	add		%o4, 0x8, %o4	/* Advance to TTE */
512	TSB_STORE(%o2 + %o4, %g3)	/* Store TTE */
513
51480:	add		%o0, 16, %o0
515	cmp		%o0, %o1
516	bne,pt		%xcc, 90b
517	 nop
518
519	retl
520	 nop
521	.size		copy_tsb, .-copy_tsb
522
523	/* Set the invalid bit in all TSB entries.  */
524	.align		32
525	.globl		tsb_init
526	.type		tsb_init,#function
527tsb_init:		/* %o0 = TSB vaddr, %o1 = size in bytes */
528	prefetch	[%o0 + 0x000], #n_writes
529	mov		1, %g1
530	prefetch	[%o0 + 0x040], #n_writes
531	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
532	prefetch	[%o0 + 0x080], #n_writes
5331:	prefetch	[%o0 + 0x0c0], #n_writes
534	stx		%g1, [%o0 + 0x00]
535	stx		%g1, [%o0 + 0x10]
536	stx		%g1, [%o0 + 0x20]
537	stx		%g1, [%o0 + 0x30]
538	prefetch	[%o0 + 0x100], #n_writes
539	stx		%g1, [%o0 + 0x40]
540	stx		%g1, [%o0 + 0x50]
541	stx		%g1, [%o0 + 0x60]
542	stx		%g1, [%o0 + 0x70]
543	prefetch	[%o0 + 0x140], #n_writes
544	stx		%g1, [%o0 + 0x80]
545	stx		%g1, [%o0 + 0x90]
546	stx		%g1, [%o0 + 0xa0]
547	stx		%g1, [%o0 + 0xb0]
548	prefetch	[%o0 + 0x180], #n_writes
549	stx		%g1, [%o0 + 0xc0]
550	stx		%g1, [%o0 + 0xd0]
551	stx		%g1, [%o0 + 0xe0]
552	stx		%g1, [%o0 + 0xf0]
553	subcc		%o1, 0x100, %o1
554	bne,pt		%xcc, 1b
555	 add		%o0, 0x100, %o0
556	retl
557	 nop
558	nop
559	nop
560	.size		tsb_init, .-tsb_init
561
562	.globl		NGtsb_init
563	.type		NGtsb_init,#function
564NGtsb_init:
565	rd		%asi, %g2
566	mov		1, %g1
567	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
568	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
5691:	stxa		%g1, [%o0 + 0x00] %asi
570	stxa		%g1, [%o0 + 0x10] %asi
571	stxa		%g1, [%o0 + 0x20] %asi
572	stxa		%g1, [%o0 + 0x30] %asi
573	stxa		%g1, [%o0 + 0x40] %asi
574	stxa		%g1, [%o0 + 0x50] %asi
575	stxa		%g1, [%o0 + 0x60] %asi
576	stxa		%g1, [%o0 + 0x70] %asi
577	stxa		%g1, [%o0 + 0x80] %asi
578	stxa		%g1, [%o0 + 0x90] %asi
579	stxa		%g1, [%o0 + 0xa0] %asi
580	stxa		%g1, [%o0 + 0xb0] %asi
581	stxa		%g1, [%o0 + 0xc0] %asi
582	stxa		%g1, [%o0 + 0xd0] %asi
583	stxa		%g1, [%o0 + 0xe0] %asi
584	stxa		%g1, [%o0 + 0xf0] %asi
585	subcc		%o1, 0x100, %o1
586	bne,pt		%xcc, 1b
587	 add		%o0, 0x100, %o0
588	membar		#Sync
589	retl
590	 wr		%g2, 0x0, %asi
591	.size		NGtsb_init, .-NGtsb_init
592