xref: /titanic_50/usr/src/uts/sun4u/starcat/ml/drmach_asm.s (revision dfb96a4f56fb431b915bc67e5d9d5c8d4f4f6679)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * This file is through cpp before being used as
30 * an inline.  It contains support routines used
31 * only by DR.
32 */
33
34#if defined(lint)
35#include <sys/types.h>
36#else
37#include "assym.h"
38#endif /* lint */
39
40#include <sys/asm_linkage.h>
41#include <sys/clock.h>
42#include <sys/param.h>
43#include <sys/privregs.h>
44#include <sys/machasi.h>
45#include <sys/mmu.h>
46#include <sys/machthread.h>
47#include <sys/pte.h>
48#include <sys/stack.h>
49#include <sys/vis.h>
50#include <sys/cheetahregs.h>
51#include <sys/cmpregs.h>
52#include <sys/intreg.h>
53#include <sys/cheetahasm.h>
54
55#if defined(lint)
56
57/*ARGSUSED*/
58void
59drmach_shutdown_asm(uint64_t estack, uint64_t flushaddr, int size)
60{}
61
62/*ARGSUSED*/
63void
64drmach_rename(uint64_t *script, uint_t *err, uint64_t *id)
65{}
66
67void
68drmach_rename_end(void)
69{}
70
71/*ARGSUSED*/
72void
73drmach_rename_wait(uint64_t not_used_0, uint64_t not_used_1)
74{
75}
76
77/*ARGSUSED*/
78void
79drmach_rename_done(uint64_t not_used_0, uint64_t not_used_1)
80{
81}
82
83/*ARGSUSED*/
84void
85drmach_rename_abort(uint64_t not_used_0, uint64_t not_used_1)
86{
87}
88
89/*ARGSUSED*/
90uint64_t
91lddsafconfig(uint64_t physaddr)
92{
93	return (0x0ull);
94}
95
96/* ARGSUSED */
97uint32_t
98drmach_bc_bzero(void *addr, size_t size)
99{
100	return (0x0);
101}
102
103#else /* lint */
104
105#define BUS_SYNC(reg1, reg2)					\
1061:								;\
107	ldx	[reg1], reg2					;\
108	brz,pn	reg2, 2f					;\
109	add	reg1, 8, reg1					;\
110	ldxa	[reg2]ASI_MEM, %g0				;\
111	ba,a	1b						;\
112	nop							;\
1132:
114
115#define LOAD_MB(cpuid, mb_data, reg1)				\
116	set	drmach_xt_mb, reg1				;\
117	ldx	[reg1], reg1					;\
118	add	reg1, cpuid, reg1				;\
119	ldub	[reg1], mb_data					;\
120	stub	%g0, [reg1]
121
122#define LPA_MASK 0x7ff8
123
124#define SET_LPA(cmd, reg1, reg2)				\
125	btst	0x80, cmd					;\
126	bz	2f						;\
127	nop							;\
128	btst	0x40, cmd					;\
129	bnz,a	1f						;\
130	mov	%g0, cmd					;\
131	and	cmd, 0x1f, cmd					;\
132	sllx	cmd, 3, reg1					;\
133	add	cmd, 1, cmd					;\
134	sllx	cmd, 9, cmd					;\
135	or	cmd, reg1, cmd					;\
1361:								;\
137	set	LPA_MASK, reg2					;\
138	ldxa	[%g0]ASI_SAFARI_CONFIG, reg1			;\
139	and	cmd, reg2, cmd					;\
140	andn	reg1, reg2, reg1				;\
141	or	reg1, cmd, reg1					;\
142	stxa	reg1, [%g0]ASI_SAFARI_CONFIG			;\
143	membar	#Sync						;\
1442:								;\
145
146#define SET_NULL_LPA(reg1, reg2)				\
147	set	LPA_MASK, reg2					;\
148	ldxa	[%g0]ASI_SAFARI_CONFIG, reg1			;\
149	andn	reg1, reg2, reg1				;\
150	stxa	reg1, [%g0]ASI_SAFARI_CONFIG			;\
151	membar	#Sync						;\
152
153	! ATOMIC_ADD_LONG
154	! This code is run at TL > 0, being exec'd via a cross trap.
155	! While running at trap level > 0, all memory accesses are
156	! performed using NUCLEUS context, which is always 0.
157	! Since the cross trap handler does not force PRIMARY context
158	! to be zero, the following casxa instruction must specify
159	! NUCLEUS ASI.
160	! This ASI must be specified explicitly (via casxa), rather
161	! than using casx. This is because of the fact that the
162	! default casx specifies ASI_PRIMARY, which if non-zero, can
163	! prevent the cpu from translating the address, leading to panic
164	! on bad trap following repetitive dtlb misses.  This behavior
165	! was encountered on MCPUs when using casx instruction.
166#define ATOMIC_ADD_LONG(label, simm, reg1, reg2, reg3)		\
167	set	label, reg1					;\
168	ldx	[reg1], reg2					;\
1691:								;\
170	add	reg2, simm, reg3				;\
171	casxa	[reg1]ASI_N, reg2, reg3				;\
172	cmp	reg2, reg3					;\
173	bne,a,pn %xcc, 1b					;\
174	ldx	[reg1], reg2
175
176#define HERE(reg1, simm, reg2)					\
177	rdpr	%tick, reg2					;\
178	stx	reg2, [reg1 + simm]
179
180	!
181	! Returns processor icache size and linesize in reg1 and
182	! reg2, respectively.
183	!
184	! Panther has a larger icache compared to Cheetahplus and
185	! Jaguar.
186	!
187#define	GET_ICACHE_PARAMS(reg1, reg2)				\
188	GET_CPU_IMPL(reg1)					;\
189	cmp	reg1, PANTHER_IMPL				;\
190	bne	%xcc, 1f					;\
191	  nop							;\
192	set	PN_ICACHE_SIZE, reg1				;\
193	set	PN_ICACHE_LSIZE, reg2				;\
194	ba	2f						;\
195	  nop							;\
1961:								;\
197	set	CH_ICACHE_SIZE, reg1				;\
198	set	CH_ICACHE_LSIZE, reg2				;\
1992:
200
201#define	DRMACH_MCU_IDLE_READS	3
202
203	! Macro to check if a Panther MC is idle.  The EMU Activity
204	! Status register is first read to clear the MCU status bit.
205	! The MCU status is then checked DRMACH_MCU_IDLE_READS times
206	! to verify the MCU is indeed idle.  A single non-idle status
207	! will fail the idle check.  This could be made more lenient
208	! by adding a retry loop.
209	!	addr:	Panther EMU Activity Status register read address.
210	!		Assumed to be 0x18 for local ASI access or else
211	!		FIREPLANE_ADDRESS_REG + 0x400050 for PIO access.
212	!		0 is returned in this register if MCU is idle and
213	!		queues are empty.  Otherwise, -1 is returned in this
214	!		register.
215	!	asi:	Immediate asi value.  Assumed to be ASI_SAFARI_CONFIG
216	!		for local ASI or ASI_IO for PIO access.
217	!	scr1:	Scratch
218	!	scr2:	Scratch
219	!
220#define	CHECK_MCU_IDLE(addr, asi, scr1, scr2)			\
221	ldxa	[addr]asi, %g0					;\
222	ba	1f						;\
223	  clr	scr2						;\
2240:								;\
225	btst	MCU_ACT_STATUS, scr1				;\
226	bne,a	2f						;\
227	  sub	%g0, 1, addr					;\
228	inc	scr2						;\
2291:								;\
230	cmp	scr2, DRMACH_MCU_IDLE_READS			;\
231	ble,a	0b						;\
232	  ldxa    [addr]asi, scr1				;\
233	clr	addr						;\
2342:
235
236	! drmach_shutdown_asm
237	!
238	! inputs:
239	!	%o0 = stack pointer
240	!	%o1 = ecache flush address (ignored if cheetah+ processor)
241	!	%o2 = ecache size
242	!	%o3 = ecache line size
243	!	%o4 = phys addr of byte to clear when finished
244	!
245	! output:
246	!	Stores a zero at [%o4]ASI_MEM when the processor
247	!	is ready to be removed from domain coherency.
248	!
249	ENTRY_NP(drmach_shutdown_asm)
250	membar	#LoadStore		! parsley.
251
252	! Calculate pointer to data area. Determine size of
253	! drmach_shutdown_asm, add to base address and align
254	! to next 16 byte boundary. Leave result in %g6.
255	set	drmach_shutdown_asm_end, %g6
256	set	drmach_shutdown_asm, %g1
257	set	drmach_cpu_sram_va, %g2
258	ldx	[%g2], %g2
259	sub	%g6, %g1, %g6
260	add	%g6, %g2, %g6
261	add	%g6, 15, %g6
262	andn	%g6, 15, %g6
263
264	! Save parameters
265	stx	%o0, [%g6 + 0]		! save stack pointer
266	stx	%o1, [%g6 + 24]		! save E$ flush PA
267	st	%o2, [%g6 + 32]		! save E$ size
268	st	%o3, [%g6 + 36]		! save E$ linesize
269	stx	%o4, [%g6 + 40]		! save phys addr of signal byte
270
271	set	dcache_size, %g1
272	ld	[%g1], %g1
273	st	%g1, [%g6 + 8]		! save dcache_size
274	set	dcache_linesize, %g1
275	ld	[%g1], %g1
276	st	%g1, [%g6 + 12]		! save dcache_linesize
277
278	GET_ICACHE_PARAMS(%g1, %g2)
279	st	%g1, [%g6 + 16]		! save icache_size
280	st	%g2, [%g6 + 20]		! save icache_linesize
281
282	! Flushes all active windows except the current one.
283	! Can cause spill traps to occur.
284	flushw
285
286	! Make sure all asynchronous processing is complete.
287	! Note: has no implications on pending bus transactions.
288	membar	#Sync
289
290	! Move stack. Algorithm copied from t0stacktop setup of
291	! %sp in sun4u/ml/locore.s
292	! Replaces SWITCH_STACK() macro used in Starfire DR.
293	ldx	[%g6 + 0], %g1
294	sub	%g1, SA(KFPUSIZE+GSR_SIZE), %g2
295 	and	%g2, 0x3f, %g3
296 	sub	%g2, %g3, %o2
297 	sub	%o2, SA(MPCBSIZE) + STACK_BIAS, %sp
298	stx	%sp, [%g6 + 48]		! for debug
299
300	HERE(%g6, 128, %g1)		! initialization complete (for debug)
301
302	! Panther needs to flush the L2 cache before the L3
303	! cache is flushed by the ecache flushall macro.
304	PN_L2_FLUSHALL(%g1, %g2, %g3)
305
306	! Flush E$. The purpose of this flush is to rid the E$ of
307	! lines in states O or Os. Implicitly flushes W$.
308	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
309	ld	[%g6 + 32], %g2		! ecache_size
310	ld	[%g6 + 36], %g3		! ecache_linesize
311	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
312
313	! Since the bus sync list read below does not guarantee
314	! transaction completion on Panther domains, as an
315	! optimization Panther skips the read and subsequent
316	! E$ flush.
317	GET_CPU_IMPL(%g1)
318	cmp	%g1, PANTHER_IMPL
319	be	%xcc, drmach_shutdown_ecache_flushed
320	  nop
321
322	!
323	! Ensure all outstanding writebacks have retired.  Following this
324	! sync, all writes must be strictly managed.
325	!
326	set	drmach_bus_sync_list, %g1
327	BUS_SYNC(%g1, %g2)
328
329	! Flush E$ again to victimize references to drmach_bus_sync_list.
330	ldx     [%g6 + 24], %g1         ! *ecache_flushaddr
331	ld	[%g6 + 32], %g2		! ecache_size
332	ld	[%g6 + 36], %g3		! ecache_linesize
333	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
334
335drmach_shutdown_ecache_flushed:
336
337	ld	[%g6 + 8], %g1		! flush dcache
338	ld	[%g6 + 12], %g2
339	CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
340
341	ld	[%g6 + 16], %g1		! flush icache
342	ld	[%g6 + 20], %g2
343	CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
344
345	PCACHE_FLUSHALL(%g1, %g2, %g3) ! flush pcache (no parameters)
346
347	!
348	! Flush all unlocked dtlb and itlb entries.
349	! Replaces TLB_FLUSH_UNLOCKED macro used in Starfire DR.
350	!
351	sethi	%hi(FLUSH_ADDR), %g1
352	set	DEMAP_ALL_TYPE, %g2
353	stxa	%g0, [%g2]ASI_DTLB_DEMAP
354	stxa	%g0, [%g2]ASI_ITLB_DEMAP
355	flush	%g1
356
357	!
358	! Zero LPA by clearing CBASE and CBND. Following
359	! this, all transactions to cachable address space
360	! will be of the remote flavor.
361	!
362	SET_NULL_LPA(%g1, %g2)
363
364	HERE(%g6, 136, %g1)		! preparation complete (for debug)
365
366	!
367	! Clear byte to signal finished.
368	! NOTE: This store will allocate in the E$. It is
369	! vitally important that this line is demoted to
370	! state I before removing this processor from the
371	! coherency.  The demotion is ensured by a synchronous
372	! "steal back" that takes place in drmach_cpu_poweroff.
373	ldx	[%g6 + 40], %g1
374	stba	%g0, [%g1]ASI_MEM
3755:
376	HERE(%g6, 144, %g1)		! spin indicator (for debug)
377	ba	5b
378	  nop
379
380	.asciz	"drmach_shutdown_asm"		! for debug
381	.align	4
382	.global	drmach_shutdown_asm_end
383drmach_shutdown_asm_end:
384	SET_SIZE(drmach_shutdown_asm)
385
386
387	! lddsafconfig
388	!
389	! input:
390	!	nothing
391	!
392	! output:
393	!	%o0	content of this processor's SCR
394	!
395	!	Returns current value of this processor's Safari
396	!	Configuration Register.
397	!
398	ENTRY(lddsafconfig)
399        retl
400        ldxa    [%g0]ASI_SAFARI_CONFIG, %o0
401        SET_SIZE(lddsafconfig)
402
403	! drmach_rename
404	!
405	! input:
406	!	%o0	pointer to register address/value compound list
407	!	%o1	address for setting error code if rename did not
408	!		complete.  Unmodified if no error.
409	!	%o2	address for returning opaque memory controller id
410	!		in case of error.  Unmodified if no error.
411	!	Global	drmach_xt_mb[cpuid] is expected to be the new LPA.
412	!
413	! output:
414	!	[%o1] =	1 if failed to idle memory controller, otherwise unmodified.
415	!	[%o2] = id of failed memory controller, otherwise unmodified.
416	!
417	! Perform HW register reprogramming. This is the "rename" step for
418	! the copy-rename process.  drmach_rename is copied to a cpu's sram
419	! followed by register address/value pairs -- the text and data are
420	! sourced from the sram while drmach_rename is executed.
421	!
422	! The parameter is assumed to point to a concatenation of six
423	! zero-terminated lists located in non-cachable storage. The assumed
424	! format (and purpose) of each list is as follows:
425	!
426	!	1) a copy of drmach_bus_sync_list. A list of PA for each
427	!	   active memory bank in the domain. Used to infer the
428	!	   the completion of all pending coherent transactions
429	!	   initiated by this processor. Assumes MC work queue
430	!	   does not implement read bypass. This is true of Cheetah,
431	!	   Cheetah+, and Jaguar processors.  Panther does support
432	!	   read bypass, so for Panther MCs with read-bypass-write
433	!	   enabled, the read is issued but it does not guarantee
434	!	   completion of outstanding writes in the MC queue.
435	!	2) address/id pair for the local Panther EMU Activity Status
436	!	   Register of this processor.  The register address is assumed
437	!	   to be a VA which is polled via ASI_SAFARI_CONFIG until the
438	!	   MC queues are empty.  The id is an opaque identifier which
439	!	   must be returned along with an error code if the MCU status
440	!	   does not go idle.  See the parameter description above.
441	!	   This section will be empty if this processor is not a Panther.
442	!	   Both the address and id are assumed to be 64 bit values.
443	!	3) address/id pairs for non-local Panther EMU Activity Status
444	!	   Registers on other source and target processors.  The register
445	!	   address is assumed to be a PIO address which is polled via
446	!	   ASI_IO to drain/idle the MCs on other Panther procs.  The
447	!	   id is an opaque identifier which must be returned along with
448	!	   an error code if a MC fails to go idle.  This section will
449	!	   empty if there are no non-local Panther processors on the
450	!	   source and target expanders.  Both the address and id are
451	!	   assumed to be 64 bit values.
452	!	4) address/value pairs for the Memory Address Decoder
453	!	   register of this processor. The register address is
454	!	   assumed to be a VA within ASM_MC_DECODE space. The
455	!	   address and value elements are assumed to 64 bit values.
456	!	5) address/value pairs for any 64 bit register accessible
457	!	   via ASI_IO. The address and value fields are assumed to
458	!	   be 64 bit values.
459	!	   This list is typically used for reprogramming the Memory
460	!	   Address Decoder Register of other cpus and for reprogram-
461	!	   ming the Safari Configuration Register of I/O controllers.
462	!	6) address/value pairs for any 32 bit register accessible
463	!	   via ASI_IO. The address element is assumed to be a 64 bit
464	!	   value. The value element is assumed to be a 64 bit word
465	!	   containing a 32 bit value in the lower half.
466	!	   This list typically contains address/value pairs for
467	!	   AXQ CASM tables.
468	!
469	ENTRY_NP(drmach_rename)
470
471	mov	%o1, %o4		! save error code address
472	mov	%o2, %o5		! save error id address
473
474	BUS_SYNC(%o0, %o1)		! run section 1
475
476	SET_NULL_LPA(%o1, %o2)		! prep for cachable transactions
477					! after rename completes.
478					! e.g.: the load_mb that occurs below
4793:
480	ldx	[%o0], %o1		! run section 2
481	brz,a,pn %o1, 4f
482	add	%o0, 8, %o0		! skip section 2 terminator
483	CHECK_MCU_IDLE(%o1, ASI_SAFARI_CONFIG, %o2, %o3)
484	cmp	%o1, 0			! idled?
485	be,a	3b			! ok, advance
486	  add	%o0, 16, %o0
487	mov	1, %o1			! not idle, bailout
488	stw	%o1, [%o4]		! set MC idle error code
489	ldx	[%o0 + 8], %o1
490	stx	%o1, [%o5]		! set MC idle error id
491	retl
492	  nop
4934:
494	ldx	[%o0], %o1		! run section 3
495	brz,a,pn %o1, 5f
496	add	%o0, 8, %o0		! skip section 3 terminator
497	CHECK_MCU_IDLE(%o1, ASI_IO, %o2, %o3)
498	cmp	%o1, 0			! idled?
499	be,a	4b			! ok, advance
500	  add	%o0, 16, %o0
501	mov	1, %o1			! not idle, bailout
502	stw	%o1, [%o4]		! set MC idle error code
503	ldx	[%o0 + 8], %o1
504	stx	%o1, [%o5]		! set MC idle error id
505	retl
506	  nop
5075:
508	ldx	[%o0], %o1		! run section 4
509	brz,a,pn %o1, 6f
510	add	%o0, 8, %o0		! skip section 4 terminator
511	ldx	[%o0 + 8], %o2
512	stxa	%o2, [%o1]ASI_MC_DECODE
513	membar	#Sync
514	ldxa	[%o1]ASI_MC_DECODE, %g0	! read back to insure written
515	b	5b
516	add	%o0, 16, %o0
5176:
518	ldx	[%o0], %o1		! run section 5
519	brz,a,pn %o1, 7f
520	add	%o0, 8, %o0		! skip section 5 terminator
521	ldx	[%o0 + 8], %o2
522	stxa	%o2, [%o1]ASI_IO
523	ldxa	[%o1]ASI_IO, %g0	! read back to insure written
524	b	6b
525	add	%o0, 16, %o0
5267:
527	ldx	[%o0], %o1		! run section 6
528	brz,a,pn %o1, 8f
529	nop
530	ldx	[%o0 + 8], %o2
531	stwa	%o2, [%o1]ASI_IO
532	lduwa	[%o1]ASI_IO, %g0	! read back to insure written
533	b	7b
534	add	%o0, 16, %o0
5358:
536	CPU_INDEX(%o0, %o1)
537	LOAD_MB(%o0, %o1, %o2)
538	SET_LPA(%o1, %o0, %o2)
539
540	retl
541	nop
542
543	.asciz	"drmach_rename"		! for debug
544	.align	4
545	SET_SIZE(drmach_rename)
546
547	.global drmach_rename_end
548drmach_rename_end:
549
550
551	! drmach_rename_wait
552	!
553	! input:
554	!	nothing
555	!
556	! output:
557	!	nothing
558	!
559	! drmach_rename_wait is a cross-trap function used to move a
560	! cpu's execution out of coherent space while a copy-rename
561	! operation is in progress.
562	!
563	! In each CPU SRAM exists an area (16KB on Cheetah+ boards,
564	! 32KB on Jaguar/Panther boards) reserved for DR. This area is
565	! logically divided by DR into 8KB pages, one page per CPU (or
566	! core) in a port pair. (Two Safari ports share HW resources on
567	! a CPU/MEM board. These are referred to as a port pair.)
568	!
569	! This routine begins by mapping the appropriate SRAM page,
570	! transferring the machine code (between the labels
571	! drmach_rename_wait_asm and drmach_rename_wait_asm_end), then
572	! jumping to SRAM.  After returning from SRAM, the page is
573	! demapped before the cross-call is exited (sic).
574	!
575	! The machine code flushes all caches, waits for a special
576	! interrupt vector, then updates the processor's LPA and
577	! resynchronizes caches with the new home memory.
578	!
579	! The special interrupt vector is assumed to be a cross-call to
580	! drmach_rename_done sent by the master processor upon completing
581	! the copy-rename operation. The interrupt is received and discarded;
582	! The cross-call to drmach_rename_done is never executed.  Instead
583	! the Interrupt Receive Status Register is employed, temporarily,
584	! as a semaphore. This avoids unwanted bus traffic during the critical
585	! rename operation.
586	!
587	ENTRY_NP(drmach_rename_wait)
588
589	CPU_INDEX(%g5, %g1)		! put cpuid in %g5
590
591	!
592	! sfmmu_dtlb_ld(drmach_cpu_sram_va,
593	!	KCONTEXT, drmach_cpu_sram_tte[cpuid]);
594	! sfmmu_itlb_ld(drmach_cpu_sram_va,
595	!	KCONTEXT, drmach_cpu_sram_tte[cpuid]);
596	!
597	set	drmach_cpu_sram_tte, %g1
598	sllx	%g5, 3, %g2
599	ldx	[%g1 + %g2], %g3
600	set	drmach_cpu_sram_va, %g1
601	ldx	[%g1], %g1
602	or	%g1, KCONTEXT, %g2	! preserve %g1
603	set	MMU_TAG_ACCESS, %g4
604	set	cpu_impl_dual_pgsz, %g6
605	ld      [%g6], %g6
606	brz	%g6, 1f
607	  nop
608
609	sethi	%hi(ksfmmup), %g6
610	ldx	[%g6 + %lo(ksfmmup)], %g6
611	ldub    [%g6 + SFMMU_CEXT], %g6
612        sll     %g6, TAGACCEXT_SHIFT, %g6
613
614	set	MMU_TAG_ACCESS_EXT, %g7
615	stxa	%g6, [%g7]ASI_DMMU
6161:
617	stxa	%g2, [%g4]ASI_DMMU
618	stxa    %g3, [%g0]ASI_DTLB_IN
619	membar	#Sync
620	sethi	%hi(FLUSH_ADDR), %g6
621	stxa	%g2, [%g4]ASI_IMMU
622	stxa    %g3, [%g0]ASI_ITLB_IN
623	flush	%g6
624
625	!
626	! copy drmach_rename_wait_asm block to SRAM. Preserve entry
627	! point in %g1. After the code has been copied, align %g6
628	! (the destination pointer) to the next highest 16 byte
629	! boundary. This will define the start of the data area.
630	!
631	mov	%g1, %g6
632	set	drmach_rename_wait_asm, %g2
633	set	drmach_rename_wait_asm_end, %g3
6340:
635	lduw	[%g2], %g4		! do copy
636	stw	%g4, [%g6]
637	add	%g2, 4, %g2
638	cmp	%g2, %g3
639	bne	0b
640	add	%g6, 4, %g6
641
642	add	%g6, 15, %g6		! locate data area on next 16 byte
643	andn	%g6, 15, %g6		! boundary following text
644					! WARNING: no bounds checking
645
646	jmpl	%g1, %g7		! jump to code in cpu sram
647	nop
648
649	set	drmach_cpu_sram_va, %g1	! vtab_flushpage_tl1(drmach_cpu_sram_va,
650	ldx	[%g1], %g1		! 	KCONTEXT);
651	set	KCONTEXT, %g2
652	set	MMU_PCONTEXT, %g4
653	or	%g1, DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1
654	ldxa	[%g4]ASI_DMMU, %g5	/* rd old ctxnum */
655	stxa	%g2, [%g4]ASI_DMMU	/* wr new ctxum */
656	stxa	%g0, [%g1]ASI_DTLB_DEMAP
657	stxa	%g0, [%g1]ASI_ITLB_DEMAP
658	stxa	%g5, [%g4]ASI_DMMU	/* restore old ctxnum */
659
660	retry
661
662drmach_rename_wait_asm:
663	! the following code is copied to a cpu's sram and executed
664	! from there.
665	! Input:
666	!	%g5 is cpuid
667	!	%g6 is data area (follows text)
668	!	%g7 is link address back to caller
669	!
670	st	%g5, [%g6 + 4]		! save cpuid (for debug)
671
672	set	dcache_size, %g1
673	ld	[%g1], %g1
674	st	%g1, [%g6 + 8]		! save dcache_size
675	set	dcache_linesize, %g1
676	ld	[%g1], %g1
677	st	%g1, [%g6 + 12]		! save dcache_linesize
678
679	GET_ICACHE_PARAMS(%g1, %g2)
680	st	%g1, [%g6 + 16]		! save icache_size
681	st	%g2, [%g6 + 20]		! save icache_linesize
682
683	set	drmach_iocage_paddr, %g1
684	ldx	[%g1], %g1
685	stx	%g1, [%g6 + 24]		! save *ecache_flushadr
686
687	mulx	%g5, CPU_NODE_SIZE, %g1	! %g4 = &cpunodes[cpuid]
688	set	cpunodes, %g4
689	add	%g4, %g1, %g4
690	ld	[%g4 + ECACHE_SIZE], %g1
691	st	%g1, [%g6 + 32]		! save ecache_size
692	ld	[%g4 + ECACHE_LINESIZE], %g1
693	st	%g1, [%g6 + 36]		! save ecache_linesize
694
695	LOAD_MB(%g5, %g1, %g2)		! save mailbox data
696	stb	%g1, [%g6 + 40]
697
698	membar	#Sync			! Complete any pending processing.
699
700	! Flush E$. The purpose of this flush is to rid the E$ of
701	! lines in states O or Os. Implicitly flushes W$.
702	! NOTE: Reading the bus sync list and r/w ops on drmach_xt_ready
703	! will disturb the E$. The lines of the bus sync list will be
704	! in state S. The line containing drmach_xt_ready will be in
705	! state O. Before proceeding with the copy-rename, the master
706	! processor will "steal back" the drmach_xt_ready (sic) line.
707	! This will demote the state of the line in E$ to I.
708	! However, the lines containing the bus sync list must be
709	! victimized before returning to the OS. This is vital because
710	! following copy-rename the corresponding lines in the new home
711	! memory will be in state gM. The resulting S,gM state pair is
712	! invalid and does represent a loss of coherency. Flushing the
713	! E$ after the bus sync list is read will be sufficient to
714	! avoid the invalid condition.
715	!
716	! For Panther, there is redundancy as both cores flush the shared
717	! L2 and L3 caches.  As an optimization, only one core could do the
718	! flush of the shared caches, however care must be taken that the
719	! sibling core does not install owned lines once the flush begins.
720	PN_L2_FLUSHALL(%g1, %g2, %g3)
721	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
722	ld	[%g6 + 32], %g2		! ecache_size
723	ld	[%g6 + 36], %g3		! ecache_linesize
724	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
725
726	! Make sure all outstanding transactions for this processor
727	! have retired. See E$ note above.
728	set	drmach_bus_sync_list, %g1
729	BUS_SYNC(%g1, %g2)
730
731	HERE(%g6, 128, %g4)		! preparation complete (for debug)
732
733	! Signal this processor is ready for rename operation to begin.
734	! See E$ note above.
735	ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g2, %g3, %g4)
736
737	! Loop on IRSR waiting for interrupt. The expected interrupt
738	! is a cross-trap to drmach_wait_done. It is sent by the master
739	! processor when the copy-rename operation is complete. The
740	! received cross-trap is used only as a signal. It is not executed.
7412:
742	HERE(%g6, 136, %g4)		! last poll tick (for debug)
743
744	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g4	! wait for xt
745	btst	IRSR_BUSY, %g4
746	bz	2b
747	nop
748	stx	%g4, [%g6 + 64]		! save status and payload
749	set	IRDR_0, %g2
750	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
751	stx	%g2, [%g6 + 72]
752	set	IRDR_1, %g2
753	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
754	stx	%g2, [%g6 + 80]
755	set	IRDR_2, %g2
756	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
757	stx	%g2, [%g6 + 88]
758
759					! clear rcv status
760	stxa	%g0, [%g0]ASI_INTR_RECEIVE_STATUS
761	membar	#Sync
762
763	HERE(%g6, 144, %g4)		! signal rcvd tick (for debug)
764
765	! Check for copy-rename abort signal. If this signal is received,
766	! the LPA change is skipped since the rename step was not done.
767	! The cache flushes are still done as paranoia.
768	set	drmach_rename_abort, %g1
769	ldx	[%g6 + 72], %g2
770	cmp 	%g1, %g2
771	be	3f
772	nop
773
774	! Resume waiting if this is not drmach_rename_done.
775	set	drmach_rename_done, %g1
776	cmp 	%g1, %g2
777	bne	2b
778	nop
779
780	ldub	[%g6 + 40], %g1		! get saved mailbox data
781	SET_LPA(%g1, %g2, %g3)		! set LPA as indicated by the mb data
782
7833:
784	! Flush all caches (E, D, I and P) to ensure each is resynchronized
785	! with the corresponding states in the new home memory. (W$ is
786	! implicitly flushed when the E$ is flushed.)
787	!
788	! Panther needs to flush the L2 cache before the L3
789	! cache is flushed by the ecache flushall macro.
790	PN_L2_FLUSHALL(%g1, %g2, %g3)
791
792	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
793	ld	[%g6 + 32], %g2		! ecache_size
794	ld	[%g6 + 36], %g3		! ecache_linesize
795	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
796
797	ld	[%g6 + 8], %g1		! flush dcache
798	ld	[%g6 + 12], %g2
799	CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
800
801	ld	[%g6 + 16], %g1		! flush icache
802	ld	[%g6 + 20], %g2
803	CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
804
805	PCACHE_FLUSHALL(%g1, %g2, %g3)	! flush pcache (no parameters)
806
807	HERE(%g6, 152, %g4)		! done tick (for debug)
808
809	jmpl	%g7+8, %g0
810	nop
811
812	.asciz	"drmach_rename_wait"	! for debug
813	.align	4
814drmach_rename_wait_asm_end:
815	SET_SIZE(drmach_rename_wait)
816
817
818	! drmach_rename_done
819	!
820	! input:
821	!	nothing
822	!
823	! output:
824	!	nothing
825	!
826	! Used as signal data. See drmach_rename_wait.
827	!
828	ENTRY_NP(drmach_rename_done)
829	retry
830	SET_SIZE(drmach_rename_done)
831
832	! drmach_rename_abort
833	!
834	! input:
835	!	nothing
836	!
837	! output:
838	!	nothing
839	!
840	! Used as signal data. See drmach_rename_wait.
841	!
842	ENTRY_NP(drmach_rename_abort)
843	retry
844	SET_SIZE(drmach_rename_abort)
845
846
847	! drmach_set_lpa
848	!
849	! input:
850	!	Globals: drmach_xt_mb[cpuid] contains new LPA data
851	!
852	! output:
853	!	nothing
854	!
855	! Sets the executing processor's LPA as indicated by the command
856	! stored in drmach_xt_mb, a byte array indexed by cpuid. Assumes
857	! the caller is preventing illegal LPA settings and transistions.
858	!
859	ENTRY_NP(drmach_set_lpa)
860
861	!
862	! Set %g1 to this processor's cpuid.
863	!
864	CPU_INDEX(%g1, %g2)
865
866	!
867	! Get LPA message from mailbox, leave in %g5.
868	!
869	LOAD_MB(%g1, %g5, %g2)
870
871	!
872	! Set LPA, mailbox data in %g5.
873	!
874	SET_LPA(%g5, %g1, %g2)
875
876	!
877	! Signal work is done.
878	!
879	ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g1, %g2, %g3)
880
881	retry
882	SET_SIZE(drmach_set_lpa)
883
884!
885! drmach_bc_bzero
886!
887! inputs:
888! 	%o0 = base vaddr of area to clear (must be 64-byte aligned)
889!	%o1 = size of area to clear (must be multiple of 256 bytes)
890!
891! outputs:
892!	%o0 =
893!		0 (success)
894!		1 (size too small or not modulo 256)
895!		2 (vaddr not 64-byte aligned)
896!
897! Zero a block of storage using block commit stores.
898! Nonzero return if caller's address or size are not
899! block aligned.
900!
901
902
903	ENTRY(drmach_bc_bzero)
904
905	! verify size is >= 256 bytes
906	cmp	%o1, 256
907	blu,a	.bz_done
908	mov	1, %o0			! error code 1 for invalid size
909
910	! verify size is a multiple of 256
911	btst	(256-1), %o1
912	bnz,a	.bz_done
913	mov	1, %o0			! error code 1 for invalid size
914
915	! verify that vaddr is aligned for block stores
916	btst	(64-1), %o0
917	bnz,a	.bz_done
918	mov	2, %o0			! error code 2 for invalid alignment
919
920	! save fprs for restore when finished
921	rd	%fprs, %g1
922
923	! make sure FPU is enabled
924	rdpr	%pstate, %g3
925	btst	PSTATE_PEF, %g3
926	bnz	.bz_block
927	nop
928	andn	%g3, PSTATE_PEF, %g4
929	wrpr	%g4, PSTATE_PEF, %pstate
930
931.bz_block:
932	membar	#StoreStore|#StoreLoad|#LoadStore
933	wr	%g0, FPRS_FEF, %fprs
934
935	! Clear block
936	fzero	%d0
937	fzero	%d2
938	fzero	%d4
939	fzero	%d6
940	fzero	%d8
941	fzero	%d10
942	fzero	%d12
943	fzero	%d14
944	wr	%g0, ASI_BLK_COMMIT_P, %asi
945	mov	256, %o3
946	ba	.bz_doblock
947	nop
948
949.bz_blkstart:
950      ! stda	%d0, [%o0+192]%asi  ! in dly slot of branch that got us here
951	stda	%d0, [%o0+128]%asi
952	stda	%d0, [%o0+64]%asi
953	stda	%d0, [%o0]%asi
954	add	%o0, %o3, %o0
955	sub	%o1, %o3, %o1
956.bz_doblock:
957	cmp	%o1, 256
958	bgeu,a	%ncc, .bz_blkstart
959	stda	%d0, [%o0+192]%asi
960
961.bz_finish:
962	membar	#StoreLoad|#StoreStore
963	clr	%o0
964	wr	%g1, %fprs		! restore fprs
965	btst	PSTATE_PEF, %g3		! restore pstate if necessary
966	bnz	.bz_done
967	nop
968	wrpr	%g3, %g0, %pstate
969.bz_done:
970	membar	#Sync
971	retl
972	nop
973
974	SET_SIZE(drmach_bc_bzero)
975
976#endif /* lint */
977