xref: /titanic_52/usr/src/uts/sun4u/starcat/ml/drmach_asm.s (revision f498645a3eecf2ddd304b4ea9c7f1b4c155ff79e)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * This file is run through cpp before being used as
31 * an inline.  It contains support routines used
32 * only by DR.
33 */
34
35#if defined(lint)
36#include <sys/types.h>
37#else
38#include "assym.h"
39#endif /* lint */
40
41#include <sys/asm_linkage.h>
42#include <sys/clock.h>
43#include <sys/param.h>
44#include <sys/privregs.h>
45#include <sys/machasi.h>
46#include <sys/mmu.h>
47#include <sys/machthread.h>
48#include <sys/pte.h>
49#include <sys/stack.h>
50#include <sys/vis.h>
51#include <sys/cheetahregs.h>
52#include <sys/cmpregs.h>
53#include <sys/intreg.h>
54#include <sys/cheetahasm.h>
55
56#if defined(lint)
57
58/*ARGSUSED*/
59void
60drmach_shutdown_asm(uint64_t estack, uint64_t flushaddr, int size)
61{}	/* NOTE(review): asm entry documents 5 inputs (%o0-%o4); stub has 3 */
62
63/*ARGSUSED*/
64void
65drmach_rename(uint64_t *script, uint_t *err, uint64_t *id)
66{}	/* script, err, id arrive in %o0, %o1, %o2 of the assembly version */
67
68void
69drmach_rename_end(void)
70{}	/* in the assembly this is only a label following drmach_rename */
71
72/*ARGSUSED*/
73void
74drmach_rename_wait(uint64_t not_used_0, uint64_t not_used_1)
75{
76}
77
78/*ARGSUSED*/
79void
80drmach_rename_done(uint64_t not_used_0, uint64_t not_used_1)
81{
82}
83
84/*ARGSUSED*/
85void
86drmach_rename_abort(uint64_t not_used_0, uint64_t not_used_1)
87{
88}
89
90/*ARGSUSED*/
91uint64_t
92lddsafconfig(uint64_t physaddr)
93{	/* NOTE(review): the assembly header lists no inputs; confirm physaddr */
94	return (0x0ull);	/* asm returns the Safari Configuration Register */
95}
96
97/* ARGSUSED */
98uint32_t
99drmach_bc_bzero(void *addr, size_t size)
100{
101	return (0x0);	/* asm returns 0, 1 or 2; see the assembly header */
102}
103
104#else /* lint */
105
! BUS_SYNC(reg1, reg2)
! Walks the list of 64-bit entries that starts at [reg1].  Every
! non-zero entry is used as the address of a load through ASI_MEM
! whose result is discarded (%g0).  The walk ends at the first zero
! entry.  reg1 is advanced by 8 for every entry read, terminator
! included, because the add sits in the delay slot of the brz.
! reg2 is scratch.  The nop after "ba,a" is never executed (annulled).
! Uses local labels 1 and 2.
106#define BUS_SYNC(reg1, reg2)					\
1071:								;\
108	ldx	[reg1], reg2					;\
109	brz,pn	reg2, 2f					;\
110	add	reg1, 8, reg1					;\
111	ldxa	[reg2]ASI_MEM, %g0				;\
112	ba,a	1b						;\
113	nop							;\
1142:
115
! LOAD_MB(cpuid, mb_data, reg1)
! Loads the pointer stored in drmach_xt_mb, reads the byte at
! index cpuid into mb_data and then stores zero back to that byte.
! reg1 is scratch and is left pointing at the mailbox byte.
116#define LOAD_MB(cpuid, mb_data, reg1)				\
117	set	drmach_xt_mb, reg1				;\
118	ldx	[reg1], reg1					;\
119	add	reg1, cpuid, reg1				;\
120	ldub	[reg1], mb_data					;\
121	stub	%g0, [reg1]
122
! LPA_MASK selects the bits of the Safari Configuration Register
! that SET_LPA and SET_NULL_LPA rewrite (bits 3 through 14).
!
! SET_LPA(cmd, reg1, reg2)
! Interprets the mailbox byte in cmd:
!	bit 0x80 clear	nothing is done (branch straight to label 2).
!	bit 0x40 set	the new field value is zero (cmd is cleared
!			in the annulled delay slot of the bnz).
!	otherwise	n = cmd & 0x1f and the new field value is
!			(n << 3) | ((n + 1) << 9).
! The field value is masked with LPA_MASK and merged into the value
! read from [%g0]ASI_SAFARI_CONFIG, which is then written back and
! followed by membar #Sync.  cmd, reg1 and reg2 are all clobbered.
! Uses local labels 1 and 2.
123#define LPA_MASK 0x7ff8
124
125#define SET_LPA(cmd, reg1, reg2)				\
126	btst	0x80, cmd					;\
127	bz	2f						;\
128	nop							;\
129	btst	0x40, cmd					;\
130	bnz,a	1f						;\
131	mov	%g0, cmd					;\
132	and	cmd, 0x1f, cmd					;\
133	sllx	cmd, 3, reg1					;\
134	add	cmd, 1, cmd					;\
135	sllx	cmd, 9, cmd					;\
136	or	cmd, reg1, cmd					;\
1371:								;\
138	set	LPA_MASK, reg2					;\
139	ldxa	[%g0]ASI_SAFARI_CONFIG, reg1			;\
140	and	cmd, reg2, cmd					;\
141	andn	reg1, reg2, reg1				;\
142	or	reg1, cmd, reg1					;\
143	stxa	reg1, [%g0]ASI_SAFARI_CONFIG			;\
144	membar	#Sync						;\
1452:								;\
146
! SET_NULL_LPA(reg1, reg2)
! Clears the LPA_MASK bits of the Safari Configuration Register
! (read-modify-write through ASI_SAFARI_CONFIG, then membar #Sync).
! reg1 and reg2 are scratch.
147#define SET_NULL_LPA(reg1, reg2)				\
148	set	LPA_MASK, reg2					;\
149	ldxa	[%g0]ASI_SAFARI_CONFIG, reg1			;\
150	andn	reg1, reg2, reg1				;\
151	stxa	reg1, [%g0]ASI_SAFARI_CONFIG			;\
152	membar	#Sync						;\
153
154	! ATOMIC_ADD_LONG
155	! This code is run at TL > 0, being exec'd via a cross trap.
156	! While running at trap level > 0, all memory accesses are
157	! performed using NUCLEUS context, which is always 0.
158	! Since the cross trap handler does not force PRIMARY context
159	! to be zero, the following casxa instruction must specify
160	! NUCLEUS ASI.
161	! This ASI must be specified explicitly (via casxa), rather
162	! than using casx. This is because of the fact that the
163	! default casx specifies ASI_PRIMARY, which if non-zero, can
164	! prevent the cpu from translating the address, leading to panic
165	! on bad trap following repetitive dtlb misses.  This behavior
166	! was encountered on MCPUs when using casx instruction.
	!
	!	label:	symbol of the 64 bit variable to update.
	!	simm:	immediate value added to the variable.
	!	reg1:	scratch, holds the address of label.
	!	reg2:	scratch, holds the value the casxa compares against.
	!	reg3:	scratch, holds the new value / the casxa result.
	! The add is retried (reloading reg2 in the annulled delay
	! slot) until the casxa observes the expected old value.
	! Uses local label 1.
167#define ATOMIC_ADD_LONG(label, simm, reg1, reg2, reg3)		\
168	set	label, reg1					;\
169	ldx	[reg1], reg2					;\
1701:								;\
171	add	reg2, simm, reg3				;\
172	casxa	[reg1]ASI_N, reg2, reg3				;\
173	cmp	reg2, reg3					;\
174	bne,a,pn %xcc, 1b					;\
175	ldx	[reg1], reg2
176
! HERE(reg1, simm, reg2)
! Debug aid: reads %tick into reg2 and stores it at [reg1 + simm].
177#define HERE(reg1, simm, reg2)					\
178	rdpr	%tick, reg2					;\
179	stx	reg2, [reg1 + simm]
180
181	!
182	! Returns processor icache size and linesize in reg1 and
183	! reg2, respectively.  reg1 is first used to hold the cpu
	! implementation number obtained from GET_CPU_IMPL.
184	!
185	! Panther has a larger icache compared to Cheetahplus and
186	! Jaguar, so PN_ICACHE_* is returned when the implementation
	! equals PANTHER_IMPL and CH_ICACHE_* otherwise.
	! Uses local labels 1 and 2.
187	!
188#define	GET_ICACHE_PARAMS(reg1, reg2)				\
189	GET_CPU_IMPL(reg1)					;\
190	cmp	reg1, PANTHER_IMPL				;\
191	bne	%xcc, 1f					;\
192	  nop							;\
193	set	PN_ICACHE_SIZE, reg1				;\
194	set	PN_ICACHE_LSIZE, reg2				;\
195	ba	2f						;\
196	  nop							;\
1971:								;\
198	set	CH_ICACHE_SIZE, reg1				;\
199	set	CH_ICACHE_LSIZE, reg2				;\
2002:
201
202#define	DRMACH_MCU_IDLE_READS	3
203
204	! Macro to check if a Panther MC is idle.  The EMU Activity
205	! Status register is first read to clear the MCU status bit.
206	! The MCU status is then sampled DRMACH_MCU_IDLE_READS + 1 times
207	! (scr2 counts 0..DRMACH_MCU_IDLE_READS because of the ble test)
208	! to verify the MCU is indeed idle.  A single non-idle sample
209	! fails the check.  This could be made more lenient by adding a
	! retry loop.
210	!	addr:	Panther EMU Activity Status register read address.
211	!		Assumed to be 0x18 for local ASI access or else
212	!		FIREPLANE_ADDRESS_REG + 0x400050 for PIO access.
213	!		0 is returned in this register if MCU is idle and
214	!		queues are empty.  Otherwise, -1 is returned in this
215	!		register.
216	!	asi:	Immediate asi value.  Assumed to be ASI_SAFARI_CONFIG
217	!		for local ASI or ASI_IO for PIO access.
218	!	scr1:	Scratch (last status value read)
219	!	scr2:	Scratch (sample counter)
	! Uses local labels 0, 1 and 2.
220	!
221#define	CHECK_MCU_IDLE(addr, asi, scr1, scr2)			\
222	ldxa	[addr]asi, %g0					;\
223	ba	1f						;\
224	  clr	scr2						;\
2250:								;\
226	btst	MCU_ACT_STATUS, scr1				;\
227	bne,a	2f						;\
228	  sub	%g0, 1, addr					;\
229	inc	scr2						;\
2301:								;\
231	cmp	scr2, DRMACH_MCU_IDLE_READS			;\
232	ble,a	0b						;\
233	  ldxa    [addr]asi, scr1				;\
234	clr	addr						;\
2352:
236
237	! drmach_shutdown_asm
238	!
239	! inputs:
240	!	%o0 = stack pointer
241	!	%o1 = ecache flush address (ignored if cheetah+ processor)
242	!	%o2 = ecache size
243	!	%o3 = ecache line size
244	!	%o4 = phys addr of byte to clear when finished
245	!
246	! output:
247	!	Stores a zero at [%o4]ASI_MEM when the processor
248	!	is ready to be removed from domain coherency.
249	!
	! data area layout (offsets from %g6, as used below):
	!	  0	caller supplied stack pointer (%o0)
	!	  8	dcache_size		 12	dcache_linesize
	!	 16	icache size		 20	icache linesize
	!	 24	E$ flush PA (%o1)
	!	 32	E$ size (%o2)		 36	E$ linesize (%o3)
	!	 40	phys addr of signal byte (%o4)
	!	 48	relocated %sp (debug)
	!	128, 136, 144	%tick stamps written by HERE (debug)
	!
	! This routine does not return: after clearing the signal
	! byte it spins forever at local label 5.
	!
250	ENTRY_NP(drmach_shutdown_asm)
251	membar	#LoadStore		! parsley.
252
253	! Calculate pointer to data area. Determine size of
254	! drmach_shutdown_asm, add to base address and align
255	! to next 16 byte boundary. Leave result in %g6.
	! The base address is the value stored in drmach_cpu_sram_va.
256	set	drmach_shutdown_asm_end, %g6
257	set	drmach_shutdown_asm, %g1
258	set	drmach_cpu_sram_va, %g2
259	ldx	[%g2], %g2
260	sub	%g6, %g1, %g6		! %g6 = size of this routine
261	add	%g6, %g2, %g6		! %g6 = sram va + size
262	add	%g6, 15, %g6
263	andn	%g6, 15, %g6		! round up to 16 bytes
264
265	! Save parameters
266	stx	%o0, [%g6 + 0]		! save stack pointer
267	stx	%o1, [%g6 + 24]		! save E$ flush PA
268	st	%o2, [%g6 + 32]		! save E$ size
269	st	%o3, [%g6 + 36]		! save E$ linesize
270	stx	%o4, [%g6 + 40]		! save phys addr of signal byte
271
272	set	dcache_size, %g1
273	ld	[%g1], %g1
274	st	%g1, [%g6 + 8]		! save dcache_size
275	set	dcache_linesize, %g1
276	ld	[%g1], %g1
277	st	%g1, [%g6 + 12]		! save dcache_linesize
278
279	GET_ICACHE_PARAMS(%g1, %g2)
280	st	%g1, [%g6 + 16]		! save icache_size
281	st	%g2, [%g6 + 20]		! save icache_linesize
282
283	! Flushes all active windows except the current one.
284	! Can cause spill traps to occur.
285	flushw
286
287	! Make sure all asynchronous processing is complete.
288	! Note: has no implications on pending bus transactions.
289	membar	#Sync
290
291	! Move stack. Algorithm copied from t0stacktop setup of
292	! %sp in sun4u/ml/locore.s
293	! Replaces SWITCH_STACK() macro used in Starfire DR.
294	ldx	[%g6 + 0], %g1		! stack pointer passed in %o0
295	sub	%g1, SA(KFPUSIZE+GSR_SIZE), %g2
296 	and	%g2, 0x3f, %g3
297 	sub	%g2, %g3, %o2		! round down to a 64 byte boundary
298 	sub	%o2, SA(MPCBSIZE) + STACK_BIAS, %sp
299	stx	%sp, [%g6 + 48]		! for debug
300
301	HERE(%g6, 128, %g1)		! initialization complete (for debug)
302
303	! Panther needs to flush the L2 cache before the L3
304	! cache is flushed by the ecache flushall macro.
305	PN_L2_FLUSHALL(%g1, %g2, %g3)
306
307	! Flush E$. The purpose of this flush is to rid the E$ of
308	! lines in states O or Os. Implicitly flushes W$.
309	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
310	ld	[%g6 + 32], %g2		! ecache_size
311	ld	[%g6 + 36], %g3		! ecache_linesize
312	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
313
314	! Since the bus sync list read below does not guarantee
315	! transaction completion on Panther domains, as an
316	! optimization Panther skips the read and subsequent
317	! E$ flush.
318	GET_CPU_IMPL(%g1)
319	cmp	%g1, PANTHER_IMPL
320	be	%xcc, drmach_shutdown_ecache_flushed
321	  nop
322
323	!
324	! Ensure all outstanding writebacks have retired.  Following this
325	! sync, all writes must be strictly managed.
326	!
327	set	drmach_bus_sync_list, %g1
328	BUS_SYNC(%g1, %g2)
329
330	! Flush E$ again to victimize references to drmach_bus_sync_list.
331	ldx     [%g6 + 24], %g1         ! *ecache_flushaddr
332	ld	[%g6 + 32], %g2		! ecache_size
333	ld	[%g6 + 36], %g3		! ecache_linesize
334	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
335
336drmach_shutdown_ecache_flushed:
337
338	ld	[%g6 + 8], %g1		! flush dcache
339	ld	[%g6 + 12], %g2
340	CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
341
342	ld	[%g6 + 16], %g1		! flush icache
343	ld	[%g6 + 20], %g2
344	CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
345
346	PCACHE_FLUSHALL(%g1, %g2, %g3) ! flush pcache (no parameters)
347
348	!
349	! Flush all unlocked dtlb and itlb entries.
350	! Replaces TLB_FLUSH_UNLOCKED macro used in Starfire DR.
351	!
352	sethi	%hi(FLUSH_ADDR), %g1
353	set	DEMAP_ALL_TYPE, %g2
354	stxa	%g0, [%g2]ASI_DTLB_DEMAP
355	stxa	%g0, [%g2]ASI_ITLB_DEMAP
356	flush	%g1
357
358	!
359	! Zero LPA by clearing CBASE and CBND. Following
360	! this, all transactions to cachable address space
361	! will be of the remote flavor.
362	!
363	SET_NULL_LPA(%g1, %g2)
364
365	HERE(%g6, 136, %g1)		! preparation complete (for debug)
366
367	!
368	! Clear byte to signal finished.
369	! NOTE: This store will allocate in the E$. It is
370	! vitally important that this line is demoted to
371	! state I before removing this processor from the
372	! coherency.  The demotion is ensured by a synchronous
373	! "steal back" that takes place in drmach_cpu_poweroff.
374	ldx	[%g6 + 40], %g1		! phys addr of signal byte
375	stba	%g0, [%g1]ASI_MEM
3765:
377	HERE(%g6, 144, %g1)		! spin indicator (for debug)
378	ba	5b			! spin forever, never returns
379	  nop
380
381	.asciz	"drmach_shutdown_asm"		! for debug
382	.align	4
383	.global	drmach_shutdown_asm_end
384drmach_shutdown_asm_end:
385	SET_SIZE(drmach_shutdown_asm)
386
387
388	! lddsafconfig
389	!
390	! input:
391	!	nothing
392	!
393	! output:
394	!	%o0	content of this processor's SCR
395	!
396	!	Returns current value of this processor's Safari
397	!	Configuration Register.  The register is read through
	!	ASI_SAFARI_CONFIG in the delay slot of the retl.
398	!
399	ENTRY(lddsafconfig)
400        retl
401        ldxa    [%g0]ASI_SAFARI_CONFIG, %o0
402        SET_SIZE(lddsafconfig)
403
404	! drmach_rename
405	!
406	! input:
407	!	%o0	pointer to register address/value compound list
408	!	%o1	address for setting error code if rename did not
409	!		complete.  Unmodified if no error.
410	!	%o2	address for returning opaque memory controller id
411	!		in case of error.  Unmodified if no error.
412	!	Global	drmach_xt_mb[cpuid] is expected to be the new LPA.
413	!
414	! output:
415	!	[%o1] =	1 if failed to idle memory controller, otherwise unmodified.
416	!	[%o2] = id of failed memory controller, otherwise unmodified.
417	!
418	! Perform HW register reprogramming. This is the "rename" step for
419	! the copy-rename process.  drmach_rename is copied to a cpu's sram
420	! followed by register address/value pairs -- the text and data are
421	! sourced from the sram while drmach_rename is executed.
422	!
423	! The parameter is assumed to point to a concatenation of six
424	! zero-terminated lists located in non-cachable storage. The assumed
425	! format (and purpose) of each list is as follows:
426	!
427	!	1) a copy of drmach_bus_sync_list. A list of PA for each
428	!	   active memory bank in the domain. Used to infer the
429	!	   the completion of all pending coherent transactions
430	!	   initiated by this processor. Assumes MC work queue
431	!	   does not implement read bypass. This is true of Cheetah,
432	!	   Cheetah+, and Jaguar processors.  Panther does support
433	!	   read bypass, so for Panther MCs with read-bypass-write
434	!	   enabled, the read is issued but it does not guarantee
435	!	   completion of outstanding writes in the MC queue.
436	!	2) address/id pair for the local Panther EMU Activity Status
437	!	   Register of this processor.  The register address is assumed
438	!	   to be a VA which is polled via ASI_SAFARI_CONFIG until the
439	!	   MC queues are empty.  The id is an opaque identifier which
440	!	   must be returned along with an error code if the MCU status
441	!	   does not go idle.  See the parameter description above.
442	!	   This section will be empty if this processor is not a Panther.
443	!	   Both the address and id are assumed to be 64 bit values.
444	!	3) address/id pairs for non-local Panther EMU Activity Status
445	!	   Registers on other source and target processors.  The register
446	!	   address is assumed to be a PIO address which is polled via
447	!	   ASI_IO to drain/idle the MCs on other Panther procs.  The
448	!	   id is an opaque identifier which must be returned along with
449	!	   an error code if a MC fails to go idle.  This section will
450	!	   empty if there are no non-local Panther processors on the
451	!	   source and target expanders.  Both the address and id are
452	!	   assumed to be 64 bit values.
453	!	4) address/value pairs for the Memory Address Decoder
454	!	   register of this processor. The register address is
455	!	   assumed to be a VA within ASM_MC_DECODE space. The
456	!	   address and value elements are assumed to 64 bit values.
457	!	5) address/value pairs for any 64 bit register accessible
458	!	   via ASI_IO. The address and value fields are assumed to
459	!	   be 64 bit values.
460	!	   This list is typically used for reprogramming the Memory
461	!	   Address Decoder Register of other cpus and for reprogram-
462	!	   ming the Safari Configuration Register of I/O controllers.
463	!	6) address/value pairs for any 32 bit register accessible
464	!	   via ASI_IO. The address element is assumed to be a 64 bit
465	!	   value. The value element is assumed to be a 64 bit word
466	!	   containing a 32 bit value in the lower half.
467	!	   This list typically contains address/value pairs for
468	!	   AXQ CASM tables.
469	!
470	ENTRY_NP(drmach_rename)
471
472	mov	%o1, %o4		! save error code address
473	mov	%o2, %o5		! save error id address
474
475	BUS_SYNC(%o0, %o1)		! run section 1; %o0 ends past its terminator
476
477	SET_NULL_LPA(%o1, %o2)		! prep for cachable transactions
478					! after rename completes.
479					! e.g.: the load_mb that occurs below
4803:
481	ldx	[%o0], %o1		! run section 2
482	brz,a,pn %o1, 4f
483	add	%o0, 8, %o0		! skip section 2 terminator
484	CHECK_MCU_IDLE(%o1, ASI_SAFARI_CONFIG, %o2, %o3)
485	cmp	%o1, 0			! idled?
486	be,a	3b			! ok, advance
487	  add	%o0, 16, %o0		! (annulled unless idle) next addr/id pair
488	mov	1, %o1			! not idle, bailout
489	stw	%o1, [%o4]		! set MC idle error code
490	ldx	[%o0 + 8], %o1		! id element of the failing pair
491	stx	%o1, [%o5]		! set MC idle error id
492	retl
493	  nop
4944:
495	ldx	[%o0], %o1		! run section 3
496	brz,a,pn %o1, 5f
497	add	%o0, 8, %o0		! skip section 3 terminator
498	CHECK_MCU_IDLE(%o1, ASI_IO, %o2, %o3)
499	cmp	%o1, 0			! idled?
500	be,a	4b			! ok, advance
501	  add	%o0, 16, %o0		! (annulled unless idle) next addr/id pair
502	mov	1, %o1			! not idle, bailout
503	stw	%o1, [%o4]		! set MC idle error code
504	ldx	[%o0 + 8], %o1		! id element of the failing pair
505	stx	%o1, [%o5]		! set MC idle error id
506	retl
507	  nop
5085:
509	ldx	[%o0], %o1		! run section 4
510	brz,a,pn %o1, 6f
511	add	%o0, 8, %o0		! skip section 4 terminator
512	ldx	[%o0 + 8], %o2		! value element of the pair
513	stxa	%o2, [%o1]ASI_MC_DECODE
514	membar	#Sync
515	ldxa	[%o1]ASI_MC_DECODE, %g0	! read back to ensure written
516	b	5b
517	add	%o0, 16, %o0		! delay slot: next addr/value pair
5186:
519	ldx	[%o0], %o1		! run section 5
520	brz,a,pn %o1, 7f
521	add	%o0, 8, %o0		! skip section 5 terminator
522	ldx	[%o0 + 8], %o2		! value element of the pair
523	stxa	%o2, [%o1]ASI_IO	! 64 bit store
524	ldxa	[%o1]ASI_IO, %g0	! read back to ensure written
525	b	6b
526	add	%o0, 16, %o0		! delay slot: next addr/value pair
5277:
528	ldx	[%o0], %o1		! run section 6
529	brz,a,pn %o1, 8f
530	nop				! last section: terminator is not skipped
531	ldx	[%o0 + 8], %o2		! 32 bit value in the lower half
532	stwa	%o2, [%o1]ASI_IO	! 32 bit store
533	lduwa	[%o1]ASI_IO, %g0	! read back to ensure written
534	b	7b
535	add	%o0, 16, %o0		! delay slot: next addr/value pair
5368:
537	CPU_INDEX(%o0, %o1)		! %o0 = cpuid
538	LOAD_MB(%o0, %o1, %o2)		! %o1 = drmach_xt_mb[cpuid], then cleared
539	SET_LPA(%o1, %o0, %o2)		! program LPA from the mailbox byte
540
541	retl
542	nop
543
544	.asciz	"drmach_rename"		! for debug
545	.align	4
546	SET_SIZE(drmach_rename)
547
548	.global drmach_rename_end
549drmach_rename_end:
550
551
552	! drmach_rename_wait
553	!
554	! input:
555	!	nothing
556	!
557	! output:
558	!	nothing
559	!
560	! drmach_rename_wait is a cross-trap function used to move a
561	! cpu's execution out of coherent space while a copy-rename
562	! operation is in progress.
563	!
564	! In each CPU SRAM exists an area (16KB on Cheetah+ boards,
565	! 32KB on Jaguar/Panther boards) reserved for DR. This area is
566	! logically divided by DR into 8KB pages, one page per CPU (or
567	! core) in a port pair. (Two Safari ports share HW resources on
568	! a CPU/MEM board. These are referred to as a port pair.)
569	!
570	! This routine begins by mapping the appropriate SRAM page,
571	! transferring the machine code (between the labels
572	! drmach_rename_wait_asm and drmach_rename_wait_asm_end), then
573	! jumping to SRAM.  After returning from SRAM, the page is
574	! demapped before the cross-call is exited (sic).
575	!
576	! The machine code flushes all caches, waits for a special
577	! interrupt vector, then updates the processor's LPA and
578	! resynchronizes caches with the new home memory.
579	!
580	! The special interrupt vector is assumed to be a cross-call to
581	! drmach_rename_done sent by the master processor upon completing
582	! the copy-rename operation. The interrupt is received and discarded;
583	! The cross-call to drmach_rename_done is never executed.  Instead
584	! the Interrupt Receive Status Register is employed, temporarily,
585	! as a semaphore. This avoids unwanted bus traffic during the critical
586	! rename operation.
587	!
588	ENTRY_NP(drmach_rename_wait)
589
590	CPU_INDEX(%g5, %g1)		! put cpuid in %g5
591
592	!
593	! sfmmu_dtlb_ld(drmach_cpu_sram_va,
594	!	KCONTEXT, drmach_cpu_sram_tte[cpuid]);
595	! sfmmu_itlb_ld(drmach_cpu_sram_va,
596	!	KCONTEXT, drmach_cpu_sram_tte[cpuid]);
597	!
598	set	drmach_cpu_sram_tte, %g1
599	sllx	%g5, 3, %g2		! 8 byte tte per cpuid
600	ldx	[%g1 + %g2], %g3	! %g3 = drmach_cpu_sram_tte[cpuid]
601	set	drmach_cpu_sram_va, %g1
602	ldx	[%g1], %g1
603	or	%g1, KCONTEXT, %g2	! preserve %g1
604	set	MMU_TAG_ACCESS, %g4
605	sethi	%hi(ctx_pgsz_array), %g6
606	ldn	[%g6 + %lo(ctx_pgsz_array)], %g6
607	brz	%g6, 1f			! skip TAG_ACCESS_EXT if no array
608	  nop
609	ldub	[%g6 + KCONTEXT], %g6
610	sll	%g6, TAGACCEXT_SHIFT, %g6
611	set	MMU_TAG_ACCESS_EXT, %g7
612	stxa	%g6, [%g7]ASI_DMMU
6131:
614	stxa	%g2, [%g4]ASI_DMMU
615	stxa    %g3, [%g0]ASI_DTLB_IN
616	membar	#Sync
617	sethi	%hi(FLUSH_ADDR), %g6
618	stxa	%g2, [%g4]ASI_IMMU
619	stxa    %g3, [%g0]ASI_ITLB_IN
620	flush	%g6
621
622	!
623	! copy drmach_rename_wait_asm block to SRAM. Preserve entry
624	! point in %g1. After the code has been copied, align %g6
625	! (the destination pointer) to the next highest 16 byte
626	! boundary. This will define the start of the data area.
627	!
628	mov	%g1, %g6
629	set	drmach_rename_wait_asm, %g2
630	set	drmach_rename_wait_asm_end, %g3
6310:
632	lduw	[%g2], %g4		! do copy
633	stw	%g4, [%g6]
634	add	%g2, 4, %g2
635	cmp	%g2, %g3
636	bne	0b
637	add	%g6, 4, %g6		! delay slot: advance destination
638
639	add	%g6, 15, %g6		! locate data area on next 16 byte
640	andn	%g6, 15, %g6		! boundary following text
641					! WARNING: no bounds checking
642
643	jmpl	%g1, %g7		! jump to code in cpu sram
644	nop
645
	! The SRAM code comes back here via "jmpl %g7+8".
646	set	drmach_cpu_sram_va, %g1	! vtab_flushpage_tl1(drmach_cpu_sram_va,
647	ldx	[%g1], %g1		! 	KCONTEXT);
648	set	KCONTEXT, %g2
649	set	MMU_PCONTEXT, %g4
650	or	%g1, DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1
651	ldxa	[%g4]ASI_DMMU, %g5	/* rd old ctxnum */
652	stxa	%g2, [%g4]ASI_DMMU	/* wr new ctxum */
653	stxa	%g0, [%g1]ASI_DTLB_DEMAP
654	stxa	%g0, [%g1]ASI_ITLB_DEMAP
655	stxa	%g5, [%g4]ASI_DMMU	/* restore old ctxnum */
656
657	retry
658
659drmach_rename_wait_asm:
660	! the following code is copied to a cpu's sram and executed
661	! from there.
662	! Input:
663	!	%g5 is cpuid
664	!	%g6 is data area (follows text)
665	!	%g7 is link address back to caller
666	!
	! Data area layout (offsets from %g6, as used below):
	!	  4	cpuid (debug)
	!	  8	dcache_size		 12	dcache_linesize
	!	 16	icache size		 20	icache linesize
	!	 24	E$ flush address (value of drmach_iocage_paddr)
	!	 32	E$ size			 36	E$ linesize
	!	 40	saved mailbox byte
	!	 64	interrupt receive status
	!	 72, 80, 88	IRDR_0, IRDR_1, IRDR_2 payload
	!	128, 136, 144, 152	%tick stamps written by HERE (debug)
	!
667	st	%g5, [%g6 + 4]		! save cpuid (for debug)
668
669	set	dcache_size, %g1
670	ld	[%g1], %g1
671	st	%g1, [%g6 + 8]		! save dcache_size
672	set	dcache_linesize, %g1
673	ld	[%g1], %g1
674	st	%g1, [%g6 + 12]		! save dcache_linesize
675
676	GET_ICACHE_PARAMS(%g1, %g2)
677	st	%g1, [%g6 + 16]		! save icache_size
678	st	%g2, [%g6 + 20]		! save icache_linesize
679
680	set	drmach_iocage_paddr, %g1
681	ldx	[%g1], %g1
682	stx	%g1, [%g6 + 24]		! save *ecache_flushaddr
683
684	mulx	%g5, CPU_NODE_SIZE, %g1	! %g4 = &cpunodes[cpuid]
685	set	cpunodes, %g4
686	add	%g4, %g1, %g4
687	ld	[%g4 + ECACHE_SIZE], %g1
688	st	%g1, [%g6 + 32]		! save ecache_size
689	ld	[%g4 + ECACHE_LINESIZE], %g1
690	st	%g1, [%g6 + 36]		! save ecache_linesize
691
692	LOAD_MB(%g5, %g1, %g2)		! save mailbox data
693	stb	%g1, [%g6 + 40]
694
695	membar	#Sync			! Complete any pending processing.
696
697	! Flush E$. The purpose of this flush is to rid the E$ of
698	! lines in states O or Os. Implicitly flushes W$.
699	! NOTE: Reading the bus sync list and r/w ops on drmach_xt_ready
700	! will disturb the E$. The lines of the bus sync list will be
701	! in state S. The line containing drmach_xt_ready will be in
702	! state O. Before proceeding with the copy-rename, the master
703	! processor will "steal back" the drmach_xt_ready (sic) line.
704	! This will demote the state of the line in E$ to I.
705	! However, the lines containing the bus sync list must be
706	! victimized before returning to the OS. This is vital because
707	! following copy-rename the corresponding lines in the new home
708	! memory will be in state gM. The resulting S,gM state pair is
709	! invalid and does represent a loss of coherency. Flushing the
710	! E$ after the bus sync list is read will be sufficient to
711	! avoid the invalid condition.
712	!
713	! For Panther, there is redundancy as both cores flush the shared
714	! L2 and L3 caches.  As an optimization, only one core could do the
715	! flush of the shared caches, however care must be taken that the
716	! sibling core does not install owned lines once the flush begins.
717	PN_L2_FLUSHALL(%g1, %g2, %g3)
718	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
719	ld	[%g6 + 32], %g2		! ecache_size
720	ld	[%g6 + 36], %g3		! ecache_linesize
721	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
722
723	! Make sure all outstanding transactions for this processor
724	! have retired. See E$ note above.
725	set	drmach_bus_sync_list, %g1
726	BUS_SYNC(%g1, %g2)
727
728	HERE(%g6, 128, %g4)		! preparation complete (for debug)
729
730	! Signal this processor is ready for rename operation to begin.
731	! See E$ note above.
732	ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g2, %g3, %g4)
733
734	! Loop on IRSR waiting for interrupt. The expected interrupt
735	! is a cross-trap to drmach_rename_done (or drmach_rename_abort).
	! It is sent by the master
736	! processor when the copy-rename operation is complete. The
737	! received cross-trap is used only as a signal. It is not executed.
7382:
739	HERE(%g6, 136, %g4)		! last poll tick (for debug)
740
741	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g4	! wait for xt
742	btst	IRSR_BUSY, %g4
743	bz	2b
744	nop
745	stx	%g4, [%g6 + 64]		! save status and payload
746	set	IRDR_0, %g2
747	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
748	stx	%g2, [%g6 + 72]
749	set	IRDR_1, %g2
750	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
751	stx	%g2, [%g6 + 80]
752	set	IRDR_2, %g2
753	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
754	stx	%g2, [%g6 + 88]
755
756					! clear rcv status
757	stxa	%g0, [%g0]ASI_INTR_RECEIVE_STATUS
758	membar	#Sync
759
760	HERE(%g6, 144, %g4)		! signal rcvd tick (for debug)
761
762	! Check for copy-rename abort signal. If this signal is received,
763	! the LPA change is skipped since the rename step was not done.
764	! The cache flushes are still done as paranoia.
765	set	drmach_rename_abort, %g1
766	ldx	[%g6 + 72], %g2		! saved IRDR_0 payload
767	cmp 	%g1, %g2
768	be	3f
769	nop
770
771	! Resume waiting if this is not drmach_rename_done.
772	set	drmach_rename_done, %g1
773	cmp 	%g1, %g2
774	bne	2b
775	nop
776
777	ldub	[%g6 + 40], %g1		! get saved mailbox data
778	SET_LPA(%g1, %g2, %g3)		! set LPA as indicated by the mb data
779
7803:
781	! Flush all caches (E, D, I and P) to ensure each is resynchronized
782	! with the corresponding states in the new home memory. (W$ is
783	! implicitly flushed when the E$ is flushed.)
784	!
785	! Panther needs to flush the L2 cache before the L3
786	! cache is flushed by the ecache flushall macro.
787	PN_L2_FLUSHALL(%g1, %g2, %g3)
788
789	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
790	ld	[%g6 + 32], %g2		! ecache_size
791	ld	[%g6 + 36], %g3		! ecache_linesize
792	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
793
794	ld	[%g6 + 8], %g1		! flush dcache
795	ld	[%g6 + 12], %g2
796	CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
797
798	ld	[%g6 + 16], %g1		! flush icache
799	ld	[%g6 + 20], %g2
800	CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
801
802	PCACHE_FLUSHALL(%g1, %g2, %g3)	! flush pcache (no parameters)
803
804	HERE(%g6, 152, %g4)		! done tick (for debug)
805
806	jmpl	%g7+8, %g0		! back to drmach_rename_wait, past the
807	nop				! jmpl and its delay slot
808
809	.asciz	"drmach_rename_wait"	! for debug
810	.align	4
811drmach_rename_wait_asm_end:
812	SET_SIZE(drmach_rename_wait)
813
814
815	! drmach_rename_done
816	!
817	! input:
818	!	nothing
819	!
820	! output:
821	!	nothing
822	!
823	! Used as signal data. See drmach_rename_wait, which compares the
	! address of this routine against the received IRDR_0 payload.
824	!
825	ENTRY_NP(drmach_rename_done)
826	retry				! does nothing if actually executed
827	SET_SIZE(drmach_rename_done)
828
829	! drmach_rename_abort
830	!
831	! input:
832	!	nothing
833	!
834	! output:
835	!	nothing
836	!
837	! Used as signal data. See drmach_rename_wait, which skips the LPA
	! update when the received IRDR_0 payload equals this address.
838	!
839	ENTRY_NP(drmach_rename_abort)
840	retry				! does nothing if actually executed
841	SET_SIZE(drmach_rename_abort)
842
843
844	! drmach_set_lpa
845	!
846	! input:
847	!	Globals: drmach_xt_mb[cpuid] contains new LPA data
848	!
849	! output:
850	!	nothing
851	!
852	! Sets the executing processor's LPA as indicated by the command
853	! stored in drmach_xt_mb, a byte array indexed by cpuid. Assumes
854	! the caller is preventing illegal LPA settings and transitions.
	! The mailbox byte is cleared by LOAD_MB and drmach_xt_ready is
	! incremented once the LPA has been processed.  Uses %g1-%g3 and
	! %g5 and exits with retry.
855	!
856	ENTRY_NP(drmach_set_lpa)
857
858	!
859	! Set %g1 to this processor's cpuid.
860	!
861	CPU_INDEX(%g1, %g2)
862
863	!
864	! Get LPA message from mailbox, leave in %g5.
865	!
866	LOAD_MB(%g1, %g5, %g2)
867
868	!
869	! Set LPA, mailbox data in %g5.
870	!
871	SET_LPA(%g5, %g1, %g2)
872
873	!
874	! Signal work is done.
875	!
876	ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g1, %g2, %g3)
877
878	retry
879	SET_SIZE(drmach_set_lpa)
880
881!
882! drmach_bc_bzero
883!
884! inputs:
885! 	%o0 = base vaddr of area to clear (must be 64-byte aligned)
886!	%o1 = size of area to clear (must be multiple of 256 bytes)
887!
888! outputs:
889!	%o0 =
890!		0 (success)
891!		1 (size too small or not modulo 256)
892!		2 (vaddr not 64-byte aligned)
893!
894! Zero a block of storage using block commit stores.
895! Nonzero return if caller's address or size are not
896! block aligned.
897!
! %fprs and (when PSTATE_PEF had to be turned on) %pstate are
! restored before returning.  The contents of %d0-%d14 and of %asi
! are overwritten and not restored; %g1, %g3, %g4, %o1 and %o3 are
! used as scratch.
!
898
899	ENTRY(drmach_bc_bzero)
900
901	! verify size is >= 256 bytes.  The size is a 64 bit quantity,
	! so the branch must test %ncc; the plain Bicc form looks at
	! %icc only and would reject any size whose low 32 bits are
	! below 256 (e.g. exactly 4GB).
902	cmp	%o1, 256
903	blu,a,pn %ncc, .bz_done
904	mov	1, %o0			! error code 1 for invalid size
905
906	! verify size is a multiple of 256
907	btst	(256-1), %o1
908	bnz,a,pn %ncc, .bz_done
909	mov	1, %o0			! error code 1 for invalid size
910
911	! verify that vaddr is aligned for block stores
912	btst	(64-1), %o0
913	bnz,a,pn %ncc, .bz_done
914	mov	2, %o0			! error code 2 for invalid alignment
915
916	! save fprs for restore when finished
917	rd	%fprs, %g1
918
919	! make sure FPU is enabled
920	rdpr	%pstate, %g3		! %g3 keeps the original pstate
921	btst	PSTATE_PEF, %g3
922	bnz	.bz_block
923	nop
924	andn	%g3, PSTATE_PEF, %g4
925	wrpr	%g4, PSTATE_PEF, %pstate	! wrpr xors: sets PEF
926
927.bz_block:
928	membar	#StoreStore|#StoreLoad|#LoadStore
929	wr	%g0, FPRS_FEF, %fprs
930
931	! Clear block
932	fzero	%d0
933	fzero	%d2
934	fzero	%d4
935	fzero	%d6
936	fzero	%d8
937	fzero	%d10
938	fzero	%d12
939	fzero	%d14
940	wr	%g0, ASI_BLK_COMMIT_P, %asi
941	mov	256, %o3		! bytes cleared per iteration
942	ba	.bz_doblock
943	nop
944
945.bz_blkstart:
946      ! stda	%d0, [%o0+192]%asi  ! in dly slot of branch that got us here
947	stda	%d0, [%o0+128]%asi
948	stda	%d0, [%o0+64]%asi
949	stda	%d0, [%o0]%asi
950	add	%o0, %o3, %o0
951	sub	%o1, %o3, %o1
952.bz_doblock:
953	cmp	%o1, 256
954	bgeu,a	%ncc, .bz_blkstart
955	stda	%d0, [%o0+192]%asi	! annulled when the loop exits
956
957.bz_finish:
958	membar	#StoreLoad|#StoreStore
959	clr	%o0			! return 0 (success)
960	wr	%g1, %fprs		! restore fprs
961	btst	PSTATE_PEF, %g3		! restore pstate if necessary
962	bnz	.bz_done
963	nop
964	wrpr	%g3, %g0, %pstate
965.bz_done:
966	membar	#Sync
967	retl
968	nop
969
970	SET_SIZE(drmach_bc_bzero)
972
973#endif /* lint */
974