xref: /titanic_41/usr/src/uts/sun4u/starcat/ml/drmach_asm.s (revision bbb1277b6ec1b0daad4e3ed1a2b891d3e2ece2eb)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
 * This file is run through cpp before being used as
28 * an inline.  It contains support routines used
29 * only by DR.
30 */
31
32#if defined(lint)
33#include <sys/types.h>
34#else
35#include "assym.h"
36#endif /* lint */
37
38#include <sys/asm_linkage.h>
39#include <sys/clock.h>
40#include <sys/param.h>
41#include <sys/privregs.h>
42#include <sys/machasi.h>
43#include <sys/mmu.h>
44#include <sys/machthread.h>
45#include <sys/pte.h>
46#include <sys/stack.h>
47#include <sys/vis.h>
48#include <sys/cheetahregs.h>
49#include <sys/cmpregs.h>
50#include <sys/intreg.h>
51#include <sys/cheetahasm.h>
52
53#if defined(lint)
54
55/*ARGSUSED*/
56void
57drmach_shutdown_asm(uint64_t estack, uint64_t flushaddr,
58    int size, int lsz, uint64_t physmem)
59{}
60
61/*ARGSUSED*/
62void
63drmach_rename(uint64_t *script, uint_t *err, uint64_t *id)
64{}
65
66void
67drmach_rename_end(void)
68{}
69
70/*ARGSUSED*/
71void
72drmach_rename_wait(uint64_t not_used_0, uint64_t not_used_1)
73{
74}
75
76/*ARGSUSED*/
77void
78drmach_rename_done(uint64_t not_used_0, uint64_t not_used_1)
79{
80}
81
82/*ARGSUSED*/
83void
84drmach_rename_abort(uint64_t not_used_0, uint64_t not_used_1)
85{
86}
87
88/*ARGSUSED*/
89uint64_t
90lddsafconfig(void)
91{
92	return (0x0ull);
93}
94
95/* ARGSUSED */
96uint32_t
97drmach_bc_bzero(void *addr, size_t size)
98{
99	return (0x0);
100}
101
102#else /* lint */
103
/*
 * BUS_SYNC(reg1, reg2)
 *
 * Walks the zero-terminated list of 64-bit entries addressed by reg1.
 * Every non-zero entry is used as the address of an ASI_MEM load whose
 * result is thrown away (%g0).  reg1 is bumped by 8 for each entry that
 * is examined, terminator included, so on exit it points just past the
 * terminating zero.  reg2 is scratch.
 */
#define	BUS_SYNC(reg1, reg2)					\
1:								;\
	ldx	[reg1], reg2					;\
	brz,pn	reg2, 2f					;\
	  add	reg1, 8, reg1					;\
	ldxa	[reg2]ASI_MEM, %g0				;\
	ba,a	1b						;\
	  nop							;\
2:
113
/*
 * LOAD_MB(cpuid, mb_data, reg1)
 *
 * Fetches and then clears a mailbox byte.  The pointer stored in
 * drmach_xt_mb is loaded, cpuid is added to it, the byte found there is
 * loaded zero-extended into mb_data, and a zero byte is stored back in
 * its place.  reg1 is scratch and ends up addressing the mailbox byte.
 */
#define	LOAD_MB(cpuid, mb_data, reg1)				\
	set	drmach_xt_mb, reg1				;\
	ldx	[reg1], reg1					;\
	add	reg1, cpuid, reg1				;\
	ldub	[reg1], mb_data					;\
	stb	%g0, [reg1]
120
/*
 * Bits 3 through 14 of the Safari Configuration Register: the field that
 * SET_LPA rewrites.
 */
#define	LPA_MASK	0x7ff8

/*
 * SET_LPA(cmd, reg1, reg2)
 *
 * Applies a one-byte LPA command to this processor's Safari
 * Configuration Register.  All three registers are clobbered.
 *
 *	bit 0x80 clear	nothing is done.
 *	bit 0x40 set	the LPA_MASK field is written as zero.
 *	otherwise	with n = cmd & 0x1f, the field is written as
 *			(n << 3) | ((n + 1) << 9).
 *
 * The register update is a read-modify-write through ASI_SAFARI_CONFIG
 * that leaves bits outside LPA_MASK untouched, followed by membar #Sync.
 * NOTE(review): the two sub-fields are presumably the base and bound
 * values of the LPA -- confirm against the processor manual.
 */
#define	SET_LPA(cmd, reg1, reg2)				\
	btst	0x80, cmd					;\
	bz	2f						;\
	  nop							;\
	btst	0x40, cmd					;\
	bnz,a	1f						;\
	  clr	cmd						;\
	and	cmd, 0x1f, cmd					;\
	sllx	cmd, 3, reg1					;\
	inc	cmd						;\
	sllx	cmd, 9, cmd					;\
	or	cmd, reg1, cmd					;\
1:								;\
	set	LPA_MASK, reg2					;\
	ldxa	[%g0]ASI_SAFARI_CONFIG, reg1			;\
	and	cmd, reg2, cmd					;\
	andn	reg1, reg2, reg1				;\
	or	reg1, cmd, reg1					;\
	stxa	reg1, [%g0]ASI_SAFARI_CONFIG			;\
	membar	#Sync						;\
2:
144
/*
 * SET_NULL_LPA(reg1, reg2)
 *
 * Clears every LPA_MASK bit in this processor's Safari Configuration
 * Register using a read-modify-write through ASI_SAFARI_CONFIG followed
 * by membar #Sync.  Both reg1 and reg2 are clobbered.
 */
#define	SET_NULL_LPA(reg1, reg2)				\
	set	LPA_MASK, reg2					;\
	ldxa	[%g0]ASI_SAFARI_CONFIG, reg1			;\
	andn	reg1, reg2, reg1				;\
	stxa	reg1, [%g0]ASI_SAFARI_CONFIG			;\
	membar	#Sync
151
152	! ATOMIC_ADD_LONG
153	! This code is run at TL > 0, being exec'd via a cross trap.
154	! While running at trap level > 0, all memory accesses are
155	! performed using NUCLEUS context, which is always 0.
156	! Since the cross trap handler does not force PRIMARY context
157	! to be zero, the following casxa instruction must specify
158	! NUCLEUS ASI.
159	! This ASI must be specified explicitly (via casxa), rather
160	! than using casx. This is because of the fact that the
161	! default casx specifies ASI_PRIMARY, which if non-zero, can
162	! prevent the cpu from translating the address, leading to panic
163	! on bad trap following repetitive dtlb misses.  This behavior
164	! was encountered on MCPUs when using casx instruction.
165#define ATOMIC_ADD_LONG(label, simm, reg1, reg2, reg3)		\
166	set	label, reg1					;\
167	ldx	[reg1], reg2					;\
1681:								;\
169	add	reg2, simm, reg3				;\
170	casxa	[reg1]ASI_N, reg2, reg3				;\
171	cmp	reg2, reg3					;\
172	bne,a,pn %xcc, 1b					;\
173	ldx	[reg1], reg2
174
/*
 * HERE(reg1, simm, reg2)
 *
 * Debug breadcrumb: reads %tick into reg2 and stores it as a 64-bit
 * value at [reg1 + simm].  reg2 is clobbered.
 */
#define	HERE(reg1, simm, reg2)					\
	rdpr	%tick, reg2					;\
	stx	reg2, [reg1 + simm]
178
179	!
180	! Returns processor icache size and linesize in reg1 and
181	! reg2, respectively.
182	!
183	! Panther has a larger icache compared to Cheetahplus and
184	! Jaguar.
185	!
186#define	GET_ICACHE_PARAMS(reg1, reg2)				\
187	GET_CPU_IMPL(reg1)					;\
188	cmp	reg1, PANTHER_IMPL				;\
189	bne	%xcc, 1f					;\
190	  nop							;\
191	set	PN_ICACHE_SIZE, reg1				;\
192	set	PN_ICACHE_LSIZE, reg2				;\
193	ba	2f						;\
194	  nop							;\
1951:								;\
196	set	CH_ICACHE_SIZE, reg1				;\
197	set	CH_ICACHE_LSIZE, reg2				;\
1982:
199
/*
 * Upper bound used by the CHECK_MCU_IDLE polling loop.
 */
#define	DRMACH_MCU_IDLE_READS	3

/*
 * CHECK_MCU_IDLE(addr, asi, scr1, scr2)
 *
 * Checks whether a Panther memory controller is idle.
 *
 *	addr	On entry, the read address of the Panther EMU Activity
 *		Status register: assumed to be 0x18 for local ASI access,
 *		or FIREPLANE_ADDRESS_REG + 0x400050 for PIO access.
 *		On exit, 0 if the MCU is idle with empty queues and -1
 *		otherwise.
 *	asi	Immediate asi value: assumed to be ASI_SAFARI_CONFIG for
 *		local access or ASI_IO for PIO access.
 *	scr1	Scratch; receives each status value that is read.
 *	scr2	Scratch; loop counter.
 *
 * The register is read once with the result discarded, which clears the
 * MCU status bit.  It is then re-read and tested against MCU_ACT_STATUS
 * while the counter, which starts at 0, is <= DRMACH_MCU_IDLE_READS.
 * NOTE(review): because the bound is inclusive, the status is tested
 * DRMACH_MCU_IDLE_READS + 1 times, not DRMACH_MCU_IDLE_READS times.
 * A single non-idle reading fails the whole check; a retry loop could
 * be added to make this more lenient.
 */
#define	CHECK_MCU_IDLE(addr, asi, scr1, scr2)			\
	ldxa	[addr]asi, %g0					;\
	ba	1f						;\
	  mov	%g0, scr2					;\
0:								;\
	btst	MCU_ACT_STATUS, scr1				;\
	bne,a	2f						;\
	  mov	-1, addr					;\
	add	scr2, 1, scr2					;\
1:								;\
	cmp	scr2, DRMACH_MCU_IDLE_READS			;\
	ble,a	0b						;\
	  ldxa	[addr]asi, scr1					;\
	mov	%g0, addr					;\
2:
234
235	! drmach_shutdown_asm
236	!
237	! inputs:
238	!	%o0 = stack pointer
239	!	%o1 = ecache flush address (ignored if cheetah+ processor)
240	!	%o2 = ecache size
241	!	%o3 = ecache line size
242	!	%o4 = phys addr of byte to clear when finished
243	!
244	! output:
245	!	Stores a zero at [%o4]ASI_MEM when the processor
246	!	is ready to be removed from domain coherency.
247	!
248	ENTRY_NP(drmach_shutdown_asm)
249	membar	#LoadStore		! parsley.
250
251	! Calculate pointer to data area. Determine size of
252	! drmach_shutdown_asm, add to base address and align
253	! to next 16 byte boundary. Leave result in %g6.
254	set	drmach_shutdown_asm_end, %g6
255	set	drmach_shutdown_asm, %g1
256	set	drmach_cpu_sram_va, %g2
257	ldx	[%g2], %g2
258	sub	%g6, %g1, %g6
259	add	%g6, %g2, %g6
260	add	%g6, 15, %g6
261	andn	%g6, 15, %g6
262
263	! Save parameters
264	stx	%o0, [%g6 + 0]		! save stack pointer
265	stx	%o1, [%g6 + 24]		! save E$ flush PA
266	st	%o2, [%g6 + 32]		! save E$ size
267	st	%o3, [%g6 + 36]		! save E$ linesize
268	stx	%o4, [%g6 + 40]		! save phys addr of signal byte
269
270	set	dcache_size, %g1
271	ld	[%g1], %g1
272	st	%g1, [%g6 + 8]		! save dcache_size
273	set	dcache_linesize, %g1
274	ld	[%g1], %g1
275	st	%g1, [%g6 + 12]		! save dcache_linesize
276
277	GET_ICACHE_PARAMS(%g1, %g2)
278	st	%g1, [%g6 + 16]		! save icache_size
279	st	%g2, [%g6 + 20]		! save icache_linesize
280
281	! Flushes all active windows except the current one.
282	! Can cause spill traps to occur.
283	flushw
284
285	! Make sure all asynchronous processing is complete.
286	! Note: has no implications on pending bus transactions.
287	membar	#Sync
288
289	! Move stack. Algorithm copied from t0stacktop setup of
290	! %sp in sun4u/ml/locore.s
291	! Replaces SWITCH_STACK() macro used in Starfire DR.
292	ldx	[%g6 + 0], %g1
293	sub	%g1, SA(KFPUSIZE+GSR_SIZE), %g2
294 	and	%g2, 0x3f, %g3
295 	sub	%g2, %g3, %o2
296 	sub	%o2, SA(MPCBSIZE) + STACK_BIAS, %sp
297	stx	%sp, [%g6 + 48]		! for debug
298
299	HERE(%g6, 128, %g1)		! initialization complete (for debug)
300
301	! Panther needs to flush the L2 cache before the L3
302	! cache is flushed by the ecache flushall macro.
303	PN_L2_FLUSHALL(%g1, %g2, %g3)
304
305	! Flush E$. The purpose of this flush is to rid the E$ of
306	! lines in states O or Os. Implicitly flushes W$.
307	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
308	ld	[%g6 + 32], %g2		! ecache_size
309	ld	[%g6 + 36], %g3		! ecache_linesize
310	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
311
312	! Since the bus sync list read below does not guarantee
313	! transaction completion on Panther domains, as an
314	! optimization Panther skips the read and subsequent
315	! E$ flush.
316	GET_CPU_IMPL(%g1)
317	cmp	%g1, PANTHER_IMPL
318	be	%xcc, drmach_shutdown_ecache_flushed
319	  nop
320
321	!
322	! Ensure all outstanding writebacks have retired.  Following this
323	! sync, all writes must be strictly managed.
324	!
325	set	drmach_bus_sync_list, %g1
326	BUS_SYNC(%g1, %g2)
327
328	! Flush E$ again to victimize references to drmach_bus_sync_list.
329	ldx     [%g6 + 24], %g1         ! *ecache_flushaddr
330	ld	[%g6 + 32], %g2		! ecache_size
331	ld	[%g6 + 36], %g3		! ecache_linesize
332	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
333
334drmach_shutdown_ecache_flushed:
335
336	ld	[%g6 + 8], %g1		! flush dcache
337	ld	[%g6 + 12], %g2
338	CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
339
340	ld	[%g6 + 16], %g1		! flush icache
341	ld	[%g6 + 20], %g2
342	CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
343
344	PCACHE_FLUSHALL(%g1, %g2, %g3) ! flush pcache (no parameters)
345
346	!
347	! Flush all unlocked dtlb and itlb entries.
348	! Replaces TLB_FLUSH_UNLOCKED macro used in Starfire DR.
349	!
350	sethi	%hi(FLUSH_ADDR), %g1
351	set	DEMAP_ALL_TYPE, %g2
352	stxa	%g0, [%g2]ASI_DTLB_DEMAP
353	stxa	%g0, [%g2]ASI_ITLB_DEMAP
354	flush	%g1
355
356	!
357	! Zero LPA by clearing CBASE and CBND. Following
358	! this, all transactions to cachable address space
359	! will be of the remote flavor.
360	!
361	SET_NULL_LPA(%g1, %g2)
362
363	HERE(%g6, 136, %g1)		! preparation complete (for debug)
364
365	!
366	! Clear byte to signal finished.
367	! NOTE: This store will allocate in the E$. It is
368	! vitally important that this line is demoted to
369	! state I before removing this processor from the
370	! coherency.  The demotion is ensured by a synchronous
371	! "steal back" that takes place in drmach_cpu_poweroff.
372	ldx	[%g6 + 40], %g1
373	stba	%g0, [%g1]ASI_MEM
3745:
375	HERE(%g6, 144, %g1)		! spin indicator (for debug)
376	ba	5b
377	  nop
378
379	.asciz	"drmach_shutdown_asm"		! for debug
380	.align	4
381	.global	drmach_shutdown_asm_end
382drmach_shutdown_asm_end:
383	SET_SIZE(drmach_shutdown_asm)
384
385
386	! lddsafconfig
387	!
388	! input:
389	!	nothing
390	!
391	! output:
392	!	%o0	content of this processor's SCR
393	!
394	!	Returns current value of this processor's Safari
395	!	Configuration Register.
396	!
397	ENTRY(lddsafconfig)
398        retl
399        ldxa    [%g0]ASI_SAFARI_CONFIG, %o0
400        SET_SIZE(lddsafconfig)
401
402	! drmach_rename
403	!
404	! input:
405	!	%o0	pointer to register address/value compound list
406	!	%o1	address for setting error code if rename did not
407	!		complete.  Unmodified if no error.
408	!	%o2	address for returning opaque memory controller id
409	!		in case of error.  Unmodified if no error.
410	!	Global	drmach_xt_mb[cpuid] is expected to be the new LPA.
411	!
412	! output:
413	!	[%o1] =	1 if failed to idle memory controller, otherwise unmodified.
414	!	[%o2] = id of failed memory controller, otherwise unmodified.
415	!
416	! Perform HW register reprogramming. This is the "rename" step for
417	! the copy-rename process.  drmach_rename is copied to a cpu's sram
418	! followed by register address/value pairs -- the text and data are
419	! sourced from the sram while drmach_rename is executed.
420	!
421	! The parameter is assumed to point to a concatenation of six
422	! zero-terminated lists located in non-cachable storage. The assumed
423	! format (and purpose) of each list is as follows:
424	!
425	!	1) a copy of drmach_bus_sync_list. A list of PA for each
	!	   active memory bank in the domain. Used to infer the
	!	   completion of all pending coherent transactions
428	!	   initiated by this processor. Assumes MC work queue
429	!	   does not implement read bypass. This is true of Cheetah,
430	!	   Cheetah+, and Jaguar processors.  Panther does support
431	!	   read bypass, so for Panther MCs with read-bypass-write
432	!	   enabled, the read is issued but it does not guarantee
433	!	   completion of outstanding writes in the MC queue.
434	!	2) address/id pair for the local Panther EMU Activity Status
435	!	   Register of this processor.  The register address is assumed
436	!	   to be a VA which is polled via ASI_SAFARI_CONFIG until the
437	!	   MC queues are empty.  The id is an opaque identifier which
438	!	   must be returned along with an error code if the MCU status
439	!	   does not go idle.  See the parameter description above.
440	!	   This section will be empty if this processor is not a Panther.
441	!	   Both the address and id are assumed to be 64 bit values.
442	!	3) address/id pairs for non-local Panther EMU Activity Status
443	!	   Registers on other source and target processors.  The register
444	!	   address is assumed to be a PIO address which is polled via
445	!	   ASI_IO to drain/idle the MCs on other Panther procs.  The
446	!	   id is an opaque identifier which must be returned along with
	!	   an error code if a MC fails to go idle.  This section will
	!	   be empty if there are no non-local Panther processors on the
449	!	   source and target expanders.  Both the address and id are
450	!	   assumed to be 64 bit values.
451	!	4) address/value pairs for the Memory Address Decoder
452	!	   register of this processor. The register address is
	!	   assumed to be a VA within ASI_MC_DECODE space. The
	!	   address and value elements are assumed to be 64 bit values.
455	!	5) address/value pairs for any 64 bit register accessible
456	!	   via ASI_IO. The address and value fields are assumed to
457	!	   be 64 bit values.
458	!	   This list is typically used for reprogramming the Memory
459	!	   Address Decoder Register of other cpus and for reprogram-
460	!	   ming the Safari Configuration Register of I/O controllers.
461	!	6) address/value pairs for any 32 bit register accessible
462	!	   via ASI_IO. The address element is assumed to be a 64 bit
463	!	   value. The value element is assumed to be a 64 bit word
464	!	   containing a 32 bit value in the lower half.
465	!	   This list typically contains address/value pairs for
466	!	   AXQ CASM tables.
467	!
468	ENTRY_NP(drmach_rename)
469
470	mov	%o1, %o4		! save error code address
471	mov	%o2, %o5		! save error id address
472
473	BUS_SYNC(%o0, %o1)		! run section 1
474
475	SET_NULL_LPA(%o1, %o2)		! prep for cachable transactions
476					! after rename completes.
477					! e.g.: the load_mb that occurs below
4783:
479	ldx	[%o0], %o1		! run section 2
480	brz,a,pn %o1, 4f
481	add	%o0, 8, %o0		! skip section 2 terminator
482	CHECK_MCU_IDLE(%o1, ASI_SAFARI_CONFIG, %o2, %o3)
483	cmp	%o1, 0			! idled?
484	be,a	3b			! ok, advance
485	  add	%o0, 16, %o0
486	mov	1, %o1			! not idle, bailout
487	stw	%o1, [%o4]		! set MC idle error code
488	ldx	[%o0 + 8], %o1
489	stx	%o1, [%o5]		! set MC idle error id
490	retl
491	  nop
4924:
493	ldx	[%o0], %o1		! run section 3
494	brz,a,pn %o1, 5f
495	add	%o0, 8, %o0		! skip section 3 terminator
496	CHECK_MCU_IDLE(%o1, ASI_IO, %o2, %o3)
497	cmp	%o1, 0			! idled?
498	be,a	4b			! ok, advance
499	  add	%o0, 16, %o0
500	mov	1, %o1			! not idle, bailout
501	stw	%o1, [%o4]		! set MC idle error code
502	ldx	[%o0 + 8], %o1
503	stx	%o1, [%o5]		! set MC idle error id
504	retl
505	  nop
5065:
507	ldx	[%o0], %o1		! run section 4
508	brz,a,pn %o1, 6f
509	add	%o0, 8, %o0		! skip section 4 terminator
510	ldx	[%o0 + 8], %o2
511	stxa	%o2, [%o1]ASI_MC_DECODE
512	membar	#Sync
513	ldxa	[%o1]ASI_MC_DECODE, %g0	! read back to insure written
514	b	5b
515	add	%o0, 16, %o0
5166:
517	ldx	[%o0], %o1		! run section 5
518	brz,a,pn %o1, 7f
519	add	%o0, 8, %o0		! skip section 5 terminator
520	ldx	[%o0 + 8], %o2
521	stxa	%o2, [%o1]ASI_IO
522	ldxa	[%o1]ASI_IO, %g0	! read back to insure written
523	b	6b
524	add	%o0, 16, %o0
5257:
526	ldx	[%o0], %o1		! run section 6
527	brz,a,pn %o1, 8f
528	nop
529	ldx	[%o0 + 8], %o2
530	stwa	%o2, [%o1]ASI_IO
531	lduwa	[%o1]ASI_IO, %g0	! read back to insure written
532	b	7b
533	add	%o0, 16, %o0
5348:
535	CPU_INDEX(%o0, %o1)
536	LOAD_MB(%o0, %o1, %o2)
537	SET_LPA(%o1, %o0, %o2)
538
539	retl
540	nop
541
542	.asciz	"drmach_rename"		! for debug
543	.align	4
544	SET_SIZE(drmach_rename)
545
546	.global drmach_rename_end
547drmach_rename_end:
548
549
550	! drmach_rename_wait
551	!
552	! input:
553	!	nothing
554	!
555	! output:
556	!	nothing
557	!
558	! drmach_rename_wait is a cross-trap function used to move a
559	! cpu's execution out of coherent space while a copy-rename
560	! operation is in progress.
561	!
562	! In each CPU SRAM exists an area (16KB on Cheetah+ boards,
563	! 32KB on Jaguar/Panther boards) reserved for DR. This area is
564	! logically divided by DR into 8KB pages, one page per CPU (or
565	! core) in a port pair. (Two Safari ports share HW resources on
566	! a CPU/MEM board. These are referred to as a port pair.)
567	!
568	! This routine begins by mapping the appropriate SRAM page,
569	! transferring the machine code (between the labels
570	! drmach_rename_wait_asm and drmach_rename_wait_asm_end), then
571	! jumping to SRAM.  After returning from SRAM, the page is
572	! demapped before the cross-call is exited (sic).
573	!
574	! The machine code flushes all caches, waits for a special
575	! interrupt vector, then updates the processor's LPA and
576	! resynchronizes caches with the new home memory.
577	!
578	! The special interrupt vector is assumed to be a cross-call to
579	! drmach_rename_done sent by the master processor upon completing
580	! the copy-rename operation. The interrupt is received and discarded;
581	! The cross-call to drmach_rename_done is never executed.  Instead
582	! the Interrupt Receive Status Register is employed, temporarily,
583	! as a semaphore. This avoids unwanted bus traffic during the critical
584	! rename operation.
585	!
586	ENTRY_NP(drmach_rename_wait)
587
588	CPU_INDEX(%g5, %g1)		! put cpuid in %g5
589
590	!
591	! sfmmu_dtlb_ld(drmach_cpu_sram_va,
592	!	KCONTEXT, drmach_cpu_sram_tte[cpuid]);
593	! sfmmu_itlb_ld(drmach_cpu_sram_va,
594	!	KCONTEXT, drmach_cpu_sram_tte[cpuid]);
595	!
596	set	drmach_cpu_sram_tte, %g1
597	sllx	%g5, 3, %g2
598	ldx	[%g1 + %g2], %g3
599	set	drmach_cpu_sram_va, %g1
600	ldx	[%g1], %g1
601	or	%g1, KCONTEXT, %g2	! preserve %g1
602	set	MMU_TAG_ACCESS, %g4
603	set	cpu_impl_dual_pgsz, %g6
604	ld      [%g6], %g6
605	brz	%g6, 1f
606	  nop
607
608	sethi	%hi(ksfmmup), %g6
609	ldx	[%g6 + %lo(ksfmmup)], %g6
610	ldub    [%g6 + SFMMU_CEXT], %g6
611        sll     %g6, TAGACCEXT_SHIFT, %g6
612
613	set	MMU_TAG_ACCESS_EXT, %g7
614	stxa	%g6, [%g7]ASI_DMMU
6151:
616	stxa	%g2, [%g4]ASI_DMMU
617	stxa    %g3, [%g0]ASI_DTLB_IN
618	membar	#Sync
619	sethi	%hi(FLUSH_ADDR), %g6
620	stxa	%g2, [%g4]ASI_IMMU
621	stxa    %g3, [%g0]ASI_ITLB_IN
622	flush	%g6
623
624	!
625	! copy drmach_rename_wait_asm block to SRAM. Preserve entry
626	! point in %g1. After the code has been copied, align %g6
627	! (the destination pointer) to the next highest 16 byte
628	! boundary. This will define the start of the data area.
629	!
630	mov	%g1, %g6
631	set	drmach_rename_wait_asm, %g2
632	set	drmach_rename_wait_asm_end, %g3
6330:
634	lduw	[%g2], %g4		! do copy
635	stw	%g4, [%g6]
636	add	%g2, 4, %g2
637	cmp	%g2, %g3
638	bne	0b
639	add	%g6, 4, %g6
640
641	add	%g6, 15, %g6		! locate data area on next 16 byte
642	andn	%g6, 15, %g6		! boundary following text
643					! WARNING: no bounds checking
644
645	jmpl	%g1, %g7		! jump to code in cpu sram
646	nop
647
648	set	drmach_cpu_sram_va, %g1	! vtab_flushpage_tl1(drmach_cpu_sram_va,
649	ldx	[%g1], %g1		! 	KCONTEXT);
650	set	KCONTEXT, %g2
651	set	MMU_PCONTEXT, %g4
652	or	%g1, DEMAP_PRIMARY | DEMAP_PAGE_TYPE, %g1
653	ldxa	[%g4]ASI_DMMU, %g5	/* rd old ctxnum */
654	stxa	%g2, [%g4]ASI_DMMU	/* wr new ctxum */
655	stxa	%g0, [%g1]ASI_DTLB_DEMAP
656	stxa	%g0, [%g1]ASI_ITLB_DEMAP
657	stxa	%g5, [%g4]ASI_DMMU	/* restore old ctxnum */
658
659	retry
660
661drmach_rename_wait_asm:
662	! the following code is copied to a cpu's sram and executed
663	! from there.
664	! Input:
665	!	%g5 is cpuid
666	!	%g6 is data area (follows text)
667	!	%g7 is link address back to caller
668	!
669	st	%g5, [%g6 + 4]		! save cpuid (for debug)
670
671	set	dcache_size, %g1
672	ld	[%g1], %g1
673	st	%g1, [%g6 + 8]		! save dcache_size
674	set	dcache_linesize, %g1
675	ld	[%g1], %g1
676	st	%g1, [%g6 + 12]		! save dcache_linesize
677
678	GET_ICACHE_PARAMS(%g1, %g2)
679	st	%g1, [%g6 + 16]		! save icache_size
680	st	%g2, [%g6 + 20]		! save icache_linesize
681
682	set	drmach_iocage_paddr, %g1
683	ldx	[%g1], %g1
684	stx	%g1, [%g6 + 24]		! save *ecache_flushadr
685
686	mulx	%g5, CPU_NODE_SIZE, %g1	! %g4 = &cpunodes[cpuid]
687	set	cpunodes, %g4
688	add	%g4, %g1, %g4
689	ld	[%g4 + ECACHE_SIZE], %g1
690	st	%g1, [%g6 + 32]		! save ecache_size
691	ld	[%g4 + ECACHE_LINESIZE], %g1
692	st	%g1, [%g6 + 36]		! save ecache_linesize
693
694	LOAD_MB(%g5, %g1, %g2)		! save mailbox data
695	stb	%g1, [%g6 + 40]
696
697	membar	#Sync			! Complete any pending processing.
698
699	! Flush E$. The purpose of this flush is to rid the E$ of
700	! lines in states O or Os. Implicitly flushes W$.
701	! NOTE: Reading the bus sync list and r/w ops on drmach_xt_ready
702	! will disturb the E$. The lines of the bus sync list will be
703	! in state S. The line containing drmach_xt_ready will be in
704	! state O. Before proceeding with the copy-rename, the master
705	! processor will "steal back" the drmach_xt_ready (sic) line.
706	! This will demote the state of the line in E$ to I.
707	! However, the lines containing the bus sync list must be
708	! victimized before returning to the OS. This is vital because
709	! following copy-rename the corresponding lines in the new home
710	! memory will be in state gM. The resulting S,gM state pair is
711	! invalid and does represent a loss of coherency. Flushing the
712	! E$ after the bus sync list is read will be sufficient to
713	! avoid the invalid condition.
714	!
715	! For Panther, there is redundancy as both cores flush the shared
716	! L2 and L3 caches.  As an optimization, only one core could do the
717	! flush of the shared caches, however care must be taken that the
718	! sibling core does not install owned lines once the flush begins.
719	PN_L2_FLUSHALL(%g1, %g2, %g3)
720	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
721	ld	[%g6 + 32], %g2		! ecache_size
722	ld	[%g6 + 36], %g3		! ecache_linesize
723	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
724
725	! Make sure all outstanding transactions for this processor
726	! have retired. See E$ note above.
727	set	drmach_bus_sync_list, %g1
728	BUS_SYNC(%g1, %g2)
729
730	HERE(%g6, 128, %g4)		! preparation complete (for debug)
731
732	! Signal this processor is ready for rename operation to begin.
733	! See E$ note above.
734	ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g2, %g3, %g4)
735
736	! Loop on IRSR waiting for interrupt. The expected interrupt
737	! is a cross-trap to drmach_wait_done. It is sent by the master
738	! processor when the copy-rename operation is complete. The
739	! received cross-trap is used only as a signal. It is not executed.
7402:
741	HERE(%g6, 136, %g4)		! last poll tick (for debug)
742
743	ldxa	[%g0]ASI_INTR_RECEIVE_STATUS, %g4	! wait for xt
744	btst	IRSR_BUSY, %g4
745	bz	2b
746	nop
747	stx	%g4, [%g6 + 64]		! save status and payload
748	set	IRDR_0, %g2
749	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
750	stx	%g2, [%g6 + 72]
751	set	IRDR_1, %g2
752	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
753	stx	%g2, [%g6 + 80]
754	set	IRDR_2, %g2
755	ldxa	[%g2]ASI_INTR_RECEIVE, %g2
756	stx	%g2, [%g6 + 88]
757
758					! clear rcv status
759	stxa	%g0, [%g0]ASI_INTR_RECEIVE_STATUS
760	membar	#Sync
761
762	HERE(%g6, 144, %g4)		! signal rcvd tick (for debug)
763
764	! Check for copy-rename abort signal. If this signal is received,
765	! the LPA change is skipped since the rename step was not done.
766	! The cache flushes are still done as paranoia.
767	set	drmach_rename_abort, %g1
768	ldx	[%g6 + 72], %g2
769	cmp 	%g1, %g2
770	be	3f
771	nop
772
773	! Resume waiting if this is not drmach_rename_done.
774	set	drmach_rename_done, %g1
775	cmp 	%g1, %g2
776	bne	2b
777	nop
778
779	ldub	[%g6 + 40], %g1		! get saved mailbox data
780	SET_LPA(%g1, %g2, %g3)		! set LPA as indicated by the mb data
781
7823:
783	! Flush all caches (E, D, I and P) to ensure each is resynchronized
784	! with the corresponding states in the new home memory. (W$ is
785	! implicitly flushed when the E$ is flushed.)
786	!
787	! Panther needs to flush the L2 cache before the L3
788	! cache is flushed by the ecache flushall macro.
789	PN_L2_FLUSHALL(%g1, %g2, %g3)
790
791	ldx	[%g6 + 24], %g1		! *ecache_flushaddr
792	ld	[%g6 + 32], %g2		! ecache_size
793	ld	[%g6 + 36], %g3		! ecache_linesize
794	ECACHE_FLUSHALL(%g2, %g3, %g1, %g4)
795
796	ld	[%g6 + 8], %g1		! flush dcache
797	ld	[%g6 + 12], %g2
798	CH_DCACHE_FLUSHALL(%g1, %g2, %g3)
799
800	ld	[%g6 + 16], %g1		! flush icache
801	ld	[%g6 + 20], %g2
802	CH_ICACHE_FLUSHALL(%g1, %g2, %g3, %g4)
803
804	PCACHE_FLUSHALL(%g1, %g2, %g3)	! flush pcache (no parameters)
805
806	HERE(%g6, 152, %g4)		! done tick (for debug)
807
808	jmpl	%g7+8, %g0
809	nop
810
811	.asciz	"drmach_rename_wait"	! for debug
812	.align	4
813drmach_rename_wait_asm_end:
814	SET_SIZE(drmach_rename_wait)
815
816
817	! drmach_rename_done
818	!
819	! input:
820	!	nothing
821	!
822	! output:
823	!	nothing
824	!
825	! Used as signal data. See drmach_rename_wait.
826	!
827	ENTRY_NP(drmach_rename_done)
828	retry
829	SET_SIZE(drmach_rename_done)
830
831	! drmach_rename_abort
832	!
833	! input:
834	!	nothing
835	!
836	! output:
837	!	nothing
838	!
839	! Used as signal data. See drmach_rename_wait.
840	!
841	ENTRY_NP(drmach_rename_abort)
842	retry
843	SET_SIZE(drmach_rename_abort)
844
845
846	! drmach_set_lpa
847	!
848	! input:
849	!	Globals: drmach_xt_mb[cpuid] contains new LPA data
850	!
851	! output:
852	!	nothing
853	!
854	! Sets the executing processor's LPA as indicated by the command
855	! stored in drmach_xt_mb, a byte array indexed by cpuid. Assumes
856	! the caller is preventing illegal LPA settings and transistions.
857	!
858	ENTRY_NP(drmach_set_lpa)
859
860	!
861	! Set %g1 to this processor's cpuid.
862	!
863	CPU_INDEX(%g1, %g2)
864
865	!
866	! Get LPA message from mailbox, leave in %g5.
867	!
868	LOAD_MB(%g1, %g5, %g2)
869
870	!
871	! Set LPA, mailbox data in %g5.
872	!
873	SET_LPA(%g5, %g1, %g2)
874
875	!
876	! Signal work is done.
877	!
878	ATOMIC_ADD_LONG(drmach_xt_ready, 1, %g1, %g2, %g3)
879
880	retry
881	SET_SIZE(drmach_set_lpa)
882
!
! drmach_bc_bzero
!
! Zero a block of storage using block commit stores.
!
! inputs:
! 	%o0 = base vaddr of area to clear (must be 64-byte aligned)
!	%o1 = size of area to clear (must be multiple of 256 bytes)
!
! outputs:
!	%o0 =
!		0 (success)
!		1 (size too small or not modulo 256)
!		2 (vaddr not 64-byte aligned)
!
! Nothing is written when a nonzero code is returned.
!
! Side effects on the success path: %d0 through %d14 are left holding
! zero and %asi is left set to ASI_BLK_COMMIT_P.  %fprs is restored, and
! %pstate is restored if the FPU had to be enabled.  %o1, %o3, %g1, %g3
! and %g4 are used as scratch.
!


	ENTRY(drmach_bc_bzero)

	! verify size is >= 256 bytes.  The comparison has to cover
	! the full width of the size, exactly as the store loop below
	! does with %ncc.  A branch on %icc looks at the low 32 bits
	! only and rejects a valid size whose low word is below 256
	! (a size of 4GB, for instance).
	cmp	%o1, 256
	blu,a	%ncc, .bz_done
	  mov	1, %o0			! error code 1 for invalid size

	! verify size is a multiple of 256
	btst	(256-1), %o1
	bnz,a	.bz_done
	  mov	1, %o0			! error code 1 for invalid size

	! verify that vaddr is aligned for block stores
	btst	(64-1), %o0
	bnz,a	.bz_done
	  mov	2, %o0			! error code 2 for invalid alignment

	! save fprs for restore when finished
	rd	%fprs, %g1

	! make sure FPU is enabled.  The original %pstate stays in %g3
	! so it can be tested and restored at the end.
	rdpr	%pstate, %g3
	btst	PSTATE_PEF, %g3
	bnz	.bz_block
	  nop
	andn	%g3, PSTATE_PEF, %g4
	wrpr	%g4, PSTATE_PEF, %pstate	! wrpr xors: this sets PEF

.bz_block:
	membar	#StoreStore|#StoreLoad|#LoadStore
	wr	%g0, FPRS_FEF, %fprs

	! Build a 64-byte block of zeros in %d0 - %d14
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fzero	%d8
	fzero	%d10
	fzero	%d12
	fzero	%d14
	wr	%g0, ASI_BLK_COMMIT_P, %asi
	mov	256, %o3		! bytes cleared per loop iteration
	ba	.bz_doblock
	  nop

	! Each pass clears 256 bytes with four 64-byte block stores.
	! The store to [%o0+192] is issued from the annulled delay
	! slot of the branch in .bz_doblock, so it happens only when
	! that branch is taken.
.bz_blkstart:
	stda	%d0, [%o0+128]%asi
	stda	%d0, [%o0+64]%asi
	stda	%d0, [%o0]%asi
	add	%o0, %o3, %o0
	sub	%o1, %o3, %o1
.bz_doblock:
	cmp	%o1, 256
	bgeu,a	%ncc, .bz_blkstart
	  stda	%d0, [%o0+192]%asi

.bz_finish:
	membar	#StoreLoad|#StoreStore
	clr	%o0			! success
	wr	%g1, %fprs		! restore fprs
	btst	PSTATE_PEF, %g3		! restore pstate if necessary
	bnz	.bz_done
	  nop
	wrpr	%g3, %g0, %pstate
.bz_done:
	membar	#Sync
	retl
	  nop

	SET_SIZE(drmach_bc_bzero)
974
975#endif /* lint */
976