xref: /titanic_51/usr/src/common/atomic/sparcv9/atomic.s (revision 0ad0f0b2adb964c7bd56bbf5a831721e1a67beaf)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"atomic.s"
28
29#include <sys/asm_linkage.h>
30
/*
 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
 * to enable exponential backoff. If it is not defined, backoff is
 * not desired and is therefore disabled.
 * By default, the shift value is used to generate a power-of-2
 * value for the backoff limit. In the kernel, processors scale this
 * shift value with the number of online cpus.
 */
39
40#if defined(_KERNEL)
41	/*
42	 * Legacy kernel interfaces; they will go away (eventually).
43	 */
44	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
45	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
46	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
47	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
48	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
49	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
50	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
51	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)
52
53#ifdef ATOMIC_BO_ENABLE_SHIFT
54
#if !defined(lint)
	.weak   cpu_atomic_delay
	.type   cpu_atomic_delay, #function
#endif  /* lint */

/*
 * DELAY_SPIN(label, tmp1, tmp2) -- kernel version.
 *
 * Performs one low-impact spin delay step by calling the processor
 * specific routine cpu_atomic_delay, which is referenced weakly above.
 * The macro tests the routine address and skips the call when it is
 * zero, so on CPUs that provide no such routine the delay degenerates
 * into a simple tight loop.  The value of ATOMIC_BO_ENABLE_SHIFT is
 * tuned with respect to the specific spin delay implementation.
 *
 *   label - prefix for the two labels emitted: label0 marks the top of
 *           the delay step (the backoff loop branches back to it) and
 *           label1 marks its end
 *   tmp1  - scratch register; holds the address of cpu_atomic_delay
 *   tmp2  - scratch register; preserves %o7 (the return address of the
 *           atomic routine) across the call
 */
#define	DELAY_SPIN(label, tmp1, tmp2)					\
	set	cpu_atomic_delay, tmp1					; \
label/**/0:								; \
	brz,pn	tmp1, label/**/1  /* no CPU routine: skip the call */	; \
	mov	%o7, tmp2	  /* delay slot: stash %o7 */		; \
	jmpl	tmp1, %o7	  /* call the CPU delay routine */	; \
	  nop			  /* delay slot: nothing to do */	; \
	mov	tmp2, %o7	  /* put the saved %o7 back */		; \
label/**/1:
84
#ifndef	ATOMIC_SIMPLE_BO_ENABLE
/*
 * ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label) -- scaled kernel
 * version.
 *
 * Each expansion counts one more cas failure in cas_cnt.  The number of
 * online cpus is loaded from ncpus_online into ncpu the first time
 * through (while ncpu is still zero) and reused afterwards.  The backoff
 * limit is the processor supplied ATOMIC_BO_ENABLE_SHIFT scaled by the
 * number of online cpus, i.e. limit = ncpu << ATOMIC_BO_ENABLE_SHIFT.
 * Once the failure count reaches or exceeds the number of online cpus,
 * both the failure count and the backoff value are reset (to 0 and 1
 * respectively); this enforces some degree of fairness and prevents
 * starvation.
 */
#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
	brnz,pt	ncpu, label/**/0	/* ncpu already loaded? */	; \
	  inc	cas_cnt			/* delay slot: always runs */	; \
	sethi	%hi(ncpus_online), ncpu					; \
	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
label/**/0:								; \
	cmp	cas_cnt, ncpu						; \
	blu,pt	%xcc, label/**/1	/* fewer failures than cpus */	; \
	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit /* always runs */	; \
	mov	1, val			/* restart the backoff value */	; \
	clr	cas_cnt			/* restart the failure count */	; \
label/**/1:
#else	/* ATOMIC_SIMPLE_BO_ENABLE */
/*
 * For some processors, a simple fixed limit has proved beneficial:
 * limit = 1 << ATOMIC_BO_ENABLE_SHIFT.  The other arguments are unused.
 */
#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
#endif	/* ATOMIC_SIMPLE_BO_ENABLE */
115#endif	/* ATOMIC_BO_ENABLE_SHIFT */
116
117#else	/* _KERNEL */
118
/*
 * Non-kernel (generic libc) build.  ATOMIC_BO_ENABLE_SHIFT could be
 * defined here to turn backoff on for the libc atomics; for now it is
 * not.  Were it defined, the backoff limit would be the fixed value
 * 1 << ATOMIC_BO_ENABLE_SHIFT and the delay step would be empty apart
 * from the label that the backoff loop branches back to.
 */
#ifdef ATOMIC_BO_ENABLE_SHIFT
#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)  \
	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit

#define	DELAY_SPIN(label, tmp1, tmp2)	\
label/**/0:
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
130#endif	/* _KERNEL */
131
#ifndef ATOMIC_BO_ENABLE_SHIFT
/*
 * Backoff disabled: the INIT and BACKOFF macros expand to nothing and
 * a failed cas simply branches straight back to the retry label (loop).
 */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)

#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, loop

#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)

#else	/* ATOMIC_BO_ENABLE_SHIFT */

/*
 * ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt): set up the backoff state.
 *   val     - backoff value, starts at 1
 *   ncpu    - cached cpu count, starts at 0
 *   cas_cnt - count of cas instruction failures, starts at 0
 * With ATOMIC_SIMPLE_BO_ENABLE only val is initialized.
 */
#ifndef	ATOMIC_SIMPLE_BO_ENABLE
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
	clr	cas_cnt				; \
	clr	ncpu				; \
	mov	1, val

#else	/* ATOMIC_SIMPLE_BO_ENABLE */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
	mov	1, val

#endif	/* ATOMIC_SIMPLE_BO_ENABLE */

/*
 * ATOMIC_BACKOFF_BRANCH(cr, backoff, loop): on a failed cas (condition
 * codes in cr say "not equal") branch to the backoff code rather than
 * directly to the retry label.  The branch is annulling, so the
 * instruction the caller places in the delay slot runs only when the
 * branch is taken.
 */
#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, backoff

/*
 * ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel):
 * the backoff proper.
 *
 * ATOMIC_BACKOFF_CPU supplies the limit; val is clamped so that it does
 * not exceed that limit, and the clamped value is used as the number of
 * DELAY_SPIN steps to execute.  Afterwards val is doubled and control
 * returns to retlabel.  %g2 and %g3 are used as DELAY_SPIN scratch
 * registers.  label is a prefix that keeps the labels generated by each
 * expansion unique.
 */
#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
	cmp	val, limit						; \
	blu,a,pt %xcc, label/**/_1	/* val below the limit? */	; \
	  mov	val, limit		/* then spin only val times */	; \
label/**/_1:								; \
	mov	limit, val		/* val = min(val, limit) */	; \
	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
	deccc	limit			/* one delay step done */	; \
	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
	  nop								; \
	ba	retlabel		/* go retry the operation */	; \
	sllx	val, 1, val		/* delay slot: double val */
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
180
181	/*
182	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
183	 * separated, you need to also edit the libc sparcv9 platform
184	 * specific mapfile and remove the NODYNSORT attribute
185	 * from atomic_inc_8_nv.
186	 */
187	ENTRY(atomic_inc_8)
188	ALTENTRY(atomic_inc_8_nv)
189	ALTENTRY(atomic_inc_uchar)
190	ALTENTRY(atomic_inc_uchar_nv)
191	ba	add_8
192	  add	%g0, 1, %o1
193	SET_SIZE(atomic_inc_uchar_nv)
194	SET_SIZE(atomic_inc_uchar)
195	SET_SIZE(atomic_inc_8_nv)
196	SET_SIZE(atomic_inc_8)
197
198	/*
199	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
200	 * separated, you need to also edit the libc sparcv9 platform
201	 * specific mapfile and remove the NODYNSORT attribute
202	 * from atomic_dec_8_nv.
203	 */
204	ENTRY(atomic_dec_8)
205	ALTENTRY(atomic_dec_8_nv)
206	ALTENTRY(atomic_dec_uchar)
207	ALTENTRY(atomic_dec_uchar_nv)
208	ba	add_8
209	  sub	%g0, 1, %o1
210	SET_SIZE(atomic_dec_uchar_nv)
211	SET_SIZE(atomic_dec_uchar)
212	SET_SIZE(atomic_dec_8_nv)
213	SET_SIZE(atomic_dec_8)
214
215	/*
216	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
217	 * separated, you need to also edit the libc sparcv9 platform
218	 * specific mapfile and remove the NODYNSORT attribute
219	 * from atomic_add_8_nv.
220	 */
221	ENTRY(atomic_add_8)
222	ALTENTRY(atomic_add_8_nv)
223	ALTENTRY(atomic_add_char)
224	ALTENTRY(atomic_add_char_nv)
225add_8:
226	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
227	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
228	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
229	set	0xff, %o3		! %o3 = mask
230	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
231	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
232	and	%o1, %o3, %o1		! %o1 = single byte value
233	andn	%o0, 0x3, %o0		! %o0 = word address
234	ld	[%o0], %o2		! read old value
2351:
236	add	%o2, %o1, %o5		! add value to the old value
237	and	%o5, %o3, %o5		! clear other bits
238	andn	%o2, %o3, %o4		! clear target bits
239	or	%o4, %o5, %o5		! insert the new value
240	cas	[%o0], %o2, %o5
241	cmp	%o2, %o5
242	bne,a,pn %icc, 1b
243	  mov	%o5, %o2		! %o2 = old value
244	add	%o2, %o1, %o5
245	and	%o5, %o3, %o5
246	retl
247	srl	%o5, %g1, %o0		! %o0 = new value
248	SET_SIZE(atomic_add_char_nv)
249	SET_SIZE(atomic_add_char)
250	SET_SIZE(atomic_add_8_nv)
251	SET_SIZE(atomic_add_8)
252
253	/*
254	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
255	 * separated, you need to also edit the libc sparcv9 platform
256	 * specific mapfile and remove the NODYNSORT attribute
257	 * from atomic_inc_16_nv.
258	 */
259	ENTRY(atomic_inc_16)
260	ALTENTRY(atomic_inc_16_nv)
261	ALTENTRY(atomic_inc_ushort)
262	ALTENTRY(atomic_inc_ushort_nv)
263	ba	add_16
264	  add	%g0, 1, %o1
265	SET_SIZE(atomic_inc_ushort_nv)
266	SET_SIZE(atomic_inc_ushort)
267	SET_SIZE(atomic_inc_16_nv)
268	SET_SIZE(atomic_inc_16)
269
270	/*
271	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
272	 * separated, you need to also edit the libc sparcv9 platform
273	 * specific mapfile and remove the NODYNSORT attribute
274	 * from atomic_dec_16_nv.
275	 */
276	ENTRY(atomic_dec_16)
277	ALTENTRY(atomic_dec_16_nv)
278	ALTENTRY(atomic_dec_ushort)
279	ALTENTRY(atomic_dec_ushort_nv)
280	ba	add_16
281	  sub	%g0, 1, %o1
282	SET_SIZE(atomic_dec_ushort_nv)
283	SET_SIZE(atomic_dec_ushort)
284	SET_SIZE(atomic_dec_16_nv)
285	SET_SIZE(atomic_dec_16)
286
287	/*
288	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
289	 * separated, you need to also edit the libc sparcv9 platform
290	 * specific mapfile and remove the NODYNSORT attribute
291	 * from atomic_add_16_nv.
292	 */
293	ENTRY(atomic_add_16)
294	ALTENTRY(atomic_add_16_nv)
295	ALTENTRY(atomic_add_short)
296	ALTENTRY(atomic_add_short_nv)
297add_16:
298	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
299	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
300	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
301	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
302	sethi	%hi(0xffff0000), %o3	! %o3 = mask
303	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
304	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
305	and	%o1, %o3, %o1		! %o1 = single short value
306	andn	%o0, 0x2, %o0		! %o0 = word address
307	! if low-order bit is 1, we will properly get an alignment fault here
308	ld	[%o0], %o2		! read old value
3091:
310	add	%o1, %o2, %o5		! add value to the old value
311	and	%o5, %o3, %o5		! clear other bits
312	andn	%o2, %o3, %o4		! clear target bits
313	or	%o4, %o5, %o5		! insert the new value
314	cas	[%o0], %o2, %o5
315	cmp	%o2, %o5
316	bne,a,pn %icc, 1b
317	  mov	%o5, %o2		! %o2 = old value
318	add	%o1, %o2, %o5
319	and	%o5, %o3, %o5
320	retl
321	srl	%o5, %g1, %o0		! %o0 = new value
322	SET_SIZE(atomic_add_short_nv)
323	SET_SIZE(atomic_add_short)
324	SET_SIZE(atomic_add_16_nv)
325	SET_SIZE(atomic_add_16)
326
327	/*
328	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
329	 * separated, you need to also edit the libc sparcv9 platform
330	 * specific mapfile and remove the NODYNSORT attribute
331	 * from atomic_inc_32_nv.
332	 */
333	ENTRY(atomic_inc_32)
334	ALTENTRY(atomic_inc_32_nv)
335	ALTENTRY(atomic_inc_uint)
336	ALTENTRY(atomic_inc_uint_nv)
337	ba	add_32
338	  add	%g0, 1, %o1
339	SET_SIZE(atomic_inc_uint_nv)
340	SET_SIZE(atomic_inc_uint)
341	SET_SIZE(atomic_inc_32_nv)
342	SET_SIZE(atomic_inc_32)
343
344	/*
345	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
346	 * separated, you need to also edit the libc sparcv9 platform
347	 * specific mapfile and remove the NODYNSORT attribute
348	 * from atomic_dec_32_nv.
349	 */
350	ENTRY(atomic_dec_32)
351	ALTENTRY(atomic_dec_32_nv)
352	ALTENTRY(atomic_dec_uint)
353	ALTENTRY(atomic_dec_uint_nv)
354	ba	add_32
355	  sub	%g0, 1, %o1
356	SET_SIZE(atomic_dec_uint_nv)
357	SET_SIZE(atomic_dec_uint)
358	SET_SIZE(atomic_dec_32_nv)
359	SET_SIZE(atomic_dec_32)
360
361	/*
362	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
363	 * separated, you need to also edit the libc sparcv9 platform
364	 * specific mapfile and remove the NODYNSORT attribute
365	 * from atomic_add_32_nv.
366	 */
367	ENTRY(atomic_add_32)
368	ALTENTRY(atomic_add_32_nv)
369	ALTENTRY(atomic_add_int)
370	ALTENTRY(atomic_add_int_nv)
371add_32:
372	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
3730:
374	ld	[%o0], %o2
3751:
376	add	%o2, %o1, %o3
377	cas	[%o0], %o2, %o3
378	cmp	%o2, %o3
379	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
380	  mov	%o3, %o2
381	retl
382	add	%o2, %o1, %o0		! return new value
3832:
384	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
385	SET_SIZE(atomic_add_int_nv)
386	SET_SIZE(atomic_add_int)
387	SET_SIZE(atomic_add_32_nv)
388	SET_SIZE(atomic_add_32)
389
390	/*
391	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
392	 * separated, you need to also edit the libc sparcv9 platform
393	 * specific mapfile and remove the NODYNSORT attribute
394	 * from atomic_inc_64_nv.
395	 */
396	ENTRY(atomic_inc_64)
397	ALTENTRY(atomic_inc_64_nv)
398	ALTENTRY(atomic_inc_ulong)
399	ALTENTRY(atomic_inc_ulong_nv)
400	ba	add_64
401	  add	%g0, 1, %o1
402	SET_SIZE(atomic_inc_ulong_nv)
403	SET_SIZE(atomic_inc_ulong)
404	SET_SIZE(atomic_inc_64_nv)
405	SET_SIZE(atomic_inc_64)
406
407	/*
408	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
409	 * separated, you need to also edit the libc sparcv9 platform
410	 * specific mapfile and remove the NODYNSORT attribute
411	 * from atomic_dec_64_nv.
412	 */
413	ENTRY(atomic_dec_64)
414	ALTENTRY(atomic_dec_64_nv)
415	ALTENTRY(atomic_dec_ulong)
416	ALTENTRY(atomic_dec_ulong_nv)
417	ba	add_64
418	  sub	%g0, 1, %o1
419	SET_SIZE(atomic_dec_ulong_nv)
420	SET_SIZE(atomic_dec_ulong)
421	SET_SIZE(atomic_dec_64_nv)
422	SET_SIZE(atomic_dec_64)
423
424	/*
425	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
426	 * separated, you need to also edit the libc sparcv9 platform
427	 * specific mapfile and remove the NODYNSORT attribute
428	 * from atomic_add_64_nv.
429	 */
430	ENTRY(atomic_add_64)
431	ALTENTRY(atomic_add_64_nv)
432	ALTENTRY(atomic_add_ptr)
433	ALTENTRY(atomic_add_ptr_nv)
434	ALTENTRY(atomic_add_long)
435	ALTENTRY(atomic_add_long_nv)
436add_64:
437	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
4380:
439	ldx	[%o0], %o2
4401:
441	add	%o2, %o1, %o3
442	casx	[%o0], %o2, %o3
443	cmp	%o2, %o3
444	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
445	  mov	%o3, %o2
446	retl
447	add	%o2, %o1, %o0		! return new value
4482:
449	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
450	SET_SIZE(atomic_add_long_nv)
451	SET_SIZE(atomic_add_long)
452	SET_SIZE(atomic_add_ptr_nv)
453	SET_SIZE(atomic_add_ptr)
454	SET_SIZE(atomic_add_64_nv)
455	SET_SIZE(atomic_add_64)
456
457	/*
458	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
459	 * separated, you need to also edit the libc sparcv9 platform
460	 * specific mapfile and remove the NODYNSORT attribute
461	 * from atomic_or_8_nv.
462	 */
463	ENTRY(atomic_or_8)
464	ALTENTRY(atomic_or_8_nv)
465	ALTENTRY(atomic_or_uchar)
466	ALTENTRY(atomic_or_uchar_nv)
467	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
468	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
469	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
470	set	0xff, %o3		! %o3 = mask
471	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
472	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
473	and	%o1, %o3, %o1		! %o1 = single byte value
474	andn	%o0, 0x3, %o0		! %o0 = word address
475	ld	[%o0], %o2		! read old value
4761:
477	or	%o2, %o1, %o5		! or in the new value
478	cas	[%o0], %o2, %o5
479	cmp	%o2, %o5
480	bne,a,pn %icc, 1b
481	  mov	%o5, %o2		! %o2 = old value
482	or	%o2, %o1, %o5
483	and	%o5, %o3, %o5
484	retl
485	srl	%o5, %g1, %o0		! %o0 = new value
486	SET_SIZE(atomic_or_uchar_nv)
487	SET_SIZE(atomic_or_uchar)
488	SET_SIZE(atomic_or_8_nv)
489	SET_SIZE(atomic_or_8)
490
491	/*
492	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
493	 * separated, you need to also edit the libc sparcv9 platform
494	 * specific mapfile and remove the NODYNSORT attribute
495	 * from atomic_or_16_nv.
496	 */
497	ENTRY(atomic_or_16)
498	ALTENTRY(atomic_or_16_nv)
499	ALTENTRY(atomic_or_ushort)
500	ALTENTRY(atomic_or_ushort_nv)
501	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
502	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
503	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
504	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
505	sethi	%hi(0xffff0000), %o3	! %o3 = mask
506	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
507	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
508	and	%o1, %o3, %o1		! %o1 = single short value
509	andn	%o0, 0x2, %o0		! %o0 = word address
510	! if low-order bit is 1, we will properly get an alignment fault here
511	ld	[%o0], %o2		! read old value
5121:
513	or	%o2, %o1, %o5		! or in the new value
514	cas	[%o0], %o2, %o5
515	cmp	%o2, %o5
516	bne,a,pn %icc, 1b
517	  mov	%o5, %o2		! %o2 = old value
518	or	%o2, %o1, %o5		! or in the new value
519	and	%o5, %o3, %o5
520	retl
521	srl	%o5, %g1, %o0		! %o0 = new value
522	SET_SIZE(atomic_or_ushort_nv)
523	SET_SIZE(atomic_or_ushort)
524	SET_SIZE(atomic_or_16_nv)
525	SET_SIZE(atomic_or_16)
526
527	/*
528	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
529	 * separated, you need to also edit the libc sparcv9 platform
530	 * specific mapfile and remove the NODYNSORT attribute
531	 * from atomic_or_32_nv.
532	 */
533	ENTRY(atomic_or_32)
534	ALTENTRY(atomic_or_32_nv)
535	ALTENTRY(atomic_or_uint)
536	ALTENTRY(atomic_or_uint_nv)
537	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5380:
539	ld	[%o0], %o2
5401:
541	or	%o2, %o1, %o3
542	cas	[%o0], %o2, %o3
543	cmp	%o2, %o3
544	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
545	  mov	%o3, %o2
546	retl
547	or	%o2, %o1, %o0		! return new value
5482:
549	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
550	SET_SIZE(atomic_or_uint_nv)
551	SET_SIZE(atomic_or_uint)
552	SET_SIZE(atomic_or_32_nv)
553	SET_SIZE(atomic_or_32)
554
555	/*
556	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
557	 * separated, you need to also edit the libc sparcv9 platform
558	 * specific mapfile and remove the NODYNSORT attribute
559	 * from atomic_or_64_nv.
560	 */
561	ENTRY(atomic_or_64)
562	ALTENTRY(atomic_or_64_nv)
563	ALTENTRY(atomic_or_ulong)
564	ALTENTRY(atomic_or_ulong_nv)
565	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5660:
567	ldx	[%o0], %o2
5681:
569	or	%o2, %o1, %o3
570	casx	[%o0], %o2, %o3
571	cmp	%o2, %o3
572	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
573	  mov	%o3, %o2
574	retl
575	or	%o2, %o1, %o0		! return new value
5762:
577	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
578	SET_SIZE(atomic_or_ulong_nv)
579	SET_SIZE(atomic_or_ulong)
580	SET_SIZE(atomic_or_64_nv)
581	SET_SIZE(atomic_or_64)
582
583	/*
584	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
585	 * separated, you need to also edit the libc sparcv9 platform
586	 * specific mapfile and remove the NODYNSORT attribute
587	 * from atomic_and_8_nv.
588	 */
589	ENTRY(atomic_and_8)
590	ALTENTRY(atomic_and_8_nv)
591	ALTENTRY(atomic_and_uchar)
592	ALTENTRY(atomic_and_uchar_nv)
593	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
594	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
595	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
596	set	0xff, %o3		! %o3 = mask
597	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
598	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
599	orn	%o1, %o3, %o1		! all ones in other bytes
600	andn	%o0, 0x3, %o0		! %o0 = word address
601	ld	[%o0], %o2		! read old value
6021:
603	and	%o2, %o1, %o5		! and in the new value
604	cas	[%o0], %o2, %o5
605	cmp	%o2, %o5
606	bne,a,pn %icc, 1b
607	  mov	%o5, %o2		! %o2 = old value
608	and	%o2, %o1, %o5
609	and	%o5, %o3, %o5
610	retl
611	srl	%o5, %g1, %o0		! %o0 = new value
612	SET_SIZE(atomic_and_uchar_nv)
613	SET_SIZE(atomic_and_uchar)
614	SET_SIZE(atomic_and_8_nv)
615	SET_SIZE(atomic_and_8)
616
617	/*
618	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
619	 * separated, you need to also edit the libc sparcv9 platform
620	 * specific mapfile and remove the NODYNSORT attribute
621	 * from atomic_and_16_nv.
622	 */
623	ENTRY(atomic_and_16)
624	ALTENTRY(atomic_and_16_nv)
625	ALTENTRY(atomic_and_ushort)
626	ALTENTRY(atomic_and_ushort_nv)
627	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
628	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
629	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
630	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
631	sethi	%hi(0xffff0000), %o3	! %o3 = mask
632	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
633	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
634	orn	%o1, %o3, %o1		! all ones in the other half
635	andn	%o0, 0x2, %o0		! %o0 = word address
636	! if low-order bit is 1, we will properly get an alignment fault here
637	ld	[%o0], %o2		! read old value
6381:
639	and	%o2, %o1, %o5		! and in the new value
640	cas	[%o0], %o2, %o5
641	cmp	%o2, %o5
642	bne,a,pn %icc, 1b
643	  mov	%o5, %o2		! %o2 = old value
644	and	%o2, %o1, %o5
645	and	%o5, %o3, %o5
646	retl
647	srl	%o5, %g1, %o0		! %o0 = new value
648	SET_SIZE(atomic_and_ushort_nv)
649	SET_SIZE(atomic_and_ushort)
650	SET_SIZE(atomic_and_16_nv)
651	SET_SIZE(atomic_and_16)
652
653	/*
654	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
655	 * separated, you need to also edit the libc sparcv9 platform
656	 * specific mapfile and remove the NODYNSORT attribute
657	 * from atomic_and_32_nv.
658	 */
659	ENTRY(atomic_and_32)
660	ALTENTRY(atomic_and_32_nv)
661	ALTENTRY(atomic_and_uint)
662	ALTENTRY(atomic_and_uint_nv)
663	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6640:
665	ld	[%o0], %o2
6661:
667	and	%o2, %o1, %o3
668	cas	[%o0], %o2, %o3
669	cmp	%o2, %o3
670	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
671	  mov	%o3, %o2
672	retl
673	and	%o2, %o1, %o0		! return new value
6742:
675	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
676	SET_SIZE(atomic_and_uint_nv)
677	SET_SIZE(atomic_and_uint)
678	SET_SIZE(atomic_and_32_nv)
679	SET_SIZE(atomic_and_32)
680
681	/*
682	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
683	 * separated, you need to also edit the libc sparcv9 platform
684	 * specific mapfile and remove the NODYNSORT attribute
685	 * from atomic_and_64_nv.
686	 */
687	ENTRY(atomic_and_64)
688	ALTENTRY(atomic_and_64_nv)
689	ALTENTRY(atomic_and_ulong)
690	ALTENTRY(atomic_and_ulong_nv)
691	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6920:
693	ldx	[%o0], %o2
6941:
695	and	%o2, %o1, %o3
696	casx	[%o0], %o2, %o3
697	cmp	%o2, %o3
698	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
699	  mov	%o3, %o2
700	retl
701	and	%o2, %o1, %o0		! return new value
7022:
703	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
704	SET_SIZE(atomic_and_ulong_nv)
705	SET_SIZE(atomic_and_ulong)
706	SET_SIZE(atomic_and_64_nv)
707	SET_SIZE(atomic_and_64)
708
709	ENTRY(atomic_cas_8)
710	ALTENTRY(atomic_cas_uchar)
711	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
712	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
713	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
714	set	0xff, %o3		! %o3 = mask
715	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
716	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
717	and	%o1, %o3, %o1		! %o1 = single byte value
718	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
719	and	%o2, %o3, %o2		! %o2 = single byte value
720	andn	%o0, 0x3, %o0		! %o0 = word address
721	ld	[%o0], %o4		! read old value
7221:
723	andn	%o4, %o3, %o4		! clear target bits
724	or	%o4, %o2, %o5		! insert the new value
725	or	%o4, %o1, %o4		! insert the comparison value
726	cas	[%o0], %o4, %o5
727	cmp	%o4, %o5		! did we succeed?
728	be,pt	%icc, 2f
729	  and	%o5, %o3, %o4		! isolate the old value
730	cmp	%o1, %o4		! should we have succeeded?
731	be,a,pt	%icc, 1b		! yes, try again
732	  mov	%o5, %o4		! %o4 = old value
7332:
734	retl
735	srl	%o4, %g1, %o0		! %o0 = old value
736	SET_SIZE(atomic_cas_uchar)
737	SET_SIZE(atomic_cas_8)
738
739	ENTRY(atomic_cas_16)
740	ALTENTRY(atomic_cas_ushort)
741	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
742	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
743	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
744	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
745	sethi	%hi(0xffff0000), %o3	! %o3 = mask
746	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
747	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
748	and	%o1, %o3, %o1		! %o1 = single short value
749	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
750	and	%o2, %o3, %o2		! %o2 = single short value
751	andn	%o0, 0x2, %o0		! %o0 = word address
752	! if low-order bit is 1, we will properly get an alignment fault here
753	ld	[%o0], %o4		! read old value
7541:
755	andn	%o4, %o3, %o4		! clear target bits
756	or	%o4, %o2, %o5		! insert the new value
757	or	%o4, %o1, %o4		! insert the comparison value
758	cas	[%o0], %o4, %o5
759	cmp	%o4, %o5		! did we succeed?
760	be,pt	%icc, 2f
761	  and	%o5, %o3, %o4		! isolate the old value
762	cmp	%o1, %o4		! should we have succeeded?
763	be,a,pt	%icc, 1b		! yes, try again
764	  mov	%o5, %o4		! %o4 = old value
7652:
766	retl
767	srl	%o4, %g1, %o0		! %o0 = old value
768	SET_SIZE(atomic_cas_ushort)
769	SET_SIZE(atomic_cas_16)
770
771	ENTRY(atomic_cas_32)
772	ALTENTRY(atomic_cas_uint)
773	cas	[%o0], %o1, %o2
774	retl
775	mov	%o2, %o0
776	SET_SIZE(atomic_cas_uint)
777	SET_SIZE(atomic_cas_32)
778
779	ENTRY(atomic_cas_64)
780	ALTENTRY(atomic_cas_ptr)
781	ALTENTRY(atomic_cas_ulong)
782	casx	[%o0], %o1, %o2
783	retl
784	mov	%o2, %o0
785	SET_SIZE(atomic_cas_ulong)
786	SET_SIZE(atomic_cas_ptr)
787	SET_SIZE(atomic_cas_64)
788
789	ENTRY(atomic_swap_8)
790	ALTENTRY(atomic_swap_uchar)
791	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
792	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
793	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
794	set	0xff, %o3		! %o3 = mask
795	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
796	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
797	and	%o1, %o3, %o1		! %o1 = single byte value
798	andn	%o0, 0x3, %o0		! %o0 = word address
799	ld	[%o0], %o2		! read old value
8001:
801	andn	%o2, %o3, %o5		! clear target bits
802	or	%o5, %o1, %o5		! insert the new value
803	cas	[%o0], %o2, %o5
804	cmp	%o2, %o5
805	bne,a,pn %icc, 1b
806	  mov	%o5, %o2		! %o2 = old value
807	and	%o5, %o3, %o5
808	retl
809	srl	%o5, %g1, %o0		! %o0 = old value
810	SET_SIZE(atomic_swap_uchar)
811	SET_SIZE(atomic_swap_8)
812
813	ENTRY(atomic_swap_16)
814	ALTENTRY(atomic_swap_ushort)
815	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
816	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
817	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
818	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
819	sethi	%hi(0xffff0000), %o3	! %o3 = mask
820	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
821	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
822	and	%o1, %o3, %o1		! %o1 = single short value
823	andn	%o0, 0x2, %o0		! %o0 = word address
824	! if low-order bit is 1, we will properly get an alignment fault here
825	ld	[%o0], %o2		! read old value
8261:
827	andn	%o2, %o3, %o5		! clear target bits
828	or	%o5, %o1, %o5		! insert the new value
829	cas	[%o0], %o2, %o5
830	cmp	%o2, %o5
831	bne,a,pn %icc, 1b
832	  mov	%o5, %o2		! %o2 = old value
833	and	%o5, %o3, %o5
834	retl
835	srl	%o5, %g1, %o0		! %o0 = old value
836	SET_SIZE(atomic_swap_ushort)
837	SET_SIZE(atomic_swap_16)
838
839	ENTRY(atomic_swap_32)
840	ALTENTRY(atomic_swap_uint)
841	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8420:
843	ld	[%o0], %o2
8441:
845	mov	%o1, %o3
846	cas	[%o0], %o2, %o3
847	cmp	%o2, %o3
848	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
849	  mov	%o3, %o2
850	retl
851	mov	%o3, %o0
8522:
853	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
854	SET_SIZE(atomic_swap_uint)
855	SET_SIZE(atomic_swap_32)
856
857	ENTRY(atomic_swap_64)
858	ALTENTRY(atomic_swap_ptr)
859	ALTENTRY(atomic_swap_ulong)
860	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8610:
862	ldx	[%o0], %o2
8631:
864	mov	%o1, %o3
865	casx	[%o0], %o2, %o3
866	cmp	%o2, %o3
867	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
868	  mov	%o3, %o2
869	retl
870	mov	%o3, %o0
8712:
872	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
873	SET_SIZE(atomic_swap_ulong)
874	SET_SIZE(atomic_swap_ptr)
875	SET_SIZE(atomic_swap_64)
876
877	ENTRY(atomic_set_long_excl)
878	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
879	mov	1, %o3
880	slln	%o3, %o1, %o3
8810:
882	ldn	[%o0], %o2
8831:
884	andcc	%o2, %o3, %g0		! test if the bit is set
885	bnz,a,pn %ncc, 2f		! if so, then fail out
886	  mov	-1, %o0
887	or	%o2, %o3, %o4		! set the bit, and try to commit it
888	casn	[%o0], %o2, %o4
889	cmp	%o2, %o4
890	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
891	  mov	%o4, %o2
892	mov	%g0, %o0
8932:
894	retl
895	nop
8965:
897	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
898	SET_SIZE(atomic_set_long_excl)
899
900	ENTRY(atomic_clear_long_excl)
901	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
902	mov	1, %o3
903	slln	%o3, %o1, %o3
9040:
905	ldn	[%o0], %o2
9061:
907	andncc	%o3, %o2, %g0		! test if the bit is clear
908	bnz,a,pn %ncc, 2f		! if so, then fail out
909	  mov	-1, %o0
910	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
911	casn	[%o0], %o2, %o4
912	cmp	%o2, %o4
913	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
914	  mov	%o4, %o2
915	mov	%g0, %o0
9162:
917	retl
918	nop
9195:
920	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
921	SET_SIZE(atomic_clear_long_excl)
922
923#if !defined(_KERNEL)
924
925	/*
926	 * Spitfires and Blackbirds have a problem with membars in the
927	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
928	 * that the whole world needs the workaround.
929	 */
930	ENTRY(membar_enter)
931	membar	#StoreLoad|#StoreStore
932	retl
933	nop
934	SET_SIZE(membar_enter)
935
936	ENTRY(membar_exit)
937	membar	#LoadStore|#StoreStore
938	retl
939	nop
940	SET_SIZE(membar_exit)
941
942	ENTRY(membar_producer)
943	membar	#StoreStore
944	retl
945	nop
946	SET_SIZE(membar_producer)
947
948	ENTRY(membar_consumer)
949	membar	#LoadLoad
950	retl
951	nop
952	SET_SIZE(membar_consumer)
953
954#endif	/* !_KERNEL */
955