xref: /titanic_41/usr/src/common/atomic/sparcv9/atomic.s (revision 07d06da50d310a325b457d6330165aebab1e0064)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

27	.file	"atomic.s"
28
29#include <sys/asm_linkage.h>
30
/*
 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
 * to enable exponential backoff. No definition means backoff is
 * not desired, i.e. backoff is disabled.
 * By default, the shift value is used to generate a power of 2
 * value for the backoff limit. In the kernel, processors scale this
 * shift value with the number of online cpus.
 */

#if defined(_KERNEL)
	/*
	 * Legacy kernel interfaces; they will go away (eventually).
	 *
	 * Each line pairs an old kernel name (first argument) with one of
	 * the atomic_* entry points defined later in this file (second
	 * argument).
	 * NOTE(review): ANSI_PRAGMA_WEAK2 is defined outside this file;
	 * presumably it publishes the old name as a weak alias of the
	 * new one -- confirm against <sys/asm_linkage.h>.
	 */
	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)

#ifdef ATOMIC_BO_ENABLE_SHIFT

#if !defined(lint)
	/*
	 * cpu_atomic_delay is only referenced, never defined, in this
	 * file.  It is declared weak; DELAY_SPIN tests its address for
	 * zero before calling it.
	 */
	.weak   cpu_atomic_delay
	.type   cpu_atomic_delay, #function
#endif  /* lint */

/*
 * For the kernel, invoke processor specific delay routine to perform
 * low-impact spin delay. The value of ATOMIC_BO_ENABLE_SHIFT is tuned
 * with respect to the specific spin delay implementation.
 *
 * DELAY_SPIN(label, tmp1, tmp2) emits two local labels, formed by
 * appending 0 and 1 to the label argument:
 *	<label>0 - start of one delay step; ATOMIC_BACKOFF_BACKOFF
 *		   branches back here to repeat the step
 *	<label>1 - first instruction after the delay step
 *
 * tmp1 is loaded once, ahead of <label>0, with the address of
 * cpu_atomic_delay.  When tmp1 is zero the call is skipped, so the
 * step consists of nothing but the brz and its delay slot.
 * The jmpl overwrites %o7, so %o7 is parked in tmp2 beforehand and
 * put back once the delay routine has returned.
 */
#define	DELAY_SPIN(label, tmp1, tmp2)					\
	/*								; \
	 * Define a pragma weak reference to a cpu specific		; \
	 * delay routine for atomic backoff. For CPUs that		; \
	 * have no such delay routine defined, the delay becomes	; \
	 * just a simple tight loop.					; \
	 *								; \
	 * tmp1 = holds CPU specific delay routine			; \
	 * tmp2 = holds %o7 of the atomic routine across the call	; \
	 */								; \
	sethi	%hi(cpu_atomic_delay), tmp1				; \
	or	tmp1, %lo(cpu_atomic_delay), tmp1			; \
label/**/0:								; \
	brz,pn	tmp1, label/**/1					; \
	  mov	%o7, tmp2	/* delay slot : save %o7 */		; \
	jmpl	tmp1, %o7	/* call CPU specific delay routine */	; \
	  nop			/* delay slot : do nothing */		; \
	mov	tmp2, %o7	/* restore saved return address */	; \
label/**/1:

/*
 * For the kernel, we take cas failures into consideration and
 * also scale the backoff limit w.r.t. the number of cpus.
 * Once the number of cas failures equals or exceeds the number of
 * online cpus, the failure count is reset to 0 and the backoff
 * value is reset to 1. This enforces some degree of fairness and
 * prevents starvation.
 * We also scale/normalize the processor provided specific
 * ATOMIC_BO_ENABLE_SHIFT w.r.t. the number of online cpus to
 * obtain the actual final limit to use.
 *
 * Register arguments:
 *	val	- current backoff value; set to 1 on reset
 *	limit	- out: ncpu << ATOMIC_BO_ENABLE_SHIFT (always written,
 *		  it sits in the non-annulled delay slot of the blu)
 *	ncpu	- cached copy of ncpus_online; loaded from memory only
 *		  while it is still 0
 *	cas_cnt	- cas failure count; incremented on every expansion
 *		  (delay slot of the brnz)
 *	label	- prefix for the local labels <label>0 and <label>1
 */
#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
	brnz,pt	ncpu, label/**/0					; \
	  inc	cas_cnt							; \
	sethi	%hi(ncpus_online), ncpu					; \
	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
label/**/0:								; \
	cmp	cas_cnt, ncpu						; \
	blu,pt	%xcc, label/**/1					; \
	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit			; \
	mov	%g0, cas_cnt						; \
	mov	1, val							; \
label/**/1:
#endif	/* ATOMIC_BO_ENABLE_SHIFT */

#else	/* _KERNEL */

/*
 * ATOMIC_BO_ENABLE_SHIFT may be enabled/defined here for generic
 * libc atomics. None for now.
 *
 * Outside the kernel, DELAY_SPIN only plants the <label>0 loop target
 * (no delay routine is called; tmp1 and tmp2 are unused), and
 * ATOMIC_BACKOFF_CPU just loads the fixed limit
 * 1 << ATOMIC_BO_ENABLE_SHIFT (val, ncpu, cas_cnt and label are unused).
 */
#ifdef ATOMIC_BO_ENABLE_SHIFT
#define	DELAY_SPIN(label, tmp1, tmp2)	\
label/**/0:

#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)  \
	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
#endif	/* _KERNEL */

#ifdef ATOMIC_BO_ENABLE_SHIFT
/*
 * ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt): set up the three backoff
 * registers at the top of an atomic routine.
 *	val	- backoff value, initialized to 1
 *	ncpu	- initialized to 0
 *	cas_cnt	- count of cas instruction failures, initialized to 0
 */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
	mov	1, val				; \
	mov	%g0, ncpu			; \
	mov	%g0, cas_cnt

/*
 * ATOMIC_BACKOFF_BRANCH(cr, backoff, loop), backoff-enabled form:
 * branch to the backoff label when condition register cr reports
 * "not equal"; the loop argument is ignored.  The branch is annulling,
 * so the instruction the caller places right after this macro (the
 * delay slot) is executed only when the branch is taken.
 */
#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, backoff

/*
 * Main ATOMIC_BACKOFF_BACKOFF macro for backoff.
 *
 * Steps, in order:
 *  1. ATOMIC_BACKOFF_CPU computes limit (the kernel form may also
 *     reset val to 1).
 *  2. val and limit are both set to the smaller of the two
 *     (unsigned compare).
 *  3. DELAY_SPIN performs one delay step; limit is then decremented
 *     and the step is repeated for as long as the bgu on that
 *     decrement is taken.
 *  4. Control returns to retlabel; val is doubled in the delay slot
 *     of that branch.
 *
 * %g2 and %g3 are handed to DELAY_SPIN as its temporaries, and the
 * integer condition codes are overwritten.  Local labels are built
 * from the label argument with the suffixes _00, _01, _1, _20, _21.
 */
#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
	cmp	val, limit						; \
	blu,a,pt %xcc, label/**/_1					; \
	  mov	val, limit						; \
label/**/_1:								; \
	mov	limit, val						; \
	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
	deccc	limit							; \
	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
	  nop								; \
	ba	retlabel						; \
	  sllx	val, 1, val

#else	/* ATOMIC_BO_ENABLE_SHIFT */
/*
 * Backoff disabled: ATOMIC_BACKOFF_INIT and ATOMIC_BACKOFF_BACKOFF
 * expand to nothing, and ATOMIC_BACKOFF_BRANCH retries immediately by
 * branching (annulling form) to the loop label; the backoff argument
 * is ignored.
 */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)

#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, loop

#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)
#endif	/* ATOMIC_BO_ENABLE_SHIFT */

166	/*
167	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
168	 * separated, you need to also edit the libc sparcv9 platform
169	 * specific mapfile and remove the NODYNSORT attribute
170	 * from atomic_inc_8_nv.
171	 */
172	ENTRY(atomic_inc_8)
173	ALTENTRY(atomic_inc_8_nv)
174	ALTENTRY(atomic_inc_uchar)
175	ALTENTRY(atomic_inc_uchar_nv)
176	ba	add_8
177	  add	%g0, 1, %o1
178	SET_SIZE(atomic_inc_uchar_nv)
179	SET_SIZE(atomic_inc_uchar)
180	SET_SIZE(atomic_inc_8_nv)
181	SET_SIZE(atomic_inc_8)
182
183	/*
184	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
185	 * separated, you need to also edit the libc sparcv9 platform
186	 * specific mapfile and remove the NODYNSORT attribute
187	 * from atomic_dec_8_nv.
188	 */
189	ENTRY(atomic_dec_8)
190	ALTENTRY(atomic_dec_8_nv)
191	ALTENTRY(atomic_dec_uchar)
192	ALTENTRY(atomic_dec_uchar_nv)
193	ba	add_8
194	  sub	%g0, 1, %o1
195	SET_SIZE(atomic_dec_uchar_nv)
196	SET_SIZE(atomic_dec_uchar)
197	SET_SIZE(atomic_dec_8_nv)
198	SET_SIZE(atomic_dec_8)
199
200	/*
201	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
202	 * separated, you need to also edit the libc sparcv9 platform
203	 * specific mapfile and remove the NODYNSORT attribute
204	 * from atomic_add_8_nv.
205	 */
206	ENTRY(atomic_add_8)
207	ALTENTRY(atomic_add_8_nv)
208	ALTENTRY(atomic_add_char)
209	ALTENTRY(atomic_add_char_nv)
210add_8:
211	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
212	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
213	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
214	set	0xff, %o3		! %o3 = mask
215	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
216	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
217	and	%o1, %o3, %o1		! %o1 = single byte value
218	andn	%o0, 0x3, %o0		! %o0 = word address
219	ld	[%o0], %o2		! read old value
2201:
221	add	%o2, %o1, %o5		! add value to the old value
222	and	%o5, %o3, %o5		! clear other bits
223	andn	%o2, %o3, %o4		! clear target bits
224	or	%o4, %o5, %o5		! insert the new value
225	cas	[%o0], %o2, %o5
226	cmp	%o2, %o5
227	bne,a,pn %icc, 1b
228	  mov	%o5, %o2		! %o2 = old value
229	add	%o2, %o1, %o5
230	and	%o5, %o3, %o5
231	retl
232	srl	%o5, %g1, %o0		! %o0 = new value
233	SET_SIZE(atomic_add_char_nv)
234	SET_SIZE(atomic_add_char)
235	SET_SIZE(atomic_add_8_nv)
236	SET_SIZE(atomic_add_8)
237
238	/*
239	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
240	 * separated, you need to also edit the libc sparcv9 platform
241	 * specific mapfile and remove the NODYNSORT attribute
242	 * from atomic_inc_16_nv.
243	 */
244	ENTRY(atomic_inc_16)
245	ALTENTRY(atomic_inc_16_nv)
246	ALTENTRY(atomic_inc_ushort)
247	ALTENTRY(atomic_inc_ushort_nv)
248	ba	add_16
249	  add	%g0, 1, %o1
250	SET_SIZE(atomic_inc_ushort_nv)
251	SET_SIZE(atomic_inc_ushort)
252	SET_SIZE(atomic_inc_16_nv)
253	SET_SIZE(atomic_inc_16)
254
255	/*
256	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
257	 * separated, you need to also edit the libc sparcv9 platform
258	 * specific mapfile and remove the NODYNSORT attribute
259	 * from atomic_dec_16_nv.
260	 */
261	ENTRY(atomic_dec_16)
262	ALTENTRY(atomic_dec_16_nv)
263	ALTENTRY(atomic_dec_ushort)
264	ALTENTRY(atomic_dec_ushort_nv)
265	ba	add_16
266	  sub	%g0, 1, %o1
267	SET_SIZE(atomic_dec_ushort_nv)
268	SET_SIZE(atomic_dec_ushort)
269	SET_SIZE(atomic_dec_16_nv)
270	SET_SIZE(atomic_dec_16)
271
272	/*
273	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
274	 * separated, you need to also edit the libc sparcv9 platform
275	 * specific mapfile and remove the NODYNSORT attribute
276	 * from atomic_add_16_nv.
277	 */
278	ENTRY(atomic_add_16)
279	ALTENTRY(atomic_add_16_nv)
280	ALTENTRY(atomic_add_short)
281	ALTENTRY(atomic_add_short_nv)
282add_16:
283	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
284	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
285	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
286	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
287	sethi	%hi(0xffff0000), %o3	! %o3 = mask
288	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
289	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
290	and	%o1, %o3, %o1		! %o1 = single short value
291	andn	%o0, 0x2, %o0		! %o0 = word address
292	! if low-order bit is 1, we will properly get an alignment fault here
293	ld	[%o0], %o2		! read old value
2941:
295	add	%o1, %o2, %o5		! add value to the old value
296	and	%o5, %o3, %o5		! clear other bits
297	andn	%o2, %o3, %o4		! clear target bits
298	or	%o4, %o5, %o5		! insert the new value
299	cas	[%o0], %o2, %o5
300	cmp	%o2, %o5
301	bne,a,pn %icc, 1b
302	  mov	%o5, %o2		! %o2 = old value
303	add	%o1, %o2, %o5
304	and	%o5, %o3, %o5
305	retl
306	srl	%o5, %g1, %o0		! %o0 = new value
307	SET_SIZE(atomic_add_short_nv)
308	SET_SIZE(atomic_add_short)
309	SET_SIZE(atomic_add_16_nv)
310	SET_SIZE(atomic_add_16)
311
312	/*
313	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
314	 * separated, you need to also edit the libc sparcv9 platform
315	 * specific mapfile and remove the NODYNSORT attribute
316	 * from atomic_inc_32_nv.
317	 */
318	ENTRY(atomic_inc_32)
319	ALTENTRY(atomic_inc_32_nv)
320	ALTENTRY(atomic_inc_uint)
321	ALTENTRY(atomic_inc_uint_nv)
322	ba	add_32
323	  add	%g0, 1, %o1
324	SET_SIZE(atomic_inc_uint_nv)
325	SET_SIZE(atomic_inc_uint)
326	SET_SIZE(atomic_inc_32_nv)
327	SET_SIZE(atomic_inc_32)
328
329	/*
330	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
331	 * separated, you need to also edit the libc sparcv9 platform
332	 * specific mapfile and remove the NODYNSORT attribute
333	 * from atomic_dec_32_nv.
334	 */
335	ENTRY(atomic_dec_32)
336	ALTENTRY(atomic_dec_32_nv)
337	ALTENTRY(atomic_dec_uint)
338	ALTENTRY(atomic_dec_uint_nv)
339	ba	add_32
340	  sub	%g0, 1, %o1
341	SET_SIZE(atomic_dec_uint_nv)
342	SET_SIZE(atomic_dec_uint)
343	SET_SIZE(atomic_dec_32_nv)
344	SET_SIZE(atomic_dec_32)
345
346	/*
347	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
348	 * separated, you need to also edit the libc sparcv9 platform
349	 * specific mapfile and remove the NODYNSORT attribute
350	 * from atomic_add_32_nv.
351	 */
352	ENTRY(atomic_add_32)
353	ALTENTRY(atomic_add_32_nv)
354	ALTENTRY(atomic_add_int)
355	ALTENTRY(atomic_add_int_nv)
356add_32:
357	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
3580:
359	ld	[%o0], %o2
3601:
361	add	%o2, %o1, %o3
362	cas	[%o0], %o2, %o3
363	cmp	%o2, %o3
364	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
365	  mov	%o3, %o2
366	retl
367	add	%o2, %o1, %o0		! return new value
3682:
369	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
370	SET_SIZE(atomic_add_int_nv)
371	SET_SIZE(atomic_add_int)
372	SET_SIZE(atomic_add_32_nv)
373	SET_SIZE(atomic_add_32)
374
375	/*
376	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
377	 * separated, you need to also edit the libc sparcv9 platform
378	 * specific mapfile and remove the NODYNSORT attribute
379	 * from atomic_inc_64_nv.
380	 */
381	ENTRY(atomic_inc_64)
382	ALTENTRY(atomic_inc_64_nv)
383	ALTENTRY(atomic_inc_ulong)
384	ALTENTRY(atomic_inc_ulong_nv)
385	ba	add_64
386	  add	%g0, 1, %o1
387	SET_SIZE(atomic_inc_ulong_nv)
388	SET_SIZE(atomic_inc_ulong)
389	SET_SIZE(atomic_inc_64_nv)
390	SET_SIZE(atomic_inc_64)
391
392	/*
393	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
394	 * separated, you need to also edit the libc sparcv9 platform
395	 * specific mapfile and remove the NODYNSORT attribute
396	 * from atomic_dec_64_nv.
397	 */
398	ENTRY(atomic_dec_64)
399	ALTENTRY(atomic_dec_64_nv)
400	ALTENTRY(atomic_dec_ulong)
401	ALTENTRY(atomic_dec_ulong_nv)
402	ba	add_64
403	  sub	%g0, 1, %o1
404	SET_SIZE(atomic_dec_ulong_nv)
405	SET_SIZE(atomic_dec_ulong)
406	SET_SIZE(atomic_dec_64_nv)
407	SET_SIZE(atomic_dec_64)
408
409	/*
410	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
411	 * separated, you need to also edit the libc sparcv9 platform
412	 * specific mapfile and remove the NODYNSORT attribute
413	 * from atomic_add_64_nv.
414	 */
415	ENTRY(atomic_add_64)
416	ALTENTRY(atomic_add_64_nv)
417	ALTENTRY(atomic_add_ptr)
418	ALTENTRY(atomic_add_ptr_nv)
419	ALTENTRY(atomic_add_long)
420	ALTENTRY(atomic_add_long_nv)
421add_64:
422	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
4230:
424	ldx	[%o0], %o2
4251:
426	add	%o2, %o1, %o3
427	casx	[%o0], %o2, %o3
428	cmp	%o2, %o3
429	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
430	  mov	%o3, %o2
431	retl
432	add	%o2, %o1, %o0		! return new value
4332:
434	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
435	SET_SIZE(atomic_add_long_nv)
436	SET_SIZE(atomic_add_long)
437	SET_SIZE(atomic_add_ptr_nv)
438	SET_SIZE(atomic_add_ptr)
439	SET_SIZE(atomic_add_64_nv)
440	SET_SIZE(atomic_add_64)
441
442	/*
443	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
444	 * separated, you need to also edit the libc sparcv9 platform
445	 * specific mapfile and remove the NODYNSORT attribute
446	 * from atomic_or_8_nv.
447	 */
448	ENTRY(atomic_or_8)
449	ALTENTRY(atomic_or_8_nv)
450	ALTENTRY(atomic_or_uchar)
451	ALTENTRY(atomic_or_uchar_nv)
452	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
453	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
454	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
455	set	0xff, %o3		! %o3 = mask
456	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
457	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
458	and	%o1, %o3, %o1		! %o1 = single byte value
459	andn	%o0, 0x3, %o0		! %o0 = word address
460	ld	[%o0], %o2		! read old value
4611:
462	or	%o2, %o1, %o5		! or in the new value
463	cas	[%o0], %o2, %o5
464	cmp	%o2, %o5
465	bne,a,pn %icc, 1b
466	  mov	%o5, %o2		! %o2 = old value
467	or	%o2, %o1, %o5
468	and	%o5, %o3, %o5
469	retl
470	srl	%o5, %g1, %o0		! %o0 = new value
471	SET_SIZE(atomic_or_uchar_nv)
472	SET_SIZE(atomic_or_uchar)
473	SET_SIZE(atomic_or_8_nv)
474	SET_SIZE(atomic_or_8)
475
476	/*
477	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
478	 * separated, you need to also edit the libc sparcv9 platform
479	 * specific mapfile and remove the NODYNSORT attribute
480	 * from atomic_or_16_nv.
481	 */
482	ENTRY(atomic_or_16)
483	ALTENTRY(atomic_or_16_nv)
484	ALTENTRY(atomic_or_ushort)
485	ALTENTRY(atomic_or_ushort_nv)
486	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
487	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
488	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
489	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
490	sethi	%hi(0xffff0000), %o3	! %o3 = mask
491	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
492	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
493	and	%o1, %o3, %o1		! %o1 = single short value
494	andn	%o0, 0x2, %o0		! %o0 = word address
495	! if low-order bit is 1, we will properly get an alignment fault here
496	ld	[%o0], %o2		! read old value
4971:
498	or	%o2, %o1, %o5		! or in the new value
499	cas	[%o0], %o2, %o5
500	cmp	%o2, %o5
501	bne,a,pn %icc, 1b
502	  mov	%o5, %o2		! %o2 = old value
503	or	%o2, %o1, %o5		! or in the new value
504	and	%o5, %o3, %o5
505	retl
506	srl	%o5, %g1, %o0		! %o0 = new value
507	SET_SIZE(atomic_or_ushort_nv)
508	SET_SIZE(atomic_or_ushort)
509	SET_SIZE(atomic_or_16_nv)
510	SET_SIZE(atomic_or_16)
511
512	/*
513	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
514	 * separated, you need to also edit the libc sparcv9 platform
515	 * specific mapfile and remove the NODYNSORT attribute
516	 * from atomic_or_32_nv.
517	 */
518	ENTRY(atomic_or_32)
519	ALTENTRY(atomic_or_32_nv)
520	ALTENTRY(atomic_or_uint)
521	ALTENTRY(atomic_or_uint_nv)
522	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5230:
524	ld	[%o0], %o2
5251:
526	or	%o2, %o1, %o3
527	cas	[%o0], %o2, %o3
528	cmp	%o2, %o3
529	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
530	  mov	%o3, %o2
531	retl
532	or	%o2, %o1, %o0		! return new value
5332:
534	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
535	SET_SIZE(atomic_or_uint_nv)
536	SET_SIZE(atomic_or_uint)
537	SET_SIZE(atomic_or_32_nv)
538	SET_SIZE(atomic_or_32)
539
540	/*
541	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
542	 * separated, you need to also edit the libc sparcv9 platform
543	 * specific mapfile and remove the NODYNSORT attribute
544	 * from atomic_or_64_nv.
545	 */
546	ENTRY(atomic_or_64)
547	ALTENTRY(atomic_or_64_nv)
548	ALTENTRY(atomic_or_ulong)
549	ALTENTRY(atomic_or_ulong_nv)
550	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5510:
552	ldx	[%o0], %o2
5531:
554	or	%o2, %o1, %o3
555	casx	[%o0], %o2, %o3
556	cmp	%o2, %o3
557	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
558	  mov	%o3, %o2
559	retl
560	or	%o2, %o1, %o0		! return new value
5612:
562	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
563	SET_SIZE(atomic_or_ulong_nv)
564	SET_SIZE(atomic_or_ulong)
565	SET_SIZE(atomic_or_64_nv)
566	SET_SIZE(atomic_or_64)
567
568	/*
569	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
570	 * separated, you need to also edit the libc sparcv9 platform
571	 * specific mapfile and remove the NODYNSORT attribute
572	 * from atomic_and_8_nv.
573	 */
574	ENTRY(atomic_and_8)
575	ALTENTRY(atomic_and_8_nv)
576	ALTENTRY(atomic_and_uchar)
577	ALTENTRY(atomic_and_uchar_nv)
578	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
579	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
580	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
581	set	0xff, %o3		! %o3 = mask
582	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
583	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
584	orn	%o1, %o3, %o1		! all ones in other bytes
585	andn	%o0, 0x3, %o0		! %o0 = word address
586	ld	[%o0], %o2		! read old value
5871:
588	and	%o2, %o1, %o5		! and in the new value
589	cas	[%o0], %o2, %o5
590	cmp	%o2, %o5
591	bne,a,pn %icc, 1b
592	  mov	%o5, %o2		! %o2 = old value
593	and	%o2, %o1, %o5
594	and	%o5, %o3, %o5
595	retl
596	srl	%o5, %g1, %o0		! %o0 = new value
597	SET_SIZE(atomic_and_uchar_nv)
598	SET_SIZE(atomic_and_uchar)
599	SET_SIZE(atomic_and_8_nv)
600	SET_SIZE(atomic_and_8)
601
602	/*
603	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
604	 * separated, you need to also edit the libc sparcv9 platform
605	 * specific mapfile and remove the NODYNSORT attribute
606	 * from atomic_and_16_nv.
607	 */
608	ENTRY(atomic_and_16)
609	ALTENTRY(atomic_and_16_nv)
610	ALTENTRY(atomic_and_ushort)
611	ALTENTRY(atomic_and_ushort_nv)
612	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
613	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
614	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
615	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
616	sethi	%hi(0xffff0000), %o3	! %o3 = mask
617	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
618	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
619	orn	%o1, %o3, %o1		! all ones in the other half
620	andn	%o0, 0x2, %o0		! %o0 = word address
621	! if low-order bit is 1, we will properly get an alignment fault here
622	ld	[%o0], %o2		! read old value
6231:
624	and	%o2, %o1, %o5		! and in the new value
625	cas	[%o0], %o2, %o5
626	cmp	%o2, %o5
627	bne,a,pn %icc, 1b
628	  mov	%o5, %o2		! %o2 = old value
629	and	%o2, %o1, %o5
630	and	%o5, %o3, %o5
631	retl
632	srl	%o5, %g1, %o0		! %o0 = new value
633	SET_SIZE(atomic_and_ushort_nv)
634	SET_SIZE(atomic_and_ushort)
635	SET_SIZE(atomic_and_16_nv)
636	SET_SIZE(atomic_and_16)
637
638	/*
639	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
640	 * separated, you need to also edit the libc sparcv9 platform
641	 * specific mapfile and remove the NODYNSORT attribute
642	 * from atomic_and_32_nv.
643	 */
644	ENTRY(atomic_and_32)
645	ALTENTRY(atomic_and_32_nv)
646	ALTENTRY(atomic_and_uint)
647	ALTENTRY(atomic_and_uint_nv)
648	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6490:
650	ld	[%o0], %o2
6511:
652	and	%o2, %o1, %o3
653	cas	[%o0], %o2, %o3
654	cmp	%o2, %o3
655	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
656	  mov	%o3, %o2
657	retl
658	and	%o2, %o1, %o0		! return new value
6592:
660	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
661	SET_SIZE(atomic_and_uint_nv)
662	SET_SIZE(atomic_and_uint)
663	SET_SIZE(atomic_and_32_nv)
664	SET_SIZE(atomic_and_32)
665
666	/*
667	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
668	 * separated, you need to also edit the libc sparcv9 platform
669	 * specific mapfile and remove the NODYNSORT attribute
670	 * from atomic_and_64_nv.
671	 */
672	ENTRY(atomic_and_64)
673	ALTENTRY(atomic_and_64_nv)
674	ALTENTRY(atomic_and_ulong)
675	ALTENTRY(atomic_and_ulong_nv)
676	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6770:
678	ldx	[%o0], %o2
6791:
680	and	%o2, %o1, %o3
681	casx	[%o0], %o2, %o3
682	cmp	%o2, %o3
683	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
684	  mov	%o3, %o2
685	retl
686	and	%o2, %o1, %o0		! return new value
6872:
688	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
689	SET_SIZE(atomic_and_ulong_nv)
690	SET_SIZE(atomic_and_ulong)
691	SET_SIZE(atomic_and_64_nv)
692	SET_SIZE(atomic_and_64)
693
694	ENTRY(atomic_cas_8)
695	ALTENTRY(atomic_cas_uchar)
696	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
697	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
698	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
699	set	0xff, %o3		! %o3 = mask
700	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
701	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
702	and	%o1, %o3, %o1		! %o1 = single byte value
703	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
704	and	%o2, %o3, %o2		! %o2 = single byte value
705	andn	%o0, 0x3, %o0		! %o0 = word address
706	ld	[%o0], %o4		! read old value
7071:
708	andn	%o4, %o3, %o4		! clear target bits
709	or	%o4, %o2, %o5		! insert the new value
710	or	%o4, %o1, %o4		! insert the comparison value
711	cas	[%o0], %o4, %o5
712	cmp	%o4, %o5		! did we succeed?
713	be,pt	%icc, 2f
714	  and	%o5, %o3, %o4		! isolate the old value
715	cmp	%o1, %o4		! should we have succeeded?
716	be,a,pt	%icc, 1b		! yes, try again
717	  mov	%o5, %o4		! %o4 = old value
7182:
719	retl
720	srl	%o4, %g1, %o0		! %o0 = old value
721	SET_SIZE(atomic_cas_uchar)
722	SET_SIZE(atomic_cas_8)
723
724	ENTRY(atomic_cas_16)
725	ALTENTRY(atomic_cas_ushort)
726	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
727	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
728	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
729	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
730	sethi	%hi(0xffff0000), %o3	! %o3 = mask
731	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
732	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
733	and	%o1, %o3, %o1		! %o1 = single short value
734	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
735	and	%o2, %o3, %o2		! %o2 = single short value
736	andn	%o0, 0x2, %o0		! %o0 = word address
737	! if low-order bit is 1, we will properly get an alignment fault here
738	ld	[%o0], %o4		! read old value
7391:
740	andn	%o4, %o3, %o4		! clear target bits
741	or	%o4, %o2, %o5		! insert the new value
742	or	%o4, %o1, %o4		! insert the comparison value
743	cas	[%o0], %o4, %o5
744	cmp	%o4, %o5		! did we succeed?
745	be,pt	%icc, 2f
746	  and	%o5, %o3, %o4		! isolate the old value
747	cmp	%o1, %o4		! should we have succeeded?
748	be,a,pt	%icc, 1b		! yes, try again
749	  mov	%o5, %o4		! %o4 = old value
7502:
751	retl
752	srl	%o4, %g1, %o0		! %o0 = old value
753	SET_SIZE(atomic_cas_ushort)
754	SET_SIZE(atomic_cas_16)
755
756	ENTRY(atomic_cas_32)
757	ALTENTRY(atomic_cas_uint)
758	cas	[%o0], %o1, %o2
759	retl
760	mov	%o2, %o0
761	SET_SIZE(atomic_cas_uint)
762	SET_SIZE(atomic_cas_32)
763
764	ENTRY(atomic_cas_64)
765	ALTENTRY(atomic_cas_ptr)
766	ALTENTRY(atomic_cas_ulong)
767	casx	[%o0], %o1, %o2
768	retl
769	mov	%o2, %o0
770	SET_SIZE(atomic_cas_ulong)
771	SET_SIZE(atomic_cas_ptr)
772	SET_SIZE(atomic_cas_64)
773
774	ENTRY(atomic_swap_8)
775	ALTENTRY(atomic_swap_uchar)
776	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
777	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
778	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
779	set	0xff, %o3		! %o3 = mask
780	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
781	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
782	and	%o1, %o3, %o1		! %o1 = single byte value
783	andn	%o0, 0x3, %o0		! %o0 = word address
784	ld	[%o0], %o2		! read old value
7851:
786	andn	%o2, %o3, %o5		! clear target bits
787	or	%o5, %o1, %o5		! insert the new value
788	cas	[%o0], %o2, %o5
789	cmp	%o2, %o5
790	bne,a,pn %icc, 1b
791	  mov	%o5, %o2		! %o2 = old value
792	and	%o5, %o3, %o5
793	retl
794	srl	%o5, %g1, %o0		! %o0 = old value
795	SET_SIZE(atomic_swap_uchar)
796	SET_SIZE(atomic_swap_8)
797
798	ENTRY(atomic_swap_16)
799	ALTENTRY(atomic_swap_ushort)
800	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
801	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
802	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
803	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
804	sethi	%hi(0xffff0000), %o3	! %o3 = mask
805	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
806	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
807	and	%o1, %o3, %o1		! %o1 = single short value
808	andn	%o0, 0x2, %o0		! %o0 = word address
809	! if low-order bit is 1, we will properly get an alignment fault here
810	ld	[%o0], %o2		! read old value
8111:
812	andn	%o2, %o3, %o5		! clear target bits
813	or	%o5, %o1, %o5		! insert the new value
814	cas	[%o0], %o2, %o5
815	cmp	%o2, %o5
816	bne,a,pn %icc, 1b
817	  mov	%o5, %o2		! %o2 = old value
818	and	%o5, %o3, %o5
819	retl
820	srl	%o5, %g1, %o0		! %o0 = old value
821	SET_SIZE(atomic_swap_ushort)
822	SET_SIZE(atomic_swap_16)
823
824	ENTRY(atomic_swap_32)
825	ALTENTRY(atomic_swap_uint)
826	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8270:
828	ld	[%o0], %o2
8291:
830	mov	%o1, %o3
831	cas	[%o0], %o2, %o3
832	cmp	%o2, %o3
833	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
834	  mov	%o3, %o2
835	retl
836	mov	%o3, %o0
8372:
838	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
839	SET_SIZE(atomic_swap_uint)
840	SET_SIZE(atomic_swap_32)
841
842	ENTRY(atomic_swap_64)
843	ALTENTRY(atomic_swap_ptr)
844	ALTENTRY(atomic_swap_ulong)
845	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8460:
847	ldx	[%o0], %o2
8481:
849	mov	%o1, %o3
850	casx	[%o0], %o2, %o3
851	cmp	%o2, %o3
852	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
853	  mov	%o3, %o2
854	retl
855	mov	%o3, %o0
8562:
857	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
858	SET_SIZE(atomic_swap_ulong)
859	SET_SIZE(atomic_swap_ptr)
860	SET_SIZE(atomic_swap_64)
861
862	ENTRY(atomic_set_long_excl)
863	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
864	mov	1, %o3
865	slln	%o3, %o1, %o3
8660:
867	ldn	[%o0], %o2
8681:
869	andcc	%o2, %o3, %g0		! test if the bit is set
870	bnz,a,pn %ncc, 2f		! if so, then fail out
871	  mov	-1, %o0
872	or	%o2, %o3, %o4		! set the bit, and try to commit it
873	casn	[%o0], %o2, %o4
874	cmp	%o2, %o4
875	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
876	  mov	%o4, %o2
877	mov	%g0, %o0
8782:
879	retl
880	nop
8815:
882	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
883	SET_SIZE(atomic_set_long_excl)
884
885	ENTRY(atomic_clear_long_excl)
886	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
887	mov	1, %o3
888	slln	%o3, %o1, %o3
8890:
890	ldn	[%o0], %o2
8911:
892	andncc	%o3, %o2, %g0		! test if the bit is clear
893	bnz,a,pn %ncc, 2f		! if so, then fail out
894	  mov	-1, %o0
895	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
896	casn	[%o0], %o2, %o4
897	cmp	%o2, %o4
898	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
899	  mov	%o4, %o2
900	mov	%g0, %o0
9012:
902	retl
903	nop
9045:
905	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
906	SET_SIZE(atomic_clear_long_excl)
907
#if !defined(_KERNEL)

	/*
	 * Spitfires and Blackbirds have a problem with membars in the
	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
	 * that the whole world needs the workaround.
	 *
	 * membar_enter: issue membar #StoreLoad|#StoreStore and return.
	 * The membar is placed ahead of the retl and the retl delay slot
	 * is filled with a nop.
	 */
	ENTRY(membar_enter)
	membar	#StoreLoad|#StoreStore
	retl
	  nop
	SET_SIZE(membar_enter)

921	ENTRY(membar_exit)
922	membar	#LoadStore|#StoreStore
923	retl
924	nop
925	SET_SIZE(membar_exit)
926
927	ENTRY(membar_producer)
928	membar	#StoreStore
929	retl
930	nop
931	SET_SIZE(membar_producer)
932
933	ENTRY(membar_consumer)
934	membar	#LoadLoad
935	retl
936	nop
937	SET_SIZE(membar_consumer)
938
939#endif	/* !_KERNEL */
940