xref: /titanic_41/usr/src/common/atomic/sparcv9/atomic.s (revision 54c529d43d4363891fd7381edde56d2cafbff593)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"atomic.s"
28
29#include <sys/asm_linkage.h>
30
31/*
32 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
33 * to enable exponential backoff. No definition means backoff is
34 * not desired i.e. backoff should be disabled.
35 * By default, the shift value is used to generate a power of 2
36 * value for backoff limit. In the kernel, processors scale this
37 * shift value with the number of online cpus.
38 */
39
40#if defined(_KERNEL)
41	/*
42	 * Legacy kernel interfaces; they will go away (eventually).
	 *
	 * Each line below ties an old kernel name (first argument) to the
	 * atomic_* routine named in the second argument, so existing callers
	 * of cas8(), swapl(), etc. land in the routines defined in this file.
	 * NOTE(review): ANSI_PRAGMA_WEAK2 is not defined in this file; it is
	 * presumably supplied by <sys/asm_linkage.h> and presumably emits a
	 * weak alias -- confirm the exact expansion there.
43	 */
44	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
45	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
46	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
47	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
48	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
49	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
50	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
51	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)
52
53#ifdef ATOMIC_BO_ENABLE_SHIFT
54
55#if !defined(lint)
	/*
	 * cpu_atomic_delay is declared as a weak function symbol.  The
	 * kernel DELAY_SPIN macro loads its address and skips the call when
	 * that address is zero, so a CPU module that does not provide the
	 * routine is tolerated.
	 */
56	.weak   cpu_atomic_delay
57	.type   cpu_atomic_delay, #function
58#endif  /* lint */
59
60/*
61 * For the kernel, invoke processor specific delay routine to perform
62 * low-impact spin delay. The value of ATOMIC_BO_ENABLE_SHIFT is tuned
63 * with respect to the specific spin delay implementation.
 *
 * Macro arguments:
 *   label - prefix for the two local labels the macro emits (the prefix
 *           with 0 appended and the prefix with 1 appended)
 *   tmp1  - scratch register; receives the address of cpu_atomic_delay
 *   tmp2  - scratch register; keeps a copy of %o7 across the call
 *
 * Flow:
 *   - sethi/or builds the address of cpu_atomic_delay in tmp1.
 *   - At the 0 label, brz skips to the 1 label when tmp1 is zero.  The
 *     "mov %o7, tmp2" that follows the brz sits in its delay slot and the
 *     branch is not annulled, so %o7 is copied to tmp2 on both paths.
 *   - jmpl calls through tmp1 and overwrites %o7 with the link address;
 *     %o7 is put back from tmp2 once the routine returns.
 *   - ATOMIC_BACKOFF_BACKOFF branches back to the 0 label to repeat the
 *     delay, which is why the address computation sits above that label.
64 */
65#define	DELAY_SPIN(label, tmp1, tmp2)					\
66	/*								; \
67	 * Define a pragma weak reference to a cpu specific		; \
68	 * delay routine for atomic backoff. For CPUs that		; \
69	 * have no such delay routine defined, the delay becomes	; \
70	 * just a simple tight loop.					; \
71	 *								; \
72	 * tmp1 = holds CPU specific delay routine			; \
73	 * tmp2 = holds atomic routine's callee return address		; \
74	 */								; \
75	sethi	%hi(cpu_atomic_delay), tmp1				; \
76	or	tmp1, %lo(cpu_atomic_delay), tmp1			; \
77label/**/0:								; \
78	brz,pn	tmp1, label/**/1					; \
79	mov	%o7, tmp2						; \
80	jmpl	tmp1, %o7	/* call CPU specific delay routine */	; \
81	  nop			/* delay slot : do nothing */		; \
82	mov	tmp2, %o7	/* restore callee's return address */	; \
83label/**/1:
84
85/*
86 * For the kernel, we take cas failures into consideration
87 * and also scale the backoff limit w.r.t. the number of cpus.
88 * For cas failures, we reset the backoff value to 1 if the number
89 * of cas failures equals or exceeds the number of online cpus. This
90 * will enforce some degree of fairness and prevent starvation.
91 * We also scale/normalize the processor provided specific
92 * ATOMIC_BO_ENABLE_SHIFT w.r.t. the number of online cpus to
93 * obtain the actual final limit to use.
 *
 * Macro arguments (all registers except label):
 *   val     - current backoff value; set to 1 when the failure count
 *             reaches ncpu
 *   limit   - output: ncpu shifted left by ATOMIC_BO_ENABLE_SHIFT
 *   ncpu    - cached copy of ncpus_online; loaded from memory only while
 *             the register is still zero
 *   cas_cnt - count of cas failures; incremented on every expansion
 *             (the inc sits in the delay slot of the non-annulled brnz)
 *             and cleared when it reaches ncpu
 *   label   - prefix for the two local labels the macro emits
 *
 * The sllx that computes limit sits in the delay slot of the non-annulled
 * blu, so limit is written whether or not the reset path is taken.
94 */
95#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
96	brnz,pt	ncpu, label/**/0					; \
97	  inc	cas_cnt							; \
98	sethi	%hi(ncpus_online), ncpu					; \
99	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
100label/**/0:								; \
101	cmp	cas_cnt, ncpu						; \
102	blu,pt	%xcc, label/**/1					; \
103	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit			; \
104	mov	%g0, cas_cnt						; \
105	mov	1, val							; \
106label/**/1:
107#endif	/* ATOMIC_BO_ENABLE_SHIFT */
108
109#else	/* _KERNEL */
110
111/*
112 * ATOMIC_BO_ENABLE_SHIFT may be enabled/defined here for generic
113 * libc atomics. None for now.
 *
 * Non-kernel variants of the two helper macros, used only if
 * ATOMIC_BO_ENABLE_SHIFT is defined:
 *   DELAY_SPIN         - emits nothing but the 0 label that
 *                        ATOMIC_BACKOFF_BACKOFF branches back to, so the
 *                        delay is a bare countdown loop; tmp1 and tmp2
 *                        are unused.
 *   ATOMIC_BACKOFF_CPU - sets limit to the constant
 *                        1 << ATOMIC_BO_ENABLE_SHIFT; val, ncpu, cas_cnt
 *                        and label are unused.
114 */
115#ifdef ATOMIC_BO_ENABLE_SHIFT
116#define	DELAY_SPIN(label, tmp1, tmp2)	\
117label/**/0:
118
119#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)  \
120	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
121#endif	/* ATOMIC_BO_ENABLE_SHIFT */
122#endif	/* _KERNEL */
123
124#ifdef ATOMIC_BO_ENABLE_SHIFT
125/*
126 * ATOMIC_BACKOFF_INIT macro for initialization.
127 * backoff val is initialized to 1.
128 * ncpu is initialized to 0
129 * The cas_cnt counts the cas instruction failure and is
130 * initialized to 0.
 *
 * All three arguments are registers chosen by the calling routine; the
 * same registers must later be handed to ATOMIC_BACKOFF_BACKOFF.  A zero
 * ncpu is what makes the kernel ATOMIC_BACKOFF_CPU load ncpus_online the
 * first time it runs.
131 */
132#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
133	mov	1, val				; \
134	mov	%g0, ncpu			; \
135	mov	%g0, cas_cnt
136
/*
 * ATOMIC_BACKOFF_BRANCH (backoff enabled): placed right after the cmp
 * that follows a cas.  When the condition codes in cr say "not equal"
 * (the cas failed) it branches to the backoff label; the loop label is
 * ignored in this variant.  The branch is annulled, so the instruction
 * the caller puts in the delay slot executes only when the branch is
 * taken.
 */
137#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
138	bne,a,pn cr, backoff
139
140/*
141 * Main ATOMIC_BACKOFF_BACKOFF macro for backoff.
 *
 * Macro arguments:
 *   val, ncpu, cas_cnt - the registers given to ATOMIC_BACKOFF_INIT
 *   limit              - scratch register for the iteration count
 *   label              - unique prefix for the labels generated here
 *   retlabel           - where to resume once the delay is finished
 *
 * Steps:
 *   1. ATOMIC_BACKOFF_CPU computes the upper bound into limit (and may
 *      reset val to 1).
 *   2. If val is below that bound, limit is replaced by val (annulled
 *      delay slot, executed only when the branch is taken); val is then
 *      set to limit, so both registers hold the smaller of the two.
 *   3. DELAY_SPIN runs once, then limit is decremented and control goes
 *      back to DELAY_SPIN's 0 label (the label prefix with _20 appended)
 *      for as long as bgu holds on the flags set by deccc.
 *   4. Control returns to retlabel, with val doubled in the delay slot of
 *      the ba.
 *
 * %g2 and %g3 are passed to DELAY_SPIN as scratch registers, so they are
 * overwritten whenever this macro expands to code.
142 */
143#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
144	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
145	cmp	val, limit						; \
146	blu,a,pt %xcc, label/**/_1					; \
147	  mov	val, limit						; \
148label/**/_1:								; \
149	mov	limit, val						; \
150	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
151	deccc	limit							; \
152	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
153	  nop								; \
154	ba	retlabel						; \
155	  sllx  val, 1, val
156#else	/* ATOMIC_BO_ENABLE_SHIFT */
/*
 * Backoff disabled: ATOMIC_BACKOFF_INIT and ATOMIC_BACKOFF_BACKOFF expand
 * to nothing, and ATOMIC_BACKOFF_BRANCH sends a failed cas straight back
 * to the retry label (loop), ignoring the backoff label.  The branch is
 * still annulled, so the caller's delay-slot instruction runs only when
 * the retry is taken.
 */
157#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)
158
159#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
160	bne,a,pn cr, loop
161
162#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)
163#endif	/* ATOMIC_BO_ENABLE_SHIFT */
164
165	/*
	 * atomic_inc_8, atomic_inc_8_nv, atomic_inc_uchar and
	 * atomic_inc_uchar_nv share one body: put a delta of +1 in %o1 and
	 * continue in the common byte adder at add_8.  %o0 (the target
	 * address) is passed through untouched.
	 *
166	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
167	 * separated, you need to also edit the libc sparcv9 platform
168	 * specific mapfile and remove the NODYNSORT attribute
169	 * from atomic_inc_8_nv.
170	 */
171	ENTRY(atomic_inc_8)
172	ALTENTRY(atomic_inc_8_nv)
173	ALTENTRY(atomic_inc_uchar)
174	ALTENTRY(atomic_inc_uchar_nv)
175	ba	add_8
	/* delay slot of the ba: %o1 = 1 before add_8 starts executing */
176	  add	%g0, 1, %o1
177	SET_SIZE(atomic_inc_uchar_nv)
178	SET_SIZE(atomic_inc_uchar)
179	SET_SIZE(atomic_inc_8_nv)
180	SET_SIZE(atomic_inc_8)
181
182	/*
	 * atomic_dec_8, atomic_dec_8_nv, atomic_dec_uchar and
	 * atomic_dec_uchar_nv share one body: put a delta of -1 in %o1 and
	 * continue in the common byte adder at add_8.  %o0 (the target
	 * address) is passed through untouched.
	 *
183	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
184	 * separated, you need to also edit the libc sparcv9 platform
185	 * specific mapfile and remove the NODYNSORT attribute
186	 * from atomic_dec_8_nv.
187	 */
188	ENTRY(atomic_dec_8)
189	ALTENTRY(atomic_dec_8_nv)
190	ALTENTRY(atomic_dec_uchar)
191	ALTENTRY(atomic_dec_uchar_nv)
192	ba	add_8
	/* delay slot of the ba: %o1 = 0 - 1 before add_8 starts executing */
193	  sub	%g0, 1, %o1
194	SET_SIZE(atomic_dec_uchar_nv)
195	SET_SIZE(atomic_dec_uchar)
196	SET_SIZE(atomic_dec_8_nv)
197	SET_SIZE(atomic_dec_8)
198
199	/*
	 * atomic_add_8 / atomic_add_8_nv / atomic_add_char /
	 * atomic_add_char_nv: add a delta to one byte.
	 *
	 *   In:  %o0 = address of the byte, %o1 = delta
	 *   Out: %o0 = new byte value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * The update is done with a 32-bit cas on the aligned word that
	 * contains the byte; the other three bytes are written back
	 * unchanged.  add_8 is also the landing point for the inc_8 and
	 * dec_8 entry points, which arrive with %o1 preset.
	 *
200	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
201	 * separated, you need to also edit the libc sparcv9 platform
202	 * specific mapfile and remove the NODYNSORT attribute
203	 * from atomic_add_8_nv.
204	 */
205	ENTRY(atomic_add_8)
206	ALTENTRY(atomic_add_8_nv)
207	ALTENTRY(atomic_add_char)
208	ALTENTRY(atomic_add_char_nv)
209add_8:
210	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
211	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
212	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
213	set	0xff, %o3		! %o3 = mask
214	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
215	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
216	and	%o1, %o3, %o1		! %o1 = single byte value
217	andn	%o0, 0x3, %o0		! %o0 = word address
218	ld	[%o0], %o2		! read old value
2191:
220	add	%o2, %o1, %o5		! add value to the old value
221	and	%o5, %o3, %o5		! clear other bits
222	andn	%o2, %o3, %o4		! clear target bits
223	or	%o4, %o5, %o5		! insert the new value
	/*
	 * cas stores %o5 only if the word still equals %o2; in either case
	 * %o5 comes back holding the word that was found in memory.
	 */
224	cas	[%o0], %o2, %o5
225	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
226	bne,a,pn %icc, 1b
227	  mov	%o5, %o2		! %o2 = old value
	/* success: rebuild the new byte from the old word for the return value */
228	add	%o2, %o1, %o5
229	and	%o5, %o3, %o5
230	retl
231	srl	%o5, %g1, %o0		! %o0 = new value
232	SET_SIZE(atomic_add_char_nv)
233	SET_SIZE(atomic_add_char)
234	SET_SIZE(atomic_add_8_nv)
235	SET_SIZE(atomic_add_8)
236
237	/*
	 * atomic_inc_16, atomic_inc_16_nv, atomic_inc_ushort and
	 * atomic_inc_ushort_nv share one body: put a delta of +1 in %o1 and
	 * continue in the common halfword adder at add_16.  %o0 (the target
	 * address) is passed through untouched.
	 *
238	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
239	 * separated, you need to also edit the libc sparcv9 platform
240	 * specific mapfile and remove the NODYNSORT attribute
241	 * from atomic_inc_16_nv.
242	 */
243	ENTRY(atomic_inc_16)
244	ALTENTRY(atomic_inc_16_nv)
245	ALTENTRY(atomic_inc_ushort)
246	ALTENTRY(atomic_inc_ushort_nv)
247	ba	add_16
	/* delay slot of the ba: %o1 = 1 before add_16 starts executing */
248	  add	%g0, 1, %o1
249	SET_SIZE(atomic_inc_ushort_nv)
250	SET_SIZE(atomic_inc_ushort)
251	SET_SIZE(atomic_inc_16_nv)
252	SET_SIZE(atomic_inc_16)
253
254	/*
	 * atomic_dec_16, atomic_dec_16_nv, atomic_dec_ushort and
	 * atomic_dec_ushort_nv share one body: put a delta of -1 in %o1 and
	 * continue in the common halfword adder at add_16.  %o0 (the target
	 * address) is passed through untouched.
	 *
255	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
256	 * separated, you need to also edit the libc sparcv9 platform
257	 * specific mapfile and remove the NODYNSORT attribute
258	 * from atomic_dec_16_nv.
259	 */
260	ENTRY(atomic_dec_16)
261	ALTENTRY(atomic_dec_16_nv)
262	ALTENTRY(atomic_dec_ushort)
263	ALTENTRY(atomic_dec_ushort_nv)
264	ba	add_16
	/* delay slot of the ba: %o1 = 0 - 1 before add_16 starts executing */
265	  sub	%g0, 1, %o1
266	SET_SIZE(atomic_dec_ushort_nv)
267	SET_SIZE(atomic_dec_ushort)
268	SET_SIZE(atomic_dec_16_nv)
269	SET_SIZE(atomic_dec_16)
270
271	/*
	 * atomic_add_16 / atomic_add_16_nv / atomic_add_short /
	 * atomic_add_short_nv: add a delta to one halfword.
	 *
	 *   In:  %o0 = address of the halfword, %o1 = delta
	 *   Out: %o0 = new halfword value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * The update is done with a 32-bit cas on the word that contains
	 * the halfword; the other half is written back unchanged.  Only
	 * address bit 1 is cleared to form the word address, so an odd
	 * address still faults on the first load.  add_16 is also the
	 * landing point for the inc_16 and dec_16 entry points.
	 *
272	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
273	 * separated, you need to also edit the libc sparcv9 platform
274	 * specific mapfile and remove the NODYNSORT attribute
275	 * from atomic_add_16_nv.
276	 */
277	ENTRY(atomic_add_16)
278	ALTENTRY(atomic_add_16_nv)
279	ALTENTRY(atomic_add_short)
280	ALTENTRY(atomic_add_short_nv)
281add_16:
282	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
283	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
284	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
285	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
286	sethi	%hi(0xffff0000), %o3	! %o3 = mask
287	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
288	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
289	and	%o1, %o3, %o1		! %o1 = single short value
290	andn	%o0, 0x2, %o0		! %o0 = word address
291	! if low-order bit is 1, we will properly get an alignment fault here
292	ld	[%o0], %o2		! read old value
2931:
294	add	%o1, %o2, %o5		! add value to the old value
295	and	%o5, %o3, %o5		! clear other bits
296	andn	%o2, %o3, %o4		! clear target bits
297	or	%o4, %o5, %o5		! insert the new value
	/*
	 * cas stores %o5 only if the word still equals %o2; in either case
	 * %o5 comes back holding the word that was found in memory.
	 */
298	cas	[%o0], %o2, %o5
299	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
300	bne,a,pn %icc, 1b
301	  mov	%o5, %o2		! %o2 = old value
	/* success: rebuild the new halfword from the old word for the return */
302	add	%o1, %o2, %o5
303	and	%o5, %o3, %o5
304	retl
305	srl	%o5, %g1, %o0		! %o0 = new value
306	SET_SIZE(atomic_add_short_nv)
307	SET_SIZE(atomic_add_short)
308	SET_SIZE(atomic_add_16_nv)
309	SET_SIZE(atomic_add_16)
310
311	/*
	 * atomic_inc_32, atomic_inc_32_nv, atomic_inc_uint and
	 * atomic_inc_uint_nv share one body: put a delta of +1 in %o1 and
	 * continue in the common word adder at add_32.  %o0 (the target
	 * address) is passed through untouched.
	 *
312	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
313	 * separated, you need to also edit the libc sparcv9 platform
314	 * specific mapfile and remove the NODYNSORT attribute
315	 * from atomic_inc_32_nv.
316	 */
317	ENTRY(atomic_inc_32)
318	ALTENTRY(atomic_inc_32_nv)
319	ALTENTRY(atomic_inc_uint)
320	ALTENTRY(atomic_inc_uint_nv)
321	ba	add_32
	/* delay slot of the ba: %o1 = 1 before add_32 starts executing */
322	  add	%g0, 1, %o1
323	SET_SIZE(atomic_inc_uint_nv)
324	SET_SIZE(atomic_inc_uint)
325	SET_SIZE(atomic_inc_32_nv)
326	SET_SIZE(atomic_inc_32)
327
328	/*
	 * atomic_dec_32, atomic_dec_32_nv, atomic_dec_uint and
	 * atomic_dec_uint_nv share one body: put a delta of -1 in %o1 and
	 * continue in the common word adder at add_32.  %o0 (the target
	 * address) is passed through untouched.
	 *
329	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
330	 * separated, you need to also edit the libc sparcv9 platform
331	 * specific mapfile and remove the NODYNSORT attribute
332	 * from atomic_dec_32_nv.
333	 */
334	ENTRY(atomic_dec_32)
335	ALTENTRY(atomic_dec_32_nv)
336	ALTENTRY(atomic_dec_uint)
337	ALTENTRY(atomic_dec_uint_nv)
338	ba	add_32
	/* delay slot of the ba: %o1 = 0 - 1 before add_32 starts executing */
339	  sub	%g0, 1, %o1
340	SET_SIZE(atomic_dec_uint_nv)
341	SET_SIZE(atomic_dec_uint)
342	SET_SIZE(atomic_dec_32_nv)
343	SET_SIZE(atomic_dec_32)
344
345	/*
	 * atomic_add_32 / atomic_add_32_nv / atomic_add_int /
	 * atomic_add_int_nv: add a delta to a 32-bit word.
	 *
	 *   In:  %o0 = address of the word, %o1 = delta
	 *   Out: %o0 = old value + delta
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * Without ATOMIC_BO_ENABLE_SHIFT a failed cas retries at 1: using the
	 * value cas just returned, and nothing is emitted after 2:.  With it
	 * defined, a failed cas goes to 2:, spins for the backoff period and
	 * restarts at 0: with a fresh load.  add_32 is also the landing point
	 * for the inc_32 and dec_32 entry points.
	 *
346	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
347	 * separated, you need to also edit the libc sparcv9 platform
348	 * specific mapfile and remove the NODYNSORT attribute
349	 * from atomic_add_32_nv.
350	 */
351	ENTRY(atomic_add_32)
352	ALTENTRY(atomic_add_32_nv)
353	ALTENTRY(atomic_add_int)
354	ALTENTRY(atomic_add_int_nv)
355add_32:
356	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
3570:
358	ld	[%o0], %o2
3591:
360	add	%o2, %o1, %o3
	/* store %o3 if the word still equals %o2; %o3 returns the word seen */
361	cas	[%o0], %o2, %o3
362	cmp	%o2, %o3
363	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
364	  mov	%o3, %o2
365	retl
366	add	%o2, %o1, %o0		! return new value
3672:
368	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
369	SET_SIZE(atomic_add_int_nv)
370	SET_SIZE(atomic_add_int)
371	SET_SIZE(atomic_add_32_nv)
372	SET_SIZE(atomic_add_32)
373
374	/*
	 * atomic_inc_64, atomic_inc_64_nv, atomic_inc_ulong and
	 * atomic_inc_ulong_nv share one body: put a delta of +1 in %o1 and
	 * continue in the common 64-bit adder at add_64.  %o0 (the target
	 * address) is passed through untouched.
	 *
375	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
376	 * separated, you need to also edit the libc sparcv9 platform
377	 * specific mapfile and remove the NODYNSORT attribute
378	 * from atomic_inc_64_nv.
379	 */
380	ENTRY(atomic_inc_64)
381	ALTENTRY(atomic_inc_64_nv)
382	ALTENTRY(atomic_inc_ulong)
383	ALTENTRY(atomic_inc_ulong_nv)
384	ba	add_64
	/* delay slot of the ba: %o1 = 1 before add_64 starts executing */
385	  add	%g0, 1, %o1
386	SET_SIZE(atomic_inc_ulong_nv)
387	SET_SIZE(atomic_inc_ulong)
388	SET_SIZE(atomic_inc_64_nv)
389	SET_SIZE(atomic_inc_64)
390
391	/*
	 * atomic_dec_64, atomic_dec_64_nv, atomic_dec_ulong and
	 * atomic_dec_ulong_nv share one body: put a delta of -1 in %o1 and
	 * continue in the common 64-bit adder at add_64.  %o0 (the target
	 * address) is passed through untouched.
	 *
392	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
393	 * separated, you need to also edit the libc sparcv9 platform
394	 * specific mapfile and remove the NODYNSORT attribute
395	 * from atomic_dec_64_nv.
396	 */
397	ENTRY(atomic_dec_64)
398	ALTENTRY(atomic_dec_64_nv)
399	ALTENTRY(atomic_dec_ulong)
400	ALTENTRY(atomic_dec_ulong_nv)
401	ba	add_64
	/* delay slot of the ba: %o1 = 0 - 1 before add_64 starts executing */
402	  sub	%g0, 1, %o1
403	SET_SIZE(atomic_dec_ulong_nv)
404	SET_SIZE(atomic_dec_ulong)
405	SET_SIZE(atomic_dec_64_nv)
406	SET_SIZE(atomic_dec_64)
407
408	/*
	 * atomic_add_64 / atomic_add_64_nv / atomic_add_ptr /
	 * atomic_add_ptr_nv / atomic_add_long / atomic_add_long_nv: add a
	 * delta to a 64-bit value.
	 *
	 *   In:  %o0 = address of the value, %o1 = delta
	 *   Out: %o0 = old value + delta
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * Same shape as add_32, using ldx/casx and the %xcc condition codes.
	 * add_64 is also the landing point for the inc_64 and dec_64 entry
	 * points.
	 *
409	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
410	 * separated, you need to also edit the libc sparcv9 platform
411	 * specific mapfile and remove the NODYNSORT attribute
412	 * from atomic_add_64_nv.
413	 */
414	ENTRY(atomic_add_64)
415	ALTENTRY(atomic_add_64_nv)
416	ALTENTRY(atomic_add_ptr)
417	ALTENTRY(atomic_add_ptr_nv)
418	ALTENTRY(atomic_add_long)
419	ALTENTRY(atomic_add_long_nv)
420add_64:
421	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
4220:
423	ldx	[%o0], %o2
4241:
425	add	%o2, %o1, %o3
	/* store %o3 if memory still equals %o2; %o3 returns the value seen */
426	casx	[%o0], %o2, %o3
427	cmp	%o2, %o3
428	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
429	  mov	%o3, %o2
430	retl
431	add	%o2, %o1, %o0		! return new value
4322:
433	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
434	SET_SIZE(atomic_add_long_nv)
435	SET_SIZE(atomic_add_long)
436	SET_SIZE(atomic_add_ptr_nv)
437	SET_SIZE(atomic_add_ptr)
438	SET_SIZE(atomic_add_64_nv)
439	SET_SIZE(atomic_add_64)
440
441	/*
	 * atomic_or_8 / atomic_or_8_nv / atomic_or_uchar /
	 * atomic_or_uchar_nv: OR a mask into one byte.
	 *
	 *   In:  %o0 = address of the byte, %o1 = bits to set
	 *   Out: %o0 = new byte value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * The operand is shifted into the byte's position and masked so it
	 * is zero everywhere else; ORing it into the whole containing word
	 * therefore leaves the other three bytes as they were.
	 *
442	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
443	 * separated, you need to also edit the libc sparcv9 platform
444	 * specific mapfile and remove the NODYNSORT attribute
445	 * from atomic_or_8_nv.
446	 */
447	ENTRY(atomic_or_8)
448	ALTENTRY(atomic_or_8_nv)
449	ALTENTRY(atomic_or_uchar)
450	ALTENTRY(atomic_or_uchar_nv)
451	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
452	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
453	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
454	set	0xff, %o3		! %o3 = mask
455	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
456	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
457	and	%o1, %o3, %o1		! %o1 = single byte value
458	andn	%o0, 0x3, %o0		! %o0 = word address
459	ld	[%o0], %o2		! read old value
4601:
461	or	%o2, %o1, %o5		! or in the new value
	/* store %o5 if the word still equals %o2; %o5 returns the word seen */
462	cas	[%o0], %o2, %o5
463	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
464	bne,a,pn %icc, 1b
465	  mov	%o5, %o2		! %o2 = old value
	/* success: recompute the stored word and extract the target byte */
466	or	%o2, %o1, %o5
467	and	%o5, %o3, %o5
468	retl
469	srl	%o5, %g1, %o0		! %o0 = new value
470	SET_SIZE(atomic_or_uchar_nv)
471	SET_SIZE(atomic_or_uchar)
472	SET_SIZE(atomic_or_8_nv)
473	SET_SIZE(atomic_or_8)
474
475	/*
	 * atomic_or_16 / atomic_or_16_nv / atomic_or_ushort /
	 * atomic_or_ushort_nv: OR a mask into one halfword.
	 *
	 *   In:  %o0 = address of the halfword, %o1 = bits to set
	 *   Out: %o0 = new halfword value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * The operand is shifted into the halfword's position and masked so
	 * it is zero in the other half; ORing it into the whole containing
	 * word therefore leaves the other half as it was.
	 *
476	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
477	 * separated, you need to also edit the libc sparcv9 platform
478	 * specific mapfile and remove the NODYNSORT attribute
479	 * from atomic_or_16_nv.
480	 */
481	ENTRY(atomic_or_16)
482	ALTENTRY(atomic_or_16_nv)
483	ALTENTRY(atomic_or_ushort)
484	ALTENTRY(atomic_or_ushort_nv)
485	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
486	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
487	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
488	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
489	sethi	%hi(0xffff0000), %o3	! %o3 = mask
490	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
491	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
492	and	%o1, %o3, %o1		! %o1 = single short value
493	andn	%o0, 0x2, %o0		! %o0 = word address
494	! if low-order bit is 1, we will properly get an alignment fault here
495	ld	[%o0], %o2		! read old value
4961:
497	or	%o2, %o1, %o5		! or in the new value
	/* store %o5 if the word still equals %o2; %o5 returns the word seen */
498	cas	[%o0], %o2, %o5
499	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
500	bne,a,pn %icc, 1b
501	  mov	%o5, %o2		! %o2 = old value
	/* success: recompute the stored word and extract the target halfword */
502	or	%o2, %o1, %o5		! or in the new value
503	and	%o5, %o3, %o5
504	retl
505	srl	%o5, %g1, %o0		! %o0 = new value
506	SET_SIZE(atomic_or_ushort_nv)
507	SET_SIZE(atomic_or_ushort)
508	SET_SIZE(atomic_or_16_nv)
509	SET_SIZE(atomic_or_16)
510
511	/*
	 * atomic_or_32 / atomic_or_32_nv / atomic_or_uint /
	 * atomic_or_uint_nv: OR a mask into a 32-bit word.
	 *
	 *   In:  %o0 = address of the word, %o1 = bits to set
	 *   Out: %o0 = old value | %o1
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * A failed cas either retries at 1: (backoff disabled) or goes to
	 * 2:, spins, and restarts at 0: with a fresh load (backoff enabled).
	 *
512	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
513	 * separated, you need to also edit the libc sparcv9 platform
514	 * specific mapfile and remove the NODYNSORT attribute
515	 * from atomic_or_32_nv.
516	 */
517	ENTRY(atomic_or_32)
518	ALTENTRY(atomic_or_32_nv)
519	ALTENTRY(atomic_or_uint)
520	ALTENTRY(atomic_or_uint_nv)
521	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5220:
523	ld	[%o0], %o2
5241:
525	or	%o2, %o1, %o3
	/* store %o3 if the word still equals %o2; %o3 returns the word seen */
526	cas	[%o0], %o2, %o3
527	cmp	%o2, %o3
528	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
529	  mov	%o3, %o2
530	retl
531	or	%o2, %o1, %o0		! return new value
5322:
533	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
534	SET_SIZE(atomic_or_uint_nv)
535	SET_SIZE(atomic_or_uint)
536	SET_SIZE(atomic_or_32_nv)
537	SET_SIZE(atomic_or_32)
538
539	/*
	 * atomic_or_64 / atomic_or_64_nv / atomic_or_ulong /
	 * atomic_or_ulong_nv: OR a mask into a 64-bit value.
	 *
	 *   In:  %o0 = address of the value, %o1 = bits to set
	 *   Out: %o0 = old value | %o1
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * Same shape as atomic_or_32, using ldx/casx and %xcc.
	 *
540	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
541	 * separated, you need to also edit the libc sparcv9 platform
542	 * specific mapfile and remove the NODYNSORT attribute
543	 * from atomic_or_64_nv.
544	 */
545	ENTRY(atomic_or_64)
546	ALTENTRY(atomic_or_64_nv)
547	ALTENTRY(atomic_or_ulong)
548	ALTENTRY(atomic_or_ulong_nv)
549	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5500:
551	ldx	[%o0], %o2
5521:
553	or	%o2, %o1, %o3
	/* store %o3 if memory still equals %o2; %o3 returns the value seen */
554	casx	[%o0], %o2, %o3
555	cmp	%o2, %o3
556	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
557	  mov	%o3, %o2
558	retl
559	or	%o2, %o1, %o0		! return new value
5602:
561	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
562	SET_SIZE(atomic_or_ulong_nv)
563	SET_SIZE(atomic_or_ulong)
564	SET_SIZE(atomic_or_64_nv)
565	SET_SIZE(atomic_or_64)
566
567	/*
	 * atomic_and_8 / atomic_and_8_nv / atomic_and_uchar /
	 * atomic_and_uchar_nv: AND a mask into one byte.
	 *
	 *   In:  %o0 = address of the byte, %o1 = bits to keep
	 *   Out: %o0 = new byte value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * The operand is shifted into the byte's position and orn then sets
	 * every bit outside the byte mask to one, so ANDing it into the
	 * whole containing word leaves the other three bytes as they were.
	 *
568	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
569	 * separated, you need to also edit the libc sparcv9 platform
570	 * specific mapfile and remove the NODYNSORT attribute
571	 * from atomic_and_8_nv.
572	 */
573	ENTRY(atomic_and_8)
574	ALTENTRY(atomic_and_8_nv)
575	ALTENTRY(atomic_and_uchar)
576	ALTENTRY(atomic_and_uchar_nv)
577	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
578	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
579	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
580	set	0xff, %o3		! %o3 = mask
581	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
582	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
583	orn	%o1, %o3, %o1		! all ones in other bytes
584	andn	%o0, 0x3, %o0		! %o0 = word address
585	ld	[%o0], %o2		! read old value
5861:
587	and	%o2, %o1, %o5		! and in the new value
	/* store %o5 if the word still equals %o2; %o5 returns the word seen */
588	cas	[%o0], %o2, %o5
589	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
590	bne,a,pn %icc, 1b
591	  mov	%o5, %o2		! %o2 = old value
	/* success: recompute the stored word and extract the target byte */
592	and	%o2, %o1, %o5
593	and	%o5, %o3, %o5
594	retl
595	srl	%o5, %g1, %o0		! %o0 = new value
596	SET_SIZE(atomic_and_uchar_nv)
597	SET_SIZE(atomic_and_uchar)
598	SET_SIZE(atomic_and_8_nv)
599	SET_SIZE(atomic_and_8)
600
601	/*
	 * atomic_and_16 / atomic_and_16_nv / atomic_and_ushort /
	 * atomic_and_ushort_nv: AND a mask into one halfword.
	 *
	 *   In:  %o0 = address of the halfword, %o1 = bits to keep
	 *   Out: %o0 = new halfword value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * The operand is shifted into the halfword's position and orn then
	 * sets every bit outside the halfword mask to one, so ANDing it into
	 * the whole containing word leaves the other half as it was.
	 *
602	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
603	 * separated, you need to also edit the libc sparcv9 platform
604	 * specific mapfile and remove the NODYNSORT attribute
605	 * from atomic_and_16_nv.
606	 */
607	ENTRY(atomic_and_16)
608	ALTENTRY(atomic_and_16_nv)
609	ALTENTRY(atomic_and_ushort)
610	ALTENTRY(atomic_and_ushort_nv)
611	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
612	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
613	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
614	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
615	sethi	%hi(0xffff0000), %o3	! %o3 = mask
616	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
617	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
618	orn	%o1, %o3, %o1		! all ones in the other half
619	andn	%o0, 0x2, %o0		! %o0 = word address
620	! if low-order bit is 1, we will properly get an alignment fault here
621	ld	[%o0], %o2		! read old value
6221:
623	and	%o2, %o1, %o5		! and in the new value
	/* store %o5 if the word still equals %o2; %o5 returns the word seen */
624	cas	[%o0], %o2, %o5
625	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
626	bne,a,pn %icc, 1b
627	  mov	%o5, %o2		! %o2 = old value
	/* success: recompute the stored word and extract the target halfword */
628	and	%o2, %o1, %o5
629	and	%o5, %o3, %o5
630	retl
631	srl	%o5, %g1, %o0		! %o0 = new value
632	SET_SIZE(atomic_and_ushort_nv)
633	SET_SIZE(atomic_and_ushort)
634	SET_SIZE(atomic_and_16_nv)
635	SET_SIZE(atomic_and_16)
636
637	/*
	 * atomic_and_32 / atomic_and_32_nv / atomic_and_uint /
	 * atomic_and_uint_nv: AND a mask into a 32-bit word.
	 *
	 *   In:  %o0 = address of the word, %o1 = bits to keep
	 *   Out: %o0 = old value & %o1
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * A failed cas either retries at 1: (backoff disabled) or goes to
	 * 2:, spins, and restarts at 0: with a fresh load (backoff enabled).
	 *
638	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
639	 * separated, you need to also edit the libc sparcv9 platform
640	 * specific mapfile and remove the NODYNSORT attribute
641	 * from atomic_and_32_nv.
642	 */
643	ENTRY(atomic_and_32)
644	ALTENTRY(atomic_and_32_nv)
645	ALTENTRY(atomic_and_uint)
646	ALTENTRY(atomic_and_uint_nv)
647	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6480:
649	ld	[%o0], %o2
6501:
651	and	%o2, %o1, %o3
	/* store %o3 if the word still equals %o2; %o3 returns the word seen */
652	cas	[%o0], %o2, %o3
653	cmp	%o2, %o3
654	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
655	  mov	%o3, %o2
656	retl
657	and	%o2, %o1, %o0		! return new value
6582:
659	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
660	SET_SIZE(atomic_and_uint_nv)
661	SET_SIZE(atomic_and_uint)
662	SET_SIZE(atomic_and_32_nv)
663	SET_SIZE(atomic_and_32)
664
665	/*
	 * atomic_and_64 / atomic_and_64_nv / atomic_and_ulong /
	 * atomic_and_ulong_nv: AND a mask into a 64-bit value.
	 *
	 *   In:  %o0 = address of the value, %o1 = bits to keep
	 *   Out: %o0 = old value & %o1
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * Same shape as atomic_and_32, using ldx/casx and %xcc.
	 *
666	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
667	 * separated, you need to also edit the libc sparcv9 platform
668	 * specific mapfile and remove the NODYNSORT attribute
669	 * from atomic_and_64_nv.
670	 */
671	ENTRY(atomic_and_64)
672	ALTENTRY(atomic_and_64_nv)
673	ALTENTRY(atomic_and_ulong)
674	ALTENTRY(atomic_and_ulong_nv)
675	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6760:
677	ldx	[%o0], %o2
6781:
679	and	%o2, %o1, %o3
	/* store %o3 if memory still equals %o2; %o3 returns the value seen */
680	casx	[%o0], %o2, %o3
681	cmp	%o2, %o3
682	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
683	  mov	%o3, %o2
684	retl
685	and	%o2, %o1, %o0		! return new value
6862:
687	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
688	SET_SIZE(atomic_and_ulong_nv)
689	SET_SIZE(atomic_and_ulong)
690	SET_SIZE(atomic_and_64_nv)
691	SET_SIZE(atomic_and_64)
692
693	ENTRY(atomic_cas_8)
694	ALTENTRY(atomic_cas_uchar)
	/*
	 * Compare-and-swap on one byte.
	 *
	 *   In:  %o0 = address of the byte, %o1 = comparison value,
	 *        %o2 = new value
	 *   Out: %o0 = byte value found in memory
	 *   Scratch: %o3-%o5, %g1
	 *
	 * Built on a 32-bit cas of the containing aligned word.  Each pass
	 * takes the most recently observed word, keeps its other three
	 * bytes, and forms two words from it: one carrying the comparison
	 * byte (%o4) and one carrying the new byte (%o5).  The cas can then
	 * fail for two different reasons, which the code tells apart:
	 *   - the target byte itself differs from the comparison value:
	 *     give up and return the byte that was found;
	 *   - the target byte matches but a neighbouring byte changed:
	 *     retry with the freshly observed word.
	 */
695	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
696	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
697	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
698	set	0xff, %o3		! %o3 = mask
699	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
700	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
701	and	%o1, %o3, %o1		! %o1 = single byte value
702	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
703	and	%o2, %o3, %o2		! %o2 = single byte value
704	andn	%o0, 0x3, %o0		! %o0 = word address
705	ld	[%o0], %o4		! read old value
7061:
707	andn	%o4, %o3, %o4		! clear target bits
708	or	%o4, %o2, %o5		! insert the new value
709	or	%o4, %o1, %o4		! insert the comparison value
710	cas	[%o0], %o4, %o5
711	cmp	%o4, %o5		! did we succeed?
	/*
	 * This be is not annulled, so the and in its delay slot runs on
	 * both paths and leaves the observed target byte in %o4.
	 */
712	be,pt	%icc, 2f
713	  and	%o5, %o3, %o4		! isolate the old value
714	cmp	%o1, %o4		! should we have succeeded?
	/* annulled: the mov below runs only when we loop back to 1: */
715	be,a,pt	%icc, 1b		! yes, try again
716	  mov	%o5, %o4		! %o4 = old value
7172:
718	retl
719	srl	%o4, %g1, %o0		! %o0 = old value
720	SET_SIZE(atomic_cas_uchar)
721	SET_SIZE(atomic_cas_8)
722
723	ENTRY(atomic_cas_16)
724	ALTENTRY(atomic_cas_ushort)
	/*
	 * Compare-and-swap on one halfword.
	 *
	 *   In:  %o0 = address of the halfword, %o1 = comparison value,
	 *        %o2 = new value
	 *   Out: %o0 = halfword value found in memory
	 *   Scratch: %o3-%o5, %g1
	 *
	 * Built on a 32-bit cas of the containing word.  Each pass takes
	 * the most recently observed word, keeps its other half, and forms
	 * two words from it: one carrying the comparison halfword (%o4) and
	 * one carrying the new halfword (%o5).  A cas failure caused only by
	 * a change in the other half is retried; a failure because the
	 * target halfword differs from the comparison value ends the loop
	 * and returns the halfword that was found.
	 */
725	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
726	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
727	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
728	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
729	sethi	%hi(0xffff0000), %o3	! %o3 = mask
730	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
731	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
732	and	%o1, %o3, %o1		! %o1 = single short value
733	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
734	and	%o2, %o3, %o2		! %o2 = single short value
735	andn	%o0, 0x2, %o0		! %o0 = word address
736	! if low-order bit is 1, we will properly get an alignment fault here
737	ld	[%o0], %o4		! read old value
7381:
739	andn	%o4, %o3, %o4		! clear target bits
740	or	%o4, %o2, %o5		! insert the new value
741	or	%o4, %o1, %o4		! insert the comparison value
742	cas	[%o0], %o4, %o5
743	cmp	%o4, %o5		! did we succeed?
	/*
	 * This be is not annulled, so the and in its delay slot runs on
	 * both paths and leaves the observed target halfword in %o4.
	 */
744	be,pt	%icc, 2f
745	  and	%o5, %o3, %o4		! isolate the old value
746	cmp	%o1, %o4		! should we have succeeded?
	/* annulled: the mov below runs only when we loop back to 1: */
747	be,a,pt	%icc, 1b		! yes, try again
748	  mov	%o5, %o4		! %o4 = old value
7492:
750	retl
751	srl	%o4, %g1, %o0		! %o0 = old value
752	SET_SIZE(atomic_cas_ushort)
753	SET_SIZE(atomic_cas_16)
754
755	ENTRY(atomic_cas_32)
756	ALTENTRY(atomic_cas_uint)
	/*
	 * Compare-and-swap on a 32-bit word; a single cas instruction.
	 *
	 *   In:  %o0 = address of the word, %o1 = comparison value,
	 *        %o2 = new value
	 *   Out: %o0 = value found in memory (cas leaves it in %o2, and the
	 *        mov in the retl delay slot copies it to %o0)
	 */
757	cas	[%o0], %o1, %o2
758	retl
759	mov	%o2, %o0
760	SET_SIZE(atomic_cas_uint)
761	SET_SIZE(atomic_cas_32)
762
763	ENTRY(atomic_cas_64)
764	ALTENTRY(atomic_cas_ptr)
765	ALTENTRY(atomic_cas_ulong)
	/*
	 * Compare-and-swap on a 64-bit value; a single casx instruction.
	 * The pointer and unsigned-long entry points share this body.
	 *
	 *   In:  %o0 = address of the value, %o1 = comparison value,
	 *        %o2 = new value
	 *   Out: %o0 = value found in memory (casx leaves it in %o2, and the
	 *        mov in the retl delay slot copies it to %o0)
	 */
766	casx	[%o0], %o1, %o2
767	retl
768	mov	%o2, %o0
769	SET_SIZE(atomic_cas_ulong)
770	SET_SIZE(atomic_cas_ptr)
771	SET_SIZE(atomic_cas_64)
772
773	ENTRY(atomic_swap_8)
774	ALTENTRY(atomic_swap_uchar)
	/*
	 * Unconditionally replace one byte and return what was there.
	 *
	 *   In:  %o0 = address of the byte, %o1 = new value
	 *   Out: %o0 = previous byte value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * Implemented as a cas retry loop on the containing aligned word:
	 * the new byte is spliced into the last observed word and the cas is
	 * repeated until the word did not change in between.
	 */
775	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
776	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
777	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
778	set	0xff, %o3		! %o3 = mask
779	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
780	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
781	and	%o1, %o3, %o1		! %o1 = single byte value
782	andn	%o0, 0x3, %o0		! %o0 = word address
783	ld	[%o0], %o2		! read old value
7841:
785	andn	%o2, %o3, %o5		! clear target bits
786	or	%o5, %o1, %o5		! insert the new value
	/* store %o5 if the word still equals %o2; %o5 returns the word seen */
787	cas	[%o0], %o2, %o5
788	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
789	bne,a,pn %icc, 1b
790	  mov	%o5, %o2		! %o2 = old value
	/* success: %o5 is the word that was replaced; extract its target byte */
791	and	%o5, %o3, %o5
792	retl
793	srl	%o5, %g1, %o0		! %o0 = old value
794	SET_SIZE(atomic_swap_uchar)
795	SET_SIZE(atomic_swap_8)
796
797	ENTRY(atomic_swap_16)
798	ALTENTRY(atomic_swap_ushort)
	/*
	 * Unconditionally replace one halfword and return what was there.
	 *
	 *   In:  %o0 = address of the halfword, %o1 = new value
	 *   Out: %o0 = previous halfword value
	 *   Scratch: %o2-%o5, %g1
	 *
	 * Implemented as a cas retry loop on the containing word: the new
	 * halfword is spliced into the last observed word and the cas is
	 * repeated until the word did not change in between.
	 */
799	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
800	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
801	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
802	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
803	sethi	%hi(0xffff0000), %o3	! %o3 = mask
804	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
805	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
806	and	%o1, %o3, %o1		! %o1 = single short value
807	andn	%o0, 0x2, %o0		! %o0 = word address
808	! if low-order bit is 1, we will properly get an alignment fault here
809	ld	[%o0], %o2		! read old value
8101:
811	andn	%o2, %o3, %o5		! clear target bits
812	or	%o5, %o1, %o5		! insert the new value
	/* store %o5 if the word still equals %o2; %o5 returns the word seen */
813	cas	[%o0], %o2, %o5
814	cmp	%o2, %o5
	/* mismatch: retry; the annulled slot runs only when the branch is taken */
815	bne,a,pn %icc, 1b
816	  mov	%o5, %o2		! %o2 = old value
	/* success: %o5 is the word that was replaced; extract its halfword */
817	and	%o5, %o3, %o5
818	retl
819	srl	%o5, %g1, %o0		! %o0 = old value
820	SET_SIZE(atomic_swap_ushort)
821	SET_SIZE(atomic_swap_16)
822
823	ENTRY(atomic_swap_32)
824	ALTENTRY(atomic_swap_uint)
	/*
	 * Unconditionally replace a 32-bit word and return what was there.
	 *
	 *   In:  %o0 = address of the word, %o1 = new value
	 *   Out: %o0 = previous value
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * %o1 is copied to %o3 on every pass because cas overwrites its
	 * third operand with the value it found in memory.
	 */
825	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8260:
827	ld	[%o0], %o2
8281:
829	mov	%o1, %o3
830	cas	[%o0], %o2, %o3
831	cmp	%o2, %o3
832	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
833	  mov	%o3, %o2
834	retl
	/* delay slot of retl: return the word that the cas replaced */
835	mov	%o3, %o0
8362:
837	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
838	SET_SIZE(atomic_swap_uint)
839	SET_SIZE(atomic_swap_32)
840
841	ENTRY(atomic_swap_64)
842	ALTENTRY(atomic_swap_ptr)
843	ALTENTRY(atomic_swap_ulong)
	/*
	 * Unconditionally replace a 64-bit value and return what was there.
	 * The pointer and unsigned-long entry points share this body.
	 *
	 *   In:  %o0 = address of the value, %o1 = new value
	 *   Out: %o0 = previous value
	 *   Scratch: %o2, %o3; with backoff enabled also %o4, %o5, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * %o1 is copied to %o3 on every pass because casx overwrites its
	 * third operand with the value it found in memory.
	 */
844	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8450:
846	ldx	[%o0], %o2
8471:
848	mov	%o1, %o3
849	casx	[%o0], %o2, %o3
850	cmp	%o2, %o3
851	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
852	  mov	%o3, %o2
853	retl
	/* delay slot of retl: return the value that the casx replaced */
854	mov	%o3, %o0
8552:
856	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
857	SET_SIZE(atomic_swap_ulong)
858	SET_SIZE(atomic_swap_ptr)
859	SET_SIZE(atomic_swap_64)
860
861	ENTRY(atomic_set_long_excl)
	/*
	 * Set one bit in a long, but only if it is currently clear.
	 *
	 *   In:  %o0 = address of the long, %o1 = bit number
	 *   Out: %o0 = 0 if this call set the bit,
	 *              -1 if the bit was already set (memory not modified)
	 *   Scratch: %o2-%o4; with backoff enabled also %o5, %g1, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * NOTE(review): slln, ldn, casn and %ncc are not defined in this
	 * file; they are presumably the native-long-width spellings provided
	 * by <sys/asm_linkage.h> -- confirm there.
	 */
862	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
863	mov	1, %o3
	/* %o3 = 1 shifted left by the bit number: the single-bit mask */
864	slln	%o3, %o1, %o3
8650:
866	ldn	[%o0], %o2
8671:
868	andcc	%o2, %o3, %g0		! test if the bit is set
	/* annulled: %o0 becomes -1 only when the failure branch is taken */
869	bnz,a,pn %ncc, 2f		! if so, then fail out
870	  mov	-1, %o0
871	or	%o2, %o3, %o4		! set the bit, and try to commit it
872	casn	[%o0], %o2, %o4
873	cmp	%o2, %o4
874	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
875	  mov	%o4, %o2
	/* the cas succeeded: report success with a zero return value */
876	mov	%g0, %o0
8772:
878	retl
879	nop
8805:
881	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
882	SET_SIZE(atomic_set_long_excl)
883
884	ENTRY(atomic_clear_long_excl)
	/*
	 * Clear one bit in a long, but only if it is currently set.
	 *
	 *   In:  %o0 = address of the long, %o1 = bit number
	 *   Out: %o0 = 0 if this call cleared the bit,
	 *              -1 if the bit was already clear (memory not modified)
	 *   Scratch: %o2-%o4; with backoff enabled also %o5, %g1, %g4, %g5
	 *            and (through ATOMIC_BACKOFF_BACKOFF) %g2, %g3
	 *
	 * NOTE(review): slln, ldn, casn and %ncc are not defined in this
	 * file; they are presumably the native-long-width spellings provided
	 * by <sys/asm_linkage.h> -- confirm there.
	 */
885	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
886	mov	1, %o3
	/* %o3 = 1 shifted left by the bit number: the single-bit mask */
887	slln	%o3, %o1, %o3
8880:
889	ldn	[%o0], %o2
8901:
	/* mask AND NOT value is non-zero exactly when the bit is clear */
891	andncc	%o3, %o2, %g0		! test if the bit is clear
	/* annulled: %o0 becomes -1 only when the failure branch is taken */
892	bnz,a,pn %ncc, 2f		! if so, then fail out
893	  mov	-1, %o0
894	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
895	casn	[%o0], %o2, %o4
896	cmp	%o2, %o4
897	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
	/* annulled delay slot: adopt the observed value only on failure */
898	  mov	%o4, %o2
	/* the cas succeeded: report success with a zero return value */
899	mov	%g0, %o0
9002:
901	retl
902	nop
9035:
904	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
905	SET_SIZE(atomic_clear_long_excl)
906
907#if !defined(_KERNEL)
908
909	/*
910	 * Spitfires and Blackbirds have a problem with membars in the
911	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
912	 * that the whole world needs the workaround.
	 *
	 * For that reason each membar_* routine below issues its membar
	 * before the retl and fills the retl delay slot with a nop.
	 *
	 * membar_enter: issue a membar with the #StoreLoad and #StoreStore
	 * ordering bits.  Takes no arguments and returns nothing.
913	 */
914	ENTRY(membar_enter)
915	membar	#StoreLoad|#StoreStore
916	retl
917	nop
918	SET_SIZE(membar_enter)
919
920	ENTRY(membar_exit)
	/*
	 * Issue a membar with the #LoadStore and #StoreStore ordering bits.
	 * Takes no arguments and returns nothing.  The membar is kept out of
	 * the retl delay slot, which holds a nop instead.
	 */
921	membar	#LoadStore|#StoreStore
922	retl
923	nop
924	SET_SIZE(membar_exit)
925
926	ENTRY(membar_producer)
	/*
	 * Issue a membar with the #StoreStore ordering bit.  Takes no
	 * arguments and returns nothing.  The membar is kept out of the retl
	 * delay slot, which holds a nop instead.
	 */
927	membar	#StoreStore
928	retl
929	nop
930	SET_SIZE(membar_producer)
931
932	ENTRY(membar_consumer)
	/*
	 * Issue a membar with the #LoadLoad ordering bit.  Takes no
	 * arguments and returns nothing.  The membar is kept out of the retl
	 * delay slot, which holds a nop instead.
	 */
933	membar	#LoadLoad
934	retl
935	nop
936	SET_SIZE(membar_consumer)
937
938#endif	/* !_KERNEL */
939