xref: /titanic_44/usr/src/common/atomic/sparcv9/atomic.s (revision 1db2880b3a411e3c56e50c7dc42d3b137fcc4e48)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"atomic.s"
28
29#include <sys/asm_linkage.h>
30
31/*
32 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
33 * to enable exponential backoff. No definition means backoff is
34 * not desired i.e. backoff should be disabled.
35 * By default, the shift value is used to generate a power of 2
36 * value for backoff limit. In the kernel, processors scale this
37 * shift value with the number of online cpus.
38 */
39
40#if defined(_KERNEL)
41	/*
42	 * Legacy kernel interfaces; they will go away the moment our closed
43	 * bins no longer require them.
44	 */
	/*
	 * Each line below pairs a legacy symbol (first argument) with one of
	 * the atomic_* entry points defined later in this file (second
	 * argument).  ANSI_PRAGMA_WEAK2 is not defined in this file;
	 * presumably it comes from <sys/asm_linkage.h> and emits a weak
	 * alias -- confirm there.
	 */
45	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
46	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
47	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
48	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
49	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
50	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
51	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
52	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)
53
54#ifdef ATOMIC_BO_ENABLE_SHIFT
55
56#if !defined(lint)
	/*
	 * cpu_atomic_delay is only referenced here, never defined, and the
	 * reference is weak.  DELAY_SPIN below tests the symbol's address
	 * against zero before calling through it.
	 */
57	.weak   cpu_atomic_delay
58	.type   cpu_atomic_delay, #function
59#endif  /* lint */
60
61/*
62 * For the kernel, invoke processor specific delay routine to perform
63 * low-impact spin delay. The value of ATOMIC_BO_ENABLE_SHIFT is tuned
64 * with respect to the specific spin delay implementation.
65 */
/*
 * DELAY_SPIN(label, tmp1, tmp2) expands, in order, to:
 *   - tmp1 = address of cpu_atomic_delay (sethi/or pair).
 *   - label0: if tmp1 is zero, branch to label1.  The "mov" in the
 *     delay slot is not annulled, so %o7 is copied to tmp2 either way.
 *   - jmpl through tmp1, which overwrites %o7 with the address of the
 *     jmpl itself; a nop fills its delay slot.
 *   - %o7 is restored from tmp2.
 *   - label1: fall through to whatever follows the macro.
 * The value preserved in tmp2 is the enclosing atomic routine's own
 * return address (the inline comments call it the "callee's" return
 * address).  label0 is the re-entry point that ATOMIC_BACKOFF_BACKOFF
 * branches back to on every spin iteration.
 * Both label0 and label1 are formed by pasting a digit onto `label`,
 * so each expansion needs a distinct `label` argument.
 */
66#define	DELAY_SPIN(label, tmp1, tmp2)					\
67	/*								; \
68	 * Define a pragma weak reference to a cpu specific		; \
69	 * delay routine for atomic backoff. For CPUs that		; \
70	 * have no such delay routine defined, the delay becomes	; \
71	 * just a simple tight loop.					; \
72	 *								; \
73	 * tmp1 = holds CPU specific delay routine			; \
74	 * tmp2 = holds atomic routine's callee return address		; \
75	 */								; \
76	sethi	%hi(cpu_atomic_delay), tmp1				; \
77	or	tmp1, %lo(cpu_atomic_delay), tmp1			; \
78label/**/0:								; \
79	brz,pn	tmp1, label/**/1					; \
80	mov	%o7, tmp2						; \
81	jmpl	tmp1, %o7	/* call CPU specific delay routine */	; \
82	  nop			/* delay slot : do nothing */		; \
83	mov	tmp2, %o7	/* restore callee's return address */	; \
84label/**/1:
85
86/*
87 * For the kernel, we take cas failures into consideration
88 * and also scale the backoff limit w.r.t. the number of cpus.
89 * For cas failures, we reset the backoff value to 1 once the number
90 * of cas failures reaches or exceeds the number of online cpus. This
91 * will enforce some degree of fairness and prevent starvation.
92 * We also scale/normalize the processor provided specific
93 * ATOMIC_BO_ENABLE_SHIFT w.r.t. the number of online cpus to
94 * obtain the actual final limit to use.
95 */
/*
 * ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label) expands to:
 *   - cas_cnt is incremented on every expansion (the "inc" sits in a
 *     non-annulled delay slot).
 *   - If ncpu is still zero, it is loaded from the 32-bit variable
 *     ncpus_online; a non-zero ncpu is reused as a cached copy.
 *   - limit = ncpu << ATOMIC_BO_ENABLE_SHIFT (computed in the delay
 *     slot of the blu, so it is always performed).
 *   - If cas_cnt >= ncpu (unsigned compare), cas_cnt is reset to 0
 *     and val is reset to 1.
 * ncpus_online is not defined in this file.
 */
96#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
97	brnz,pt	ncpu, label/**/0					; \
98	  inc	cas_cnt							; \
99	sethi	%hi(ncpus_online), ncpu					; \
100	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
101label/**/0:								; \
102	cmp	cas_cnt, ncpu						; \
103	blu,pt	%xcc, label/**/1					; \
104	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit			; \
105	mov	%g0, cas_cnt						; \
106	mov	1, val							; \
107label/**/1:
108#endif	/* ATOMIC_BO_ENABLE_SHIFT */
109
110#else	/* _KERNEL */
111
112/*
113 * ATOMIC_BO_ENABLE_SHIFT may be enabled/defined here for generic
114 * libc atomics. None for now.
115 */
116#ifdef ATOMIC_BO_ENABLE_SHIFT
/*
 * Non-kernel variants of the two helper macros.
 * DELAY_SPIN emits nothing but the label0 loop target; tmp1 and tmp2
 * are unused and no delay routine is called.
 * ATOMIC_BACKOFF_CPU sets limit to the constant
 * 1 << ATOMIC_BO_ENABLE_SHIFT and ignores val, ncpu, cas_cnt and label.
 */
117#define	DELAY_SPIN(label, tmp1, tmp2)	\
118label/**/0:
119
120#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)  \
121	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
122#endif	/* ATOMIC_BO_ENABLE_SHIFT */
123#endif	/* _KERNEL */
124
125#ifdef ATOMIC_BO_ENABLE_SHIFT
126/*
127 * ATOMIC_BACKOFF_INIT macro for initialization.
128 * backoff val is initialized to 1.
129 * ncpu is initialized to 0
130 * The cas_cnt counts the cas instruction failure and is
131 * initialized to 0.
132 */
/*
 * All three arguments are register names chosen by the caller.  Every
 * routine in this file that uses the backoff macros invokes this one
 * once, ahead of its retry loop.
 */
133#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
134	mov	1, val				; \
135	mov	%g0, ncpu			; \
136	mov	%g0, cas_cnt
137
/*
 * ATOMIC_BACKOFF_BRANCH(cr, backoff, loop), backoff-enabled variant.
 * Emits an annulled branch-on-not-equal (predicted not taken) to
 * `backoff`, testing condition-code register `cr`.  The `loop`
 * argument is unused here.  The instruction the caller places after
 * the macro is the delay slot; because of the annul bit it executes
 * only when the branch is taken.
 */
138#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
139	bne,a,pn cr, backoff
140
141/*
142 * Main ATOMIC_BACKOFF_BACKOFF macro for backoff.
143 */
/*
 * Steps performed by the expansion:
 *   1. ATOMIC_BACKOFF_CPU computes `limit` (and may reset val and
 *      cas_cnt).
 *   2. val = min(val, limit), unsigned.  The annulled delay slot
 *      copies val into limit only when val < limit; the following
 *      "mov limit, val" then makes the two registers equal, so
 *      `limit` now doubles as the spin counter.
 *   3. DELAY_SPIN runs once per count.  deccc/bgu loops back to
 *      label_20, which is DELAY_SPIN's label0 (label_2 with "0"
 *      pasted on).
 *   4. Branch to `retlabel`; the delay slot doubles val for the next
 *      round.
 * %g2 and %g3 are hard-wired as DELAY_SPIN's temporaries, so they are
 * overwritten in addition to the registers passed as arguments.
 */
144#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
145	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
146	cmp	val, limit						; \
147	blu,a,pt %xcc, label/**/_1					; \
148	  mov	val, limit						; \
149label/**/_1:								; \
150	mov	limit, val						; \
151	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
152	deccc	limit							; \
153	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
154	  nop								; \
155	ba	retlabel						; \
156	sllx	val, 1, val
157
158#else	/* ATOMIC_BO_ENABLE_SHIFT */
/*
 * Backoff disabled (ATOMIC_BO_ENABLE_SHIFT undefined):
 * ATOMIC_BACKOFF_INIT and ATOMIC_BACKOFF_BACKOFF expand to nothing,
 * and ATOMIC_BACKOFF_BRANCH becomes an annulled branch-on-not-equal
 * straight back to `loop`; its `backoff` argument is unused.
 */
159#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)
160
161#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
162	bne,a,pn cr, loop
163
164#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)
165#endif	/* ATOMIC_BO_ENABLE_SHIFT */
166
167	/*
168	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
169	 * separated, you need to also edit the libc sparcv9 platform
170	 * specific mapfile and remove the NODYNSORT attribute
171	 * from atomic_inc_8_nv.
172	 */
173	ENTRY(atomic_inc_8)
174	ALTENTRY(atomic_inc_8_nv)
175	ALTENTRY(atomic_inc_uchar)
176	ALTENTRY(atomic_inc_uchar_nv)
	/*
	 * Increment = add 1.  Branch to the shared add_8 body; the "add"
	 * in the branch delay slot sets the delta register %o1 to 1
	 * before control arrives there.  %o0 is passed through untouched.
	 */
177	ba	add_8
178	  add	%g0, 1, %o1
179	SET_SIZE(atomic_inc_uchar_nv)
180	SET_SIZE(atomic_inc_uchar)
181	SET_SIZE(atomic_inc_8_nv)
182	SET_SIZE(atomic_inc_8)
183
184	/*
185	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
186	 * separated, you need to also edit the libc sparcv9 platform
187	 * specific mapfile and remove the NODYNSORT attribute
188	 * from atomic_dec_8_nv.
189	 */
190	ENTRY(atomic_dec_8)
191	ALTENTRY(atomic_dec_8_nv)
192	ALTENTRY(atomic_dec_uchar)
193	ALTENTRY(atomic_dec_uchar_nv)
	/*
	 * Decrement = add -1.  Branch to the shared add_8 body; the "sub"
	 * in the branch delay slot sets the delta register %o1 to
	 * 0 - 1 before control arrives there.  %o0 is passed through
	 * untouched.
	 */
194	ba	add_8
195	  sub	%g0, 1, %o1
196	SET_SIZE(atomic_dec_uchar_nv)
197	SET_SIZE(atomic_dec_uchar)
198	SET_SIZE(atomic_dec_8_nv)
199	SET_SIZE(atomic_dec_8)
200
201	/*
202	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
203	 * separated, you need to also edit the libc sparcv9 platform
204	 * specific mapfile and remove the NODYNSORT attribute
205	 * from atomic_add_8_nv.
206	 */
207	ENTRY(atomic_add_8)
208	ALTENTRY(atomic_add_8_nv)
209	ALTENTRY(atomic_add_char)
210	ALTENTRY(atomic_add_char_nv)
	/*
	 * In:  %o0 = address of the target byte, %o1 = value to add.
	 * Out: %o0 = new byte value (masked, then shifted down to bit 0).
	 * The byte is updated with a 32-bit cas on the aligned word that
	 * contains it.  cas always leaves the word it found in memory in
	 * %o5; if that differs from the expected word in %o2 nothing was
	 * stored, so the annulled delay slot adopts the observed word as
	 * the new expectation and the loop retries.
	 * add_8 is also the branch target of the inc_8/dec_8 entry points.
	 * Registers written: %o0-%o5, %g1.
	 */
211add_8:
212	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
213	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
214	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
215	set	0xff, %o3		! %o3 = mask
216	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
217	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
218	and	%o1, %o3, %o1		! %o1 = single byte value
219	andn	%o0, 0x3, %o0		! %o0 = word address
220	ld	[%o0], %o2		! read old value
2211:
222	add	%o2, %o1, %o5		! add value to the old value
223	and	%o5, %o3, %o5		! clear other bits
224	andn	%o2, %o3, %o4		! clear target bits
225	or	%o4, %o5, %o5		! insert the new value
226	cas	[%o0], %o2, %o5
227	cmp	%o2, %o5
228	bne,a,pn %icc, 1b
229	  mov	%o5, %o2		! %o2 = old value
	/*
	 * Success: %o2 is the word that was replaced.  Recompute the sum
	 * and isolate the target byte for the return value.
	 */
230	add	%o2, %o1, %o5
231	and	%o5, %o3, %o5
232	retl
233	srl	%o5, %g1, %o0		! %o0 = new value
234	SET_SIZE(atomic_add_char_nv)
235	SET_SIZE(atomic_add_char)
236	SET_SIZE(atomic_add_8_nv)
237	SET_SIZE(atomic_add_8)
238
239	/*
240	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
241	 * separated, you need to also edit the libc sparcv9 platform
242	 * specific mapfile and remove the NODYNSORT attribute
243	 * from atomic_inc_16_nv.
244	 */
245	ENTRY(atomic_inc_16)
246	ALTENTRY(atomic_inc_16_nv)
247	ALTENTRY(atomic_inc_ushort)
248	ALTENTRY(atomic_inc_ushort_nv)
	/*
	 * Increment = add 1.  Branch to the shared add_16 body; the "add"
	 * in the branch delay slot sets the delta register %o1 to 1
	 * before control arrives there.  %o0 is passed through untouched.
	 */
249	ba	add_16
250	  add	%g0, 1, %o1
251	SET_SIZE(atomic_inc_ushort_nv)
252	SET_SIZE(atomic_inc_ushort)
253	SET_SIZE(atomic_inc_16_nv)
254	SET_SIZE(atomic_inc_16)
255
256	/*
257	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
258	 * separated, you need to also edit the libc sparcv9 platform
259	 * specific mapfile and remove the NODYNSORT attribute
260	 * from atomic_dec_16_nv.
261	 */
262	ENTRY(atomic_dec_16)
263	ALTENTRY(atomic_dec_16_nv)
264	ALTENTRY(atomic_dec_ushort)
265	ALTENTRY(atomic_dec_ushort_nv)
	/*
	 * Decrement = add -1.  Branch to the shared add_16 body; the
	 * "sub" in the branch delay slot sets the delta register %o1 to
	 * 0 - 1 before control arrives there.  %o0 is passed through
	 * untouched.
	 */
266	ba	add_16
267	  sub	%g0, 1, %o1
268	SET_SIZE(atomic_dec_ushort_nv)
269	SET_SIZE(atomic_dec_ushort)
270	SET_SIZE(atomic_dec_16_nv)
271	SET_SIZE(atomic_dec_16)
272
273	/*
274	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
275	 * separated, you need to also edit the libc sparcv9 platform
276	 * specific mapfile and remove the NODYNSORT attribute
277	 * from atomic_add_16_nv.
278	 */
279	ENTRY(atomic_add_16)
280	ALTENTRY(atomic_add_16_nv)
281	ALTENTRY(atomic_add_short)
282	ALTENTRY(atomic_add_short_nv)
	/*
	 * In:  %o0 = address of the target halfword, %o1 = value to add.
	 * Out: %o0 = new halfword value (masked, shifted down to bit 0).
	 * The halfword is updated with a 32-bit cas on the word that
	 * contains it.  Only bit 1 of the address is stripped, so an odd
	 * address is still handed to the word load.
	 * On cas failure the annulled delay slot adopts the observed word
	 * (%o5) as the new expectation (%o2) and the loop retries.
	 * add_16 is also the branch target of the inc_16/dec_16 entry
	 * points.  Registers written: %o0-%o5, %g1.
	 */
283add_16:
284	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
285	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
286	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
287	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
288	sethi	%hi(0xffff0000), %o3	! %o3 = mask
289	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
290	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
291	and	%o1, %o3, %o1		! %o1 = single short value
292	andn	%o0, 0x2, %o0		! %o0 = word address
293	! if low-order bit is 1, we will properly get an alignment fault here
294	ld	[%o0], %o2		! read old value
2951:
296	add	%o1, %o2, %o5		! add value to the old value
297	and	%o5, %o3, %o5		! clear other bits
298	andn	%o2, %o3, %o4		! clear target bits
299	or	%o4, %o5, %o5		! insert the new value
300	cas	[%o0], %o2, %o5
301	cmp	%o2, %o5
302	bne,a,pn %icc, 1b
303	  mov	%o5, %o2		! %o2 = old value
	/*
	 * Success: %o2 is the word that was replaced.  Recompute the sum
	 * and isolate the target halfword for the return value.
	 */
304	add	%o1, %o2, %o5
305	and	%o5, %o3, %o5
306	retl
307	srl	%o5, %g1, %o0		! %o0 = new value
308	SET_SIZE(atomic_add_short_nv)
309	SET_SIZE(atomic_add_short)
310	SET_SIZE(atomic_add_16_nv)
311	SET_SIZE(atomic_add_16)
312
313	/*
314	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
315	 * separated, you need to also edit the libc sparcv9 platform
316	 * specific mapfile and remove the NODYNSORT attribute
317	 * from atomic_inc_32_nv.
318	 */
319	ENTRY(atomic_inc_32)
320	ALTENTRY(atomic_inc_32_nv)
321	ALTENTRY(atomic_inc_uint)
322	ALTENTRY(atomic_inc_uint_nv)
	/*
	 * Increment = add 1.  Branch to the shared add_32 body; the "add"
	 * in the branch delay slot sets the delta register %o1 to 1
	 * before control arrives there.  %o0 is passed through untouched.
	 */
323	ba	add_32
324	  add	%g0, 1, %o1
325	SET_SIZE(atomic_inc_uint_nv)
326	SET_SIZE(atomic_inc_uint)
327	SET_SIZE(atomic_inc_32_nv)
328	SET_SIZE(atomic_inc_32)
329
330	/*
331	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
332	 * separated, you need to also edit the libc sparcv9 platform
333	 * specific mapfile and remove the NODYNSORT attribute
334	 * from atomic_dec_32_nv.
335	 */
336	ENTRY(atomic_dec_32)
337	ALTENTRY(atomic_dec_32_nv)
338	ALTENTRY(atomic_dec_uint)
339	ALTENTRY(atomic_dec_uint_nv)
	/*
	 * Decrement = add -1.  Branch to the shared add_32 body; the
	 * "sub" in the branch delay slot sets the delta register %o1 to
	 * 0 - 1 before control arrives there.  %o0 is passed through
	 * untouched.
	 */
340	ba	add_32
341	  sub	%g0, 1, %o1
342	SET_SIZE(atomic_dec_uint_nv)
343	SET_SIZE(atomic_dec_uint)
344	SET_SIZE(atomic_dec_32_nv)
345	SET_SIZE(atomic_dec_32)
346
347	/*
348	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
349	 * separated, you need to also edit the libc sparcv9 platform
350	 * specific mapfile and remove the NODYNSORT attribute
351	 * from atomic_add_32_nv.
352	 */
353	ENTRY(atomic_add_32)
354	ALTENTRY(atomic_add_32_nv)
355	ALTENTRY(atomic_add_int)
356	ALTENTRY(atomic_add_int_nv)
	/*
	 * In:  %o0 = address of the 32-bit word, %o1 = value to add.
	 * Out: %o0 = new value (replaced word + %o1).
	 * %o4, %g4 and %g5 are handed to the backoff macros as val, ncpu
	 * and cas_cnt; %o5 serves as the backoff limit.
	 * The "mov" after ATOMIC_BACKOFF_BRANCH is an annulled delay slot:
	 * it runs only when the cas failed, carrying the observed word
	 * into %o2.  With backoff enabled the failure path goes to 2:,
	 * spins, and returns to 0: where the word is reloaded; with it
	 * disabled the branch goes straight back to 1: and the macro at
	 * 2: expands to nothing.
	 * add_32 is also the branch target of the inc_32/dec_32 entry
	 * points.
	 */
357add_32:
358	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
3590:
360	ld	[%o0], %o2
3611:
362	add	%o2, %o1, %o3
363	cas	[%o0], %o2, %o3
364	cmp	%o2, %o3
365	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
366	  mov	%o3, %o2
367	retl
368	add	%o2, %o1, %o0		! return new value
3692:
370	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
371	SET_SIZE(atomic_add_int_nv)
372	SET_SIZE(atomic_add_int)
373	SET_SIZE(atomic_add_32_nv)
374	SET_SIZE(atomic_add_32)
375
376	/*
377	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
378	 * separated, you need to also edit the libc sparcv9 platform
379	 * specific mapfile and remove the NODYNSORT attribute
380	 * from atomic_inc_64_nv.
381	 */
382	ENTRY(atomic_inc_64)
383	ALTENTRY(atomic_inc_64_nv)
384	ALTENTRY(atomic_inc_ulong)
385	ALTENTRY(atomic_inc_ulong_nv)
	/*
	 * Increment = add 1.  Branch to the shared add_64 body; the "add"
	 * in the branch delay slot sets the delta register %o1 to 1
	 * before control arrives there.  %o0 is passed through untouched.
	 */
386	ba	add_64
387	  add	%g0, 1, %o1
388	SET_SIZE(atomic_inc_ulong_nv)
389	SET_SIZE(atomic_inc_ulong)
390	SET_SIZE(atomic_inc_64_nv)
391	SET_SIZE(atomic_inc_64)
392
393	/*
394	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
395	 * separated, you need to also edit the libc sparcv9 platform
396	 * specific mapfile and remove the NODYNSORT attribute
397	 * from atomic_dec_64_nv.
398	 */
399	ENTRY(atomic_dec_64)
400	ALTENTRY(atomic_dec_64_nv)
401	ALTENTRY(atomic_dec_ulong)
402	ALTENTRY(atomic_dec_ulong_nv)
	/*
	 * Decrement = add -1.  Branch to the shared add_64 body; the
	 * "sub" in the branch delay slot sets the delta register %o1 to
	 * 0 - 1 before control arrives there.  %o0 is passed through
	 * untouched.
	 */
403	ba	add_64
404	  sub	%g0, 1, %o1
405	SET_SIZE(atomic_dec_ulong_nv)
406	SET_SIZE(atomic_dec_ulong)
407	SET_SIZE(atomic_dec_64_nv)
408	SET_SIZE(atomic_dec_64)
409
410	/*
411	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
412	 * separated, you need to also edit the libc sparcv9 platform
413	 * specific mapfile and remove the NODYNSORT attribute
414	 * from atomic_add_64_nv.
415	 */
416	ENTRY(atomic_add_64)
417	ALTENTRY(atomic_add_64_nv)
418	ALTENTRY(atomic_add_ptr)
419	ALTENTRY(atomic_add_ptr_nv)
420	ALTENTRY(atomic_add_long)
421	ALTENTRY(atomic_add_long_nv)
	/*
	 * In:  %o0 = address of the 64-bit word, %o1 = value to add.
	 * Out: %o0 = new value (replaced word + %o1).
	 * Same shape as the 32-bit add, using ldx/casx and the %xcc
	 * condition codes.  %o4, %g4 and %g5 are the backoff val, ncpu
	 * and cas_cnt; %o5 serves as the backoff limit.
	 * The "mov" after ATOMIC_BACKOFF_BRANCH is an annulled delay slot
	 * that runs only when the casx failed.
	 * add_64 is also the branch target of the inc_64/dec_64 entry
	 * points.
	 */
422add_64:
423	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
4240:
425	ldx	[%o0], %o2
4261:
427	add	%o2, %o1, %o3
428	casx	[%o0], %o2, %o3
429	cmp	%o2, %o3
430	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
431	  mov	%o3, %o2
432	retl
433	add	%o2, %o1, %o0		! return new value
4342:
435	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
436	SET_SIZE(atomic_add_long_nv)
437	SET_SIZE(atomic_add_long)
438	SET_SIZE(atomic_add_ptr_nv)
439	SET_SIZE(atomic_add_ptr)
440	SET_SIZE(atomic_add_64_nv)
441	SET_SIZE(atomic_add_64)
442
443	/*
444	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
445	 * separated, you need to also edit the libc sparcv9 platform
446	 * specific mapfile and remove the NODYNSORT attribute
447	 * from atomic_or_8_nv.
448	 */
449	ENTRY(atomic_or_8)
450	ALTENTRY(atomic_or_8_nv)
451	ALTENTRY(atomic_or_uchar)
452	ALTENTRY(atomic_or_uchar_nv)
	/*
	 * In:  %o0 = address of the target byte, %o1 = bits to OR in.
	 * Out: %o0 = new byte value (masked, shifted down to bit 0).
	 * The operand is shifted into the byte's position and masked, so
	 * it is zero in the other three bytes and ORing it into the whole
	 * word leaves them unchanged.  The update is a 32-bit cas on the
	 * containing word; on failure the annulled delay slot adopts the
	 * observed word and the loop retries.
	 */
453	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
454	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
455	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
456	set	0xff, %o3		! %o3 = mask
457	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
458	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
459	and	%o1, %o3, %o1		! %o1 = single byte value
460	andn	%o0, 0x3, %o0		! %o0 = word address
461	ld	[%o0], %o2		! read old value
4621:
463	or	%o2, %o1, %o5		! or in the new value
464	cas	[%o0], %o2, %o5
465	cmp	%o2, %o5
466	bne,a,pn %icc, 1b
467	  mov	%o5, %o2		! %o2 = old value
	/* Success: rebuild the stored word and isolate the target byte. */
468	or	%o2, %o1, %o5
469	and	%o5, %o3, %o5
470	retl
471	srl	%o5, %g1, %o0		! %o0 = new value
472	SET_SIZE(atomic_or_uchar_nv)
473	SET_SIZE(atomic_or_uchar)
474	SET_SIZE(atomic_or_8_nv)
475	SET_SIZE(atomic_or_8)
476
477	/*
478	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
479	 * separated, you need to also edit the libc sparcv9 platform
480	 * specific mapfile and remove the NODYNSORT attribute
481	 * from atomic_or_16_nv.
482	 */
483	ENTRY(atomic_or_16)
484	ALTENTRY(atomic_or_16_nv)
485	ALTENTRY(atomic_or_ushort)
486	ALTENTRY(atomic_or_ushort_nv)
	/*
	 * In:  %o0 = address of the target halfword, %o1 = bits to OR in.
	 * Out: %o0 = new halfword value (masked, shifted down to bit 0).
	 * The operand is shifted into the halfword's position and masked,
	 * so ORing it into the whole word leaves the other halfword
	 * unchanged.  The update is a 32-bit cas on the containing word;
	 * on failure the annulled delay slot adopts the observed word and
	 * the loop retries.
	 */
487	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
488	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
489	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
490	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
491	sethi	%hi(0xffff0000), %o3	! %o3 = mask
492	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
493	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
494	and	%o1, %o3, %o1		! %o1 = single short value
495	andn	%o0, 0x2, %o0		! %o0 = word address
496	! if low-order bit is 1, we will properly get an alignment fault here
497	ld	[%o0], %o2		! read old value
4981:
499	or	%o2, %o1, %o5		! or in the new value
500	cas	[%o0], %o2, %o5
501	cmp	%o2, %o5
502	bne,a,pn %icc, 1b
503	  mov	%o5, %o2		! %o2 = old value
	/* Success: rebuild the stored word and isolate the target halfword. */
504	or	%o2, %o1, %o5		! or in the new value
505	and	%o5, %o3, %o5
506	retl
507	srl	%o5, %g1, %o0		! %o0 = new value
508	SET_SIZE(atomic_or_ushort_nv)
509	SET_SIZE(atomic_or_ushort)
510	SET_SIZE(atomic_or_16_nv)
511	SET_SIZE(atomic_or_16)
512
513	/*
514	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
515	 * separated, you need to also edit the libc sparcv9 platform
516	 * specific mapfile and remove the NODYNSORT attribute
517	 * from atomic_or_32_nv.
518	 */
519	ENTRY(atomic_or_32)
520	ALTENTRY(atomic_or_32_nv)
521	ALTENTRY(atomic_or_uint)
522	ALTENTRY(atomic_or_uint_nv)
	/*
	 * In:  %o0 = address of the 32-bit word, %o1 = bits to OR in.
	 * Out: %o0 = new value (replaced word | %o1).
	 * %o4, %g4 and %g5 are the backoff val, ncpu and cas_cnt; %o5
	 * serves as the backoff limit.  The "mov" after
	 * ATOMIC_BACKOFF_BRANCH is an annulled delay slot that runs only
	 * when the cas failed.
	 */
523	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5240:
525	ld	[%o0], %o2
5261:
527	or	%o2, %o1, %o3
528	cas	[%o0], %o2, %o3
529	cmp	%o2, %o3
530	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
531	  mov	%o3, %o2
532	retl
533	or	%o2, %o1, %o0		! return new value
5342:
535	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
536	SET_SIZE(atomic_or_uint_nv)
537	SET_SIZE(atomic_or_uint)
538	SET_SIZE(atomic_or_32_nv)
539	SET_SIZE(atomic_or_32)
540
541	/*
542	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
543	 * separated, you need to also edit the libc sparcv9 platform
544	 * specific mapfile and remove the NODYNSORT attribute
545	 * from atomic_or_64_nv.
546	 */
547	ENTRY(atomic_or_64)
548	ALTENTRY(atomic_or_64_nv)
549	ALTENTRY(atomic_or_ulong)
550	ALTENTRY(atomic_or_ulong_nv)
	/*
	 * In:  %o0 = address of the 64-bit word, %o1 = bits to OR in.
	 * Out: %o0 = new value (replaced word | %o1).
	 * Same shape as the 32-bit OR, using ldx/casx and %xcc.  %o4,
	 * %g4 and %g5 are the backoff val, ncpu and cas_cnt; %o5 serves
	 * as the backoff limit.  The "mov" after ATOMIC_BACKOFF_BRANCH
	 * is an annulled delay slot that runs only when the casx failed.
	 */
551	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
5520:
553	ldx	[%o0], %o2
5541:
555	or	%o2, %o1, %o3
556	casx	[%o0], %o2, %o3
557	cmp	%o2, %o3
558	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
559	  mov	%o3, %o2
560	retl
561	or	%o2, %o1, %o0		! return new value
5622:
563	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
564	SET_SIZE(atomic_or_ulong_nv)
565	SET_SIZE(atomic_or_ulong)
566	SET_SIZE(atomic_or_64_nv)
567	SET_SIZE(atomic_or_64)
568
569	/*
570	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
571	 * separated, you need to also edit the libc sparcv9 platform
572	 * specific mapfile and remove the NODYNSORT attribute
573	 * from atomic_and_8_nv.
574	 */
575	ENTRY(atomic_and_8)
576	ALTENTRY(atomic_and_8_nv)
577	ALTENTRY(atomic_and_uchar)
578	ALTENTRY(atomic_and_uchar_nv)
	/*
	 * In:  %o0 = address of the target byte, %o1 = mask to AND with.
	 * Out: %o0 = new byte value (masked, shifted down to bit 0).
	 * After shifting the operand into place, "orn" sets every bit
	 * outside the byte mask to one, so ANDing it into the whole word
	 * leaves the other three bytes unchanged.  The update is a 32-bit
	 * cas on the containing word; on failure the annulled delay slot
	 * adopts the observed word and the loop retries.
	 */
579	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
580	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
581	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
582	set	0xff, %o3		! %o3 = mask
583	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
584	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
585	orn	%o1, %o3, %o1		! all ones in other bytes
586	andn	%o0, 0x3, %o0		! %o0 = word address
587	ld	[%o0], %o2		! read old value
5881:
589	and	%o2, %o1, %o5		! and in the new value
590	cas	[%o0], %o2, %o5
591	cmp	%o2, %o5
592	bne,a,pn %icc, 1b
593	  mov	%o5, %o2		! %o2 = old value
	/* Success: rebuild the stored word and isolate the target byte. */
594	and	%o2, %o1, %o5
595	and	%o5, %o3, %o5
596	retl
597	srl	%o5, %g1, %o0		! %o0 = new value
598	SET_SIZE(atomic_and_uchar_nv)
599	SET_SIZE(atomic_and_uchar)
600	SET_SIZE(atomic_and_8_nv)
601	SET_SIZE(atomic_and_8)
602
603	/*
604	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
605	 * separated, you need to also edit the libc sparcv9 platform
606	 * specific mapfile and remove the NODYNSORT attribute
607	 * from atomic_and_16_nv.
608	 */
609	ENTRY(atomic_and_16)
610	ALTENTRY(atomic_and_16_nv)
611	ALTENTRY(atomic_and_ushort)
612	ALTENTRY(atomic_and_ushort_nv)
	/*
	 * In:  %o0 = address of the target halfword, %o1 = mask to AND
	 *      with.
	 * Out: %o0 = new halfword value (masked, shifted down to bit 0).
	 * After shifting the operand into place, "orn" sets every bit
	 * outside the halfword mask to one, so ANDing it into the whole
	 * word leaves the other halfword unchanged.  The update is a
	 * 32-bit cas on the containing word; on failure the annulled
	 * delay slot adopts the observed word and the loop retries.
	 */
613	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
614	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
615	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
616	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
617	sethi	%hi(0xffff0000), %o3	! %o3 = mask
618	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
619	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
620	orn	%o1, %o3, %o1		! all ones in the other half
621	andn	%o0, 0x2, %o0		! %o0 = word address
622	! if low-order bit is 1, we will properly get an alignment fault here
623	ld	[%o0], %o2		! read old value
6241:
625	and	%o2, %o1, %o5		! and in the new value
626	cas	[%o0], %o2, %o5
627	cmp	%o2, %o5
628	bne,a,pn %icc, 1b
629	  mov	%o5, %o2		! %o2 = old value
	/* Success: rebuild the stored word and isolate the target halfword. */
630	and	%o2, %o1, %o5
631	and	%o5, %o3, %o5
632	retl
633	srl	%o5, %g1, %o0		! %o0 = new value
634	SET_SIZE(atomic_and_ushort_nv)
635	SET_SIZE(atomic_and_ushort)
636	SET_SIZE(atomic_and_16_nv)
637	SET_SIZE(atomic_and_16)
638
639	/*
640	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
641	 * separated, you need to also edit the libc sparcv9 platform
642	 * specific mapfile and remove the NODYNSORT attribute
643	 * from atomic_and_32_nv.
644	 */
645	ENTRY(atomic_and_32)
646	ALTENTRY(atomic_and_32_nv)
647	ALTENTRY(atomic_and_uint)
648	ALTENTRY(atomic_and_uint_nv)
	/*
	 * In:  %o0 = address of the 32-bit word, %o1 = mask to AND with.
	 * Out: %o0 = new value (replaced word & %o1).
	 * %o4, %g4 and %g5 are the backoff val, ncpu and cas_cnt; %o5
	 * serves as the backoff limit.  The "mov" after
	 * ATOMIC_BACKOFF_BRANCH is an annulled delay slot that runs only
	 * when the cas failed.
	 */
649	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6500:
651	ld	[%o0], %o2
6521:
653	and	%o2, %o1, %o3
654	cas	[%o0], %o2, %o3
655	cmp	%o2, %o3
656	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
657	  mov	%o3, %o2
658	retl
659	and	%o2, %o1, %o0		! return new value
6602:
661	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
662	SET_SIZE(atomic_and_uint_nv)
663	SET_SIZE(atomic_and_uint)
664	SET_SIZE(atomic_and_32_nv)
665	SET_SIZE(atomic_and_32)
666
667	/*
668	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
669	 * separated, you need to also edit the libc sparcv9 platform
670	 * specific mapfile and remove the NODYNSORT attribute
671	 * from atomic_and_64_nv.
672	 */
673	ENTRY(atomic_and_64)
674	ALTENTRY(atomic_and_64_nv)
675	ALTENTRY(atomic_and_ulong)
676	ALTENTRY(atomic_and_ulong_nv)
	/*
	 * In:  %o0 = address of the 64-bit word, %o1 = mask to AND with.
	 * Out: %o0 = new value (replaced word & %o1).
	 * Same shape as the 32-bit AND, using ldx/casx and %xcc.  %o4,
	 * %g4 and %g5 are the backoff val, ncpu and cas_cnt; %o5 serves
	 * as the backoff limit.  The "mov" after ATOMIC_BACKOFF_BRANCH
	 * is an annulled delay slot that runs only when the casx failed.
	 */
677	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
6780:
679	ldx	[%o0], %o2
6801:
681	and	%o2, %o1, %o3
682	casx	[%o0], %o2, %o3
683	cmp	%o2, %o3
684	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
685	  mov	%o3, %o2
686	retl
687	and	%o2, %o1, %o0		! return new value
6882:
689	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
690	SET_SIZE(atomic_and_ulong_nv)
691	SET_SIZE(atomic_and_ulong)
692	SET_SIZE(atomic_and_64_nv)
693	SET_SIZE(atomic_and_64)
694
695	ENTRY(atomic_cas_8)
696	ALTENTRY(atomic_cas_uchar)
	/*
	 * In:  %o0 = address of the target byte, %o1 = comparison value,
	 *      %o2 = new value.
	 * Out: %o0 = byte value observed in memory (shifted down to
	 *      bit 0).
	 * Both operands are shifted into the byte's position and masked.
	 * Each pass builds two full words from the last observed word with
	 * the target byte cleared: %o4 = expected (comparison byte
	 * inserted) and %o5 = replacement (new byte inserted), then issues
	 * a 32-bit cas.  The "and" in the delay slot of the first "be" is
	 * not annulled, so %o4 holds the observed target byte on both
	 * paths.  If the cas failed but the observed byte equals the
	 * comparison value, only the neighbouring bytes changed, so the
	 * loop retries with the freshly observed word.
	 */
697	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
698	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
699	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
700	set	0xff, %o3		! %o3 = mask
701	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
702	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
703	and	%o1, %o3, %o1		! %o1 = single byte value
704	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
705	and	%o2, %o3, %o2		! %o2 = single byte value
706	andn	%o0, 0x3, %o0		! %o0 = word address
707	ld	[%o0], %o4		! read old value
7081:
709	andn	%o4, %o3, %o4		! clear target bits
710	or	%o4, %o2, %o5		! insert the new value
711	or	%o4, %o1, %o4		! insert the comparison value
712	cas	[%o0], %o4, %o5
713	cmp	%o4, %o5		! did we succeed?
714	be,pt	%icc, 2f
715	  and	%o5, %o3, %o4		! isolate the old value
716	cmp	%o1, %o4		! should we have succeeded?
717	be,a,pt	%icc, 1b		! yes, try again
718	  mov	%o5, %o4		! %o4 = old value
7192:
720	retl
721	srl	%o4, %g1, %o0		! %o0 = old value
722	SET_SIZE(atomic_cas_uchar)
723	SET_SIZE(atomic_cas_8)
724
725	ENTRY(atomic_cas_16)
726	ALTENTRY(atomic_cas_ushort)
	/*
	 * In:  %o0 = address of the target halfword, %o1 = comparison
	 *      value, %o2 = new value.
	 * Out: %o0 = halfword value observed in memory (shifted down to
	 *      bit 0).
	 * Both operands are shifted into the halfword's position and
	 * masked.  Each pass builds two full words from the last observed
	 * word with the target halfword cleared: %o4 = expected
	 * (comparison value inserted) and %o5 = replacement (new value
	 * inserted), then issues a 32-bit cas.  The "and" in the delay
	 * slot of the first "be" is not annulled, so %o4 holds the
	 * observed target halfword on both paths.  If the cas failed but
	 * the observed halfword equals the comparison value, only the
	 * other halfword changed, so the loop retries with the freshly
	 * observed word.
	 */
727	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
728	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
729	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
730	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
731	sethi	%hi(0xffff0000), %o3	! %o3 = mask
732	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
733	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
734	and	%o1, %o3, %o1		! %o1 = single short value
735	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
736	and	%o2, %o3, %o2		! %o2 = single short value
737	andn	%o0, 0x2, %o0		! %o0 = word address
738	! if low-order bit is 1, we will properly get an alignment fault here
739	ld	[%o0], %o4		! read old value
7401:
741	andn	%o4, %o3, %o4		! clear target bits
742	or	%o4, %o2, %o5		! insert the new value
743	or	%o4, %o1, %o4		! insert the comparison value
744	cas	[%o0], %o4, %o5
745	cmp	%o4, %o5		! did we succeed?
746	be,pt	%icc, 2f
747	  and	%o5, %o3, %o4		! isolate the old value
748	cmp	%o1, %o4		! should we have succeeded?
749	be,a,pt	%icc, 1b		! yes, try again
750	  mov	%o5, %o4		! %o4 = old value
7512:
752	retl
753	srl	%o4, %g1, %o0		! %o0 = old value
754	SET_SIZE(atomic_cas_ushort)
755	SET_SIZE(atomic_cas_16)
756
757	ENTRY(atomic_cas_32)
758	ALTENTRY(atomic_cas_uint)
	/*
	 * In:  %o0 = address of the 32-bit word, %o1 = comparison value,
	 *      %o2 = new value.
	 * Out: %o0 = word found in memory.
	 * A single cas does the whole job: it stores %o2 only if memory
	 * equals %o1, and in either case leaves the word it found in
	 * %o2.  The "mov" in the retl delay slot moves that to %o0.
	 */
759	cas	[%o0], %o1, %o2
760	retl
761	mov	%o2, %o0
762	SET_SIZE(atomic_cas_uint)
763	SET_SIZE(atomic_cas_32)
764
765	ENTRY(atomic_cas_64)
766	ALTENTRY(atomic_cas_ptr)
767	ALTENTRY(atomic_cas_ulong)
	/*
	 * In:  %o0 = address of the 64-bit word, %o1 = comparison value,
	 *      %o2 = new value.
	 * Out: %o0 = word found in memory.
	 * A single casx does the whole job: it stores %o2 only if memory
	 * equals %o1, and in either case leaves the word it found in
	 * %o2.  The "mov" in the retl delay slot moves that to %o0.
	 */
768	casx	[%o0], %o1, %o2
769	retl
770	mov	%o2, %o0
771	SET_SIZE(atomic_cas_ulong)
772	SET_SIZE(atomic_cas_ptr)
773	SET_SIZE(atomic_cas_64)
774
775	ENTRY(atomic_swap_8)
776	ALTENTRY(atomic_swap_uchar)
	/*
	 * In:  %o0 = address of the target byte, %o1 = new value.
	 * Out: %o0 = previous byte value (shifted down to bit 0).
	 * Each pass replaces the target byte inside the last observed
	 * word and tries to commit it with a 32-bit cas.  On failure the
	 * annulled delay slot adopts the observed word and the loop
	 * retries.  On success %o5 holds the word that was replaced, from
	 * which the old byte is extracted.
	 */
777	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
778	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
779	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
780	set	0xff, %o3		! %o3 = mask
781	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
782	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
783	and	%o1, %o3, %o1		! %o1 = single byte value
784	andn	%o0, 0x3, %o0		! %o0 = word address
785	ld	[%o0], %o2		! read old value
7861:
787	andn	%o2, %o3, %o5		! clear target bits
788	or	%o5, %o1, %o5		! insert the new value
789	cas	[%o0], %o2, %o5
790	cmp	%o2, %o5
791	bne,a,pn %icc, 1b
792	  mov	%o5, %o2		! %o2 = old value
793	and	%o5, %o3, %o5
794	retl
795	srl	%o5, %g1, %o0		! %o0 = old value
796	SET_SIZE(atomic_swap_uchar)
797	SET_SIZE(atomic_swap_8)
798
799	ENTRY(atomic_swap_16)
800	ALTENTRY(atomic_swap_ushort)
	/*
	 * In:  %o0 = address of the target halfword, %o1 = new value.
	 * Out: %o0 = previous halfword value (shifted down to bit 0).
	 * Each pass replaces the target halfword inside the last observed
	 * word and tries to commit it with a 32-bit cas.  On failure the
	 * annulled delay slot adopts the observed word and the loop
	 * retries.  On success %o5 holds the word that was replaced, from
	 * which the old halfword is extracted.
	 */
801	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
802	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
803	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
804	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
805	sethi	%hi(0xffff0000), %o3	! %o3 = mask
806	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
807	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
808	and	%o1, %o3, %o1		! %o1 = single short value
809	andn	%o0, 0x2, %o0		! %o0 = word address
810	! if low-order bit is 1, we will properly get an alignment fault here
811	ld	[%o0], %o2		! read old value
8121:
813	andn	%o2, %o3, %o5		! clear target bits
814	or	%o5, %o1, %o5		! insert the new value
815	cas	[%o0], %o2, %o5
816	cmp	%o2, %o5
817	bne,a,pn %icc, 1b
818	  mov	%o5, %o2		! %o2 = old value
819	and	%o5, %o3, %o5
820	retl
821	srl	%o5, %g1, %o0		! %o0 = old value
822	SET_SIZE(atomic_swap_ushort)
823	SET_SIZE(atomic_swap_16)
824
825	ENTRY(atomic_swap_32)
826	ALTENTRY(atomic_swap_uint)
	/*
	 * In:  %o0 = address of the 32-bit word, %o1 = new value.
	 * Out: %o0 = previous value.
	 * The new value is copied into %o3 on every pass because cas
	 * overwrites that register with the word it found in memory.
	 * On success %o3 is therefore the old value, returned via the
	 * retl delay slot.  %o4, %g4 and %g5 are the backoff val, ncpu
	 * and cas_cnt; %o5 serves as the backoff limit.  The "mov" after
	 * ATOMIC_BACKOFF_BRANCH is an annulled delay slot that runs only
	 * when the cas failed.
	 */
827	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8280:
829	ld	[%o0], %o2
8301:
831	mov	%o1, %o3
832	cas	[%o0], %o2, %o3
833	cmp	%o2, %o3
834	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
835	  mov	%o3, %o2
836	retl
837	mov	%o3, %o0
8382:
839	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
840	SET_SIZE(atomic_swap_uint)
841	SET_SIZE(atomic_swap_32)
842
843	ENTRY(atomic_swap_64)
844	ALTENTRY(atomic_swap_ptr)
845	ALTENTRY(atomic_swap_ulong)
	/*
	 * In:  %o0 = address of the 64-bit word, %o1 = new value.
	 * Out: %o0 = previous value.
	 * Same shape as the 32-bit swap, using ldx/casx and %xcc.  The
	 * new value is copied into %o3 on every pass because casx
	 * overwrites that register with the word it found in memory.
	 * %o4, %g4 and %g5 are the backoff val, ncpu and cas_cnt; %o5
	 * serves as the backoff limit.
	 */
846	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
8470:
848	ldx	[%o0], %o2
8491:
850	mov	%o1, %o3
851	casx	[%o0], %o2, %o3
852	cmp	%o2, %o3
853	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
854	  mov	%o3, %o2
855	retl
856	mov	%o3, %o0
8572:
858	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
859	SET_SIZE(atomic_swap_ulong)
860	SET_SIZE(atomic_swap_ptr)
861	SET_SIZE(atomic_swap_64)
862
863	ENTRY(atomic_set_long_excl)
	/*
	 * In:  %o0 = address of the word, %o1 = bit number.
	 * Out: %o0 = 0 if this call set the bit, -1 if the bit was
	 *      already set.
	 * %o3 = 1 << %o1 is the bit mask.  slln, ldn, casn and %ncc are
	 * not defined in this file; presumably they are the native-long
	 * forms supplied by <sys/asm_linkage.h> -- confirm there.
	 * The "mov -1" sits in an annulled delay slot and runs only when
	 * the bit was found set.  The "mov %o4, %o2" after
	 * ATOMIC_BACKOFF_BRANCH runs only when the cas failed.
	 * %o5, %g4 and %g5 are the backoff val, ncpu and cas_cnt; %g1
	 * serves as the backoff limit.
	 */
864	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
865	mov	1, %o3
866	slln	%o3, %o1, %o3
8670:
868	ldn	[%o0], %o2
8691:
870	andcc	%o2, %o3, %g0		! test if the bit is set
871	bnz,a,pn %ncc, 2f		! if so, then fail out
872	  mov	-1, %o0
873	or	%o2, %o3, %o4		! set the bit, and try to commit it
874	casn	[%o0], %o2, %o4
875	cmp	%o2, %o4
876	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
877	  mov	%o4, %o2
878	mov	%g0, %o0
8792:
880	retl
881	nop
8825:
883	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
884	SET_SIZE(atomic_set_long_excl)
885
886	ENTRY(atomic_clear_long_excl)
	/*
	 * In:  %o0 = address of the word, %o1 = bit number.
	 * Out: %o0 = 0 if this call cleared the bit, -1 if the bit was
	 *      already clear.
	 * %o3 = 1 << %o1 is the bit mask.  "andncc %o3, %o2" computes
	 * mask & ~word, which is non-zero exactly when the bit is clear
	 * in the observed word.  slln, ldn, casn and %ncc are not defined
	 * in this file; presumably they are the native-long forms
	 * supplied by <sys/asm_linkage.h> -- confirm there.
	 * The "mov -1" sits in an annulled delay slot and runs only when
	 * the bit was found clear.  The "mov %o4, %o2" after
	 * ATOMIC_BACKOFF_BRANCH runs only when the cas failed.
	 * %o5, %g4 and %g5 are the backoff val, ncpu and cas_cnt; %g1
	 * serves as the backoff limit.
	 */
887	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
888	mov	1, %o3
889	slln	%o3, %o1, %o3
8900:
891	ldn	[%o0], %o2
8921:
893	andncc	%o3, %o2, %g0		! test if the bit is clear
894	bnz,a,pn %ncc, 2f		! if so, then fail out
895	  mov	-1, %o0
896	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
897	casn	[%o0], %o2, %o4
898	cmp	%o2, %o4
899	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
900	  mov	%o4, %o2
901	mov	%g0, %o0
9022:
903	retl
904	nop
9055:
906	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
907	SET_SIZE(atomic_clear_long_excl)
908
909#if !defined(_KERNEL)
910
911	/*
912	 * Spitfires and Blackbirds have a problem with membars in the
913	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
914	 * that the whole world needs the workaround.
915	 */
916	ENTRY(membar_enter)
	/*
	 * Issues a membar with the #StoreLoad and #StoreStore ordering
	 * bits, then returns.  The membar is placed ahead of retl and the
	 * delay slot is filled with a nop, keeping the membar out of the
	 * delay slot.
	 */
917	membar	#StoreLoad|#StoreStore
918	retl
919	nop
920	SET_SIZE(membar_enter)
921
922	ENTRY(membar_exit)
	/*
	 * Issues a membar with the #LoadStore and #StoreStore ordering
	 * bits, then returns.  The membar is placed ahead of retl and the
	 * delay slot is filled with a nop, keeping the membar out of the
	 * delay slot.
	 */
923	membar	#LoadStore|#StoreStore
924	retl
925	nop
926	SET_SIZE(membar_exit)
927
928	ENTRY(membar_producer)
	/*
	 * Issues a membar with the #StoreStore ordering bit, then
	 * returns.  The membar is placed ahead of retl and the delay slot
	 * is filled with a nop, keeping the membar out of the delay slot.
	 */
929	membar	#StoreStore
930	retl
931	nop
932	SET_SIZE(membar_producer)
933
934	ENTRY(membar_consumer)
	/*
	 * Issues a membar with the #LoadLoad ordering bit, then returns.
	 * The membar is placed ahead of retl and the delay slot is filled
	 * with a nop, keeping the membar out of the delay slot.
	 */
935	membar	#LoadLoad
936	retl
937	nop
938	SET_SIZE(membar_consumer)
939
940#endif	/* !_KERNEL */
941