/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"%M%"

#include <sys/asm_linkage.h>

/*
 * ATOMIC_BO_ENABLE_SHIFT can be selectively defined by processors
 * to enable exponential backoff. No definition means backoff is
 * not desired, i.e. backoff should be disabled.
 * By default, the shift value is used to generate a power-of-2
 * value for the backoff limit. In the kernel, processors scale this
 * shift value with the number of online cpus.
 */
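
/*
 * Illustrative sketch (not part of the original source; the helper name
 * atomic_backoff_limit() is hypothetical): at the C level, the backoff
 * limit derived from ATOMIC_BO_ENABLE_SHIFT behaves roughly like this,
 * with ncpus_online consulted only in the kernel build:
 *
 *	uint_t
 *	atomic_backoff_limit(void)
 *	{
 *	#if defined(_KERNEL)
 *		return (ncpus_online << ATOMIC_BO_ENABLE_SHIFT);
 *	#else
 *		return (1 << ATOMIC_BO_ENABLE_SHIFT);
 *	#endif
 *	}
 */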

#if defined(_KERNEL)
	/*
	 * Legacy kernel interfaces; they will go away (eventually).
	 */
	ANSI_PRAGMA_WEAK2(cas8,atomic_cas_8,function)
	ANSI_PRAGMA_WEAK2(cas32,atomic_cas_32,function)
	ANSI_PRAGMA_WEAK2(cas64,atomic_cas_64,function)
	ANSI_PRAGMA_WEAK2(caslong,atomic_cas_ulong,function)
	ANSI_PRAGMA_WEAK2(casptr,atomic_cas_ptr,function)
	ANSI_PRAGMA_WEAK2(atomic_and_long,atomic_and_ulong,function)
	ANSI_PRAGMA_WEAK2(atomic_or_long,atomic_or_ulong,function)
	ANSI_PRAGMA_WEAK2(swapl,atomic_swap_32,function)

#ifdef ATOMIC_BO_ENABLE_SHIFT

#if !defined(lint)
	.weak   cpu_atomic_delay
	.type   cpu_atomic_delay, #function
#endif  /* lint */

/*
 * For the kernel, invoke a processor-specific delay routine to perform
 * a low-impact spin delay. The value of ATOMIC_BO_ENABLE_SHIFT is tuned
 * with respect to the specific spin delay implementation.
 */
#define	DELAY_SPIN(label, tmp1, tmp2)					\
	/*								; \
	 * Define a pragma weak reference to a cpu specific		; \
	 * delay routine for atomic backoff. For CPUs that		; \
	 * have no such delay routine defined, the delay becomes	; \
	 * just a simple tight loop.					; \
	 *								; \
	 * tmp1 = holds CPU specific delay routine			; \
	 * tmp2 = holds atomic routine's callee return address		; \
	 */								; \
	sethi	%hi(cpu_atomic_delay), tmp1				; \
	or	tmp1, %lo(cpu_atomic_delay), tmp1			; \
label/**/0:								; \
	brz,pn	tmp1, label/**/1					; \
	mov	%o7, tmp2						; \
	jmpl	tmp1, %o7	/* call CPU specific delay routine */	; \
	  nop			/* delay slot : do nothing */		; \
	mov	tmp2, %o7	/* restore callee's return address */	; \
label/**/1:

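/*
 * Rough C-level equivalent of DELAY_SPIN above (illustrative only; the
 * wrapper name delay_spin() is hypothetical).  Because cpu_atomic_delay
 * is a weak reference, its address is NULL when no CPU module supplies a
 * delay routine, and the macro falls through with no extra delay:
 *
 *	#pragma weak cpu_atomic_delay
 *	extern void cpu_atomic_delay(void);
 *
 *	static void
 *	delay_spin(void)
 *	{
 *		if (cpu_atomic_delay != NULL)
 *			cpu_atomic_delay();
 *	}
 */
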
/*
 * For the kernel, we take cas failures into consideration and also
 * scale the backoff limit w.r.t. the number of cpus.
 * For cas failures, we reset the backoff value to 1 if the number of
 * cas failures equals or exceeds the number of online cpus. This
 * enforces some degree of fairness and prevents starvation.
 * We also scale/normalize the processor-provided ATOMIC_BO_ENABLE_SHIFT
 * w.r.t. the number of online cpus to obtain the actual final
 * limit to use.
 */
#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)		\
	brnz,pt	ncpu, label/**/0					; \
	  inc	cas_cnt							; \
	sethi	%hi(ncpus_online), ncpu					; \
	ld	[ncpu + %lo(ncpus_online)], ncpu			; \
label/**/0:								; \
	cmp	cas_cnt, ncpu						; \
	blu,pt	%xcc, label/**/1					; \
	  sllx	ncpu, ATOMIC_BO_ENABLE_SHIFT, limit			; \
	mov	%g0, cas_cnt						; \
	mov	1, val							; \
label/**/1:
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
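
/*
 * Illustrative C rendering of ATOMIC_BACKOFF_CPU (not part of the
 * original source; variable names follow the macro arguments).  The
 * cas-failure counter is compared against the number of online cpus,
 * and the backoff value is reset to 1 once the failures reach that
 * count, so a repeatedly failing thread does not keep backing off
 * further:
 *
 *	cas_cnt++;
 *	if (ncpu == 0)
 *		ncpu = ncpus_online;
 *	limit = ncpu << ATOMIC_BO_ENABLE_SHIFT;
 *	if (cas_cnt >= ncpu) {
 *		cas_cnt = 0;
 *		val = 1;
 *	}
 */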

#else	/* _KERNEL */
	/*
	 * Include the definitions for the libc weak aliases.
	 */
#include "../atomic_asm_weak.h"

/*
 * ATOMIC_BO_ENABLE_SHIFT may be enabled/defined here for generic
 * libc atomics. None for now.
 */
#ifdef ATOMIC_BO_ENABLE_SHIFT
#define	DELAY_SPIN(label, tmp1, tmp2)	\
label/**/0:

#define ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label)  \
	set	1 << ATOMIC_BO_ENABLE_SHIFT, limit
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
#endif	/* _KERNEL */

#ifdef ATOMIC_BO_ENABLE_SHIFT
/*
 * ATOMIC_BACKOFF_INIT macro for initialization.
 * The backoff val is initialized to 1.
 * ncpu is initialized to 0.
 * cas_cnt counts cas instruction failures and is
 * initialized to 0.
 */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)	\
	mov	1, val				; \
	mov	%g0, ncpu			; \
	mov	%g0, cas_cnt

#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, backoff

/*
 * Main ATOMIC_BACKOFF_BACKOFF macro for backoff.
 */
#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel) \
	ATOMIC_BACKOFF_CPU(val, limit, ncpu, cas_cnt, label/**/_0)	; \
	cmp	val, limit						; \
	blu,a,pt %xcc, label/**/_1					; \
	  mov	val, limit						; \
label/**/_1:								; \
	mov	limit, val						; \
	DELAY_SPIN(label/**/_2, %g2, %g3)				; \
	deccc	limit							; \
	bgu,pn	%xcc, label/**/_20 /* branch to middle of DELAY_SPIN */	; \
	  nop								; \
	ba	retlabel						; \
	  sllx  val, 1, val
#else	/* ATOMIC_BO_ENABLE_SHIFT */
#define ATOMIC_BACKOFF_INIT(val, ncpu, cas_cnt)

#define ATOMIC_BACKOFF_BRANCH(cr, backoff, loop) \
	bne,a,pn cr, loop

#define ATOMIC_BACKOFF_BACKOFF(val, limit, ncpu, cas_cnt, label, retlabel)
#endif	/* ATOMIC_BO_ENABLE_SHIFT */
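
/*
 * Taken together, these macros wrap a compare-and-swap loop in roughly
 * the following shape (illustrative C, not part of the original source;
 * backoff_limit() and delay_spin() are hypothetical stand-ins for
 * ATOMIC_BACKOFF_CPU and DELAY_SPIN).  On each cas failure the routine
 * spins for "val" delay iterations and then doubles "val", capped at the
 * cpu-scaled limit; when ATOMIC_BO_ENABLE_SHIFT is undefined the whole
 * thing collapses to a plain retry loop:
 *
 *	uint32_t
 *	atomic_add_32_sketch(volatile uint32_t *addr, int32_t delta)
 *	{
 *		uint32_t old, new, val = 1, limit, i;
 *
 *		for (;;) {
 *			old = *addr;
 *			new = old + delta;
 *			if (atomic_cas_32(addr, old, new) == old)
 *				return (new);
 *			limit = backoff_limit();
 *			if (val > limit)
 *				val = limit;
 *			for (i = 0; i < val; i++)
 *				delay_spin();
 *			val <<= 1;
 *		}
 *	}
 */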

	/*
	 * NOTE: If atomic_inc_8 and atomic_inc_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_8_nv.
	 */
	ENTRY(atomic_inc_8)
	ALTENTRY(atomic_inc_8_nv)
	ALTENTRY(atomic_inc_uchar)
	ALTENTRY(atomic_inc_uchar_nv)
	ba	add_8
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_uchar_nv)
	SET_SIZE(atomic_inc_uchar)
	SET_SIZE(atomic_inc_8_nv)
	SET_SIZE(atomic_inc_8)

	/*
	 * NOTE: If atomic_dec_8 and atomic_dec_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_8_nv.
	 */
	ENTRY(atomic_dec_8)
	ALTENTRY(atomic_dec_8_nv)
	ALTENTRY(atomic_dec_uchar)
	ALTENTRY(atomic_dec_uchar_nv)
	ba	add_8
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_uchar_nv)
	SET_SIZE(atomic_dec_uchar)
	SET_SIZE(atomic_dec_8_nv)
	SET_SIZE(atomic_dec_8)

	/*
	 * NOTE: If atomic_add_8 and atomic_add_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_8_nv.
	 */
	ENTRY(atomic_add_8)
	ALTENTRY(atomic_add_8_nv)
	ALTENTRY(atomic_add_char)
	ALTENTRY(atomic_add_char_nv)
add_8:
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	add	%o2, %o1, %o5		! add value to the old value
	and	%o5, %o3, %o5		! clear other bits
	andn	%o2, %o3, %o4		! clear target bits
	or	%o4, %o5, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	add	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_add_char_nv)
	SET_SIZE(atomic_add_char)
	SET_SIZE(atomic_add_8_nv)
	SET_SIZE(atomic_add_8)
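
/*
 * The 8-bit (and 16-bit) routines emulate sub-word atomics with a 32-bit
 * cas on the enclosing word.  A rough C sketch of add_8 above (illustrative
 * only, not part of the original source; SPARC is big-endian, hence the
 * "xor 3" when computing the shift):
 *
 *	uint8_t
 *	atomic_add_8_sketch(volatile uint8_t *addr, int8_t delta)
 *	{
 *		volatile uint32_t *word =
 *		    (volatile uint32_t *)((uintptr_t)addr & ~3UL);
 *		uint32_t shift = (((uintptr_t)addr & 3) ^ 3) * 8;
 *		uint32_t mask = 0xffU << shift;
 *		uint32_t d = ((uint32_t)(uint8_t)delta << shift) & mask;
 *		uint32_t old, new;
 *
 *		do {
 *			old = *word;
 *			// add within the byte, leave the other bytes alone
 *			new = ((old + d) & mask) | (old & ~mask);
 *		} while (atomic_cas_32(word, old, new) != old);
 *		return ((uint8_t)(((old + d) & mask) >> shift));
 *	}
 */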

	/*
	 * NOTE: If atomic_inc_16 and atomic_inc_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_16_nv.
	 */
	ENTRY(atomic_inc_16)
	ALTENTRY(atomic_inc_16_nv)
	ALTENTRY(atomic_inc_ushort)
	ALTENTRY(atomic_inc_ushort_nv)
	ba	add_16
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_ushort_nv)
	SET_SIZE(atomic_inc_ushort)
	SET_SIZE(atomic_inc_16_nv)
	SET_SIZE(atomic_inc_16)

	/*
	 * NOTE: If atomic_dec_16 and atomic_dec_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_16_nv.
	 */
	ENTRY(atomic_dec_16)
	ALTENTRY(atomic_dec_16_nv)
	ALTENTRY(atomic_dec_ushort)
	ALTENTRY(atomic_dec_ushort_nv)
	ba	add_16
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_ushort_nv)
	SET_SIZE(atomic_dec_ushort)
	SET_SIZE(atomic_dec_16_nv)
	SET_SIZE(atomic_dec_16)

	/*
	 * NOTE: If atomic_add_16 and atomic_add_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_16_nv.
	 */
	ENTRY(atomic_add_16)
	ALTENTRY(atomic_add_16_nv)
	ALTENTRY(atomic_add_short)
	ALTENTRY(atomic_add_short_nv)
add_16:
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	add	%o1, %o2, %o5		! add value to the old value
	and	%o5, %o3, %o5		! clear other bits
	andn	%o2, %o3, %o4		! clear target bits
	or	%o4, %o5, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	add	%o1, %o2, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_add_short_nv)
	SET_SIZE(atomic_add_short)
	SET_SIZE(atomic_add_16_nv)
	SET_SIZE(atomic_add_16)

	/*
	 * NOTE: If atomic_inc_32 and atomic_inc_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_32_nv.
	 */
	ENTRY(atomic_inc_32)
	ALTENTRY(atomic_inc_32_nv)
	ALTENTRY(atomic_inc_uint)
	ALTENTRY(atomic_inc_uint_nv)
	ba	add_32
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_uint_nv)
	SET_SIZE(atomic_inc_uint)
	SET_SIZE(atomic_inc_32_nv)
	SET_SIZE(atomic_inc_32)

	/*
	 * NOTE: If atomic_dec_32 and atomic_dec_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_32_nv.
	 */
	ENTRY(atomic_dec_32)
	ALTENTRY(atomic_dec_32_nv)
	ALTENTRY(atomic_dec_uint)
	ALTENTRY(atomic_dec_uint_nv)
	ba	add_32
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_uint_nv)
	SET_SIZE(atomic_dec_uint)
	SET_SIZE(atomic_dec_32_nv)
	SET_SIZE(atomic_dec_32)

	/*
	 * NOTE: If atomic_add_32 and atomic_add_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_32_nv.
	 */
	ENTRY(atomic_add_32)
	ALTENTRY(atomic_add_32_nv)
	ALTENTRY(atomic_add_int)
	ALTENTRY(atomic_add_int_nv)
add_32:
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	add	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	add	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add32, 0b)
	SET_SIZE(atomic_add_int_nv)
	SET_SIZE(atomic_add_int)
	SET_SIZE(atomic_add_32_nv)
	SET_SIZE(atomic_add_32)

	/*
	 * NOTE: If atomic_inc_64 and atomic_inc_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_inc_64_nv.
	 */
	ENTRY(atomic_inc_64)
	ALTENTRY(atomic_inc_64_nv)
	ALTENTRY(atomic_inc_ulong)
	ALTENTRY(atomic_inc_ulong_nv)
	ba	add_64
	  add	%g0, 1, %o1
	SET_SIZE(atomic_inc_ulong_nv)
	SET_SIZE(atomic_inc_ulong)
	SET_SIZE(atomic_inc_64_nv)
	SET_SIZE(atomic_inc_64)

	/*
	 * NOTE: If atomic_dec_64 and atomic_dec_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_dec_64_nv.
	 */
	ENTRY(atomic_dec_64)
	ALTENTRY(atomic_dec_64_nv)
	ALTENTRY(atomic_dec_ulong)
	ALTENTRY(atomic_dec_ulong_nv)
	ba	add_64
	  sub	%g0, 1, %o1
	SET_SIZE(atomic_dec_ulong_nv)
	SET_SIZE(atomic_dec_ulong)
	SET_SIZE(atomic_dec_64_nv)
	SET_SIZE(atomic_dec_64)

	/*
	 * NOTE: If atomic_add_64 and atomic_add_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_add_64_nv.
	 */
	ENTRY(atomic_add_64)
	ALTENTRY(atomic_add_64_nv)
	ALTENTRY(atomic_add_ptr)
	ALTENTRY(atomic_add_ptr_nv)
	ALTENTRY(atomic_add_long)
	ALTENTRY(atomic_add_long_nv)
add_64:
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	add	%o2, %o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	add	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, add64, 0b)
	SET_SIZE(atomic_add_long_nv)
	SET_SIZE(atomic_add_long)
	SET_SIZE(atomic_add_ptr_nv)
	SET_SIZE(atomic_add_ptr)
	SET_SIZE(atomic_add_64_nv)
	SET_SIZE(atomic_add_64)

	/*
	 * NOTE: If atomic_or_8 and atomic_or_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_8_nv.
	 */
	ENTRY(atomic_or_8)
	ALTENTRY(atomic_or_8_nv)
	ALTENTRY(atomic_or_uchar)
	ALTENTRY(atomic_or_uchar_nv)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	or	%o2, %o1, %o5		! or in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	or	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_or_uchar_nv)
	SET_SIZE(atomic_or_uchar)
	SET_SIZE(atomic_or_8_nv)
	SET_SIZE(atomic_or_8)

	/*
	 * NOTE: If atomic_or_16 and atomic_or_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_16_nv.
	 */
	ENTRY(atomic_or_16)
	ALTENTRY(atomic_or_16_nv)
	ALTENTRY(atomic_or_ushort)
	ALTENTRY(atomic_or_ushort_nv)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	or	%o2, %o1, %o5		! or in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	or	%o2, %o1, %o5		! or in the new value
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_or_ushort_nv)
	SET_SIZE(atomic_or_ushort)
	SET_SIZE(atomic_or_16_nv)
	SET_SIZE(atomic_or_16)

	/*
	 * NOTE: If atomic_or_32 and atomic_or_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_32_nv.
	 */
	ENTRY(atomic_or_32)
	ALTENTRY(atomic_or_32_nv)
	ALTENTRY(atomic_or_uint)
	ALTENTRY(atomic_or_uint_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	or	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	or	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or32, 0b)
	SET_SIZE(atomic_or_uint_nv)
	SET_SIZE(atomic_or_uint)
	SET_SIZE(atomic_or_32_nv)
	SET_SIZE(atomic_or_32)

	/*
	 * NOTE: If atomic_or_64 and atomic_or_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_or_64_nv.
	 */
	ENTRY(atomic_or_64)
	ALTENTRY(atomic_or_64_nv)
	ALTENTRY(atomic_or_ulong)
	ALTENTRY(atomic_or_ulong_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	or	%o2, %o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	or	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, or64, 0b)
	SET_SIZE(atomic_or_ulong_nv)
	SET_SIZE(atomic_or_ulong)
	SET_SIZE(atomic_or_64_nv)
	SET_SIZE(atomic_or_64)

	/*
	 * NOTE: If atomic_and_8 and atomic_and_8_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_8_nv.
	 */
	ENTRY(atomic_and_8)
	ALTENTRY(atomic_and_8_nv)
	ALTENTRY(atomic_and_uchar)
	ALTENTRY(atomic_and_uchar_nv)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	orn	%o1, %o3, %o1		! all ones in other bytes
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	and	%o2, %o1, %o5		! and in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_and_uchar_nv)
	SET_SIZE(atomic_and_uchar)
	SET_SIZE(atomic_and_8_nv)
	SET_SIZE(atomic_and_8)

	/*
	 * NOTE: If atomic_and_16 and atomic_and_16_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_16_nv.
	 */
	ENTRY(atomic_and_16)
	ALTENTRY(atomic_and_16_nv)
	ALTENTRY(atomic_and_ushort)
	ALTENTRY(atomic_and_ushort_nv)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	orn	%o1, %o3, %o1		! all ones in the other half
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	and	%o2, %o1, %o5		! and in the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o2, %o1, %o5
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = new value
	SET_SIZE(atomic_and_ushort_nv)
	SET_SIZE(atomic_and_ushort)
	SET_SIZE(atomic_and_16_nv)
	SET_SIZE(atomic_and_16)

	/*
	 * NOTE: If atomic_and_32 and atomic_and_32_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_32_nv.
	 */
	ENTRY(atomic_and_32)
	ALTENTRY(atomic_and_32_nv)
	ALTENTRY(atomic_and_uint)
	ALTENTRY(atomic_and_uint_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	and	%o2, %o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	and	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and32, 0b)
	SET_SIZE(atomic_and_uint_nv)
	SET_SIZE(atomic_and_uint)
	SET_SIZE(atomic_and_32_nv)
	SET_SIZE(atomic_and_32)

	/*
	 * NOTE: If atomic_and_64 and atomic_and_64_nv are ever
	 * separated, you need to also edit the libc sparcv9 platform
	 * specific mapfile and remove the NODYNSORT attribute
	 * from atomic_and_64_nv.
	 */
	ENTRY(atomic_and_64)
	ALTENTRY(atomic_and_64_nv)
	ALTENTRY(atomic_and_ulong)
	ALTENTRY(atomic_and_ulong_nv)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	and	%o2, %o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	and	%o2, %o1, %o0		! return new value
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, and64, 0b)
	SET_SIZE(atomic_and_ulong_nv)
	SET_SIZE(atomic_and_ulong)
	SET_SIZE(atomic_and_64_nv)
	SET_SIZE(atomic_and_64)

	ENTRY(atomic_cas_8)
	ALTENTRY(atomic_cas_uchar)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
	and	%o2, %o3, %o2		! %o2 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o4		! read old value
1:
	andn	%o4, %o3, %o4		! clear target bits
	or	%o4, %o2, %o5		! insert the new value
	or	%o4, %o1, %o4		! insert the comparison value
	cas	[%o0], %o4, %o5
	cmp	%o4, %o5		! did we succeed?
	be,pt	%icc, 2f
	  and	%o5, %o3, %o4		! isolate the old value
	cmp	%o1, %o4		! should we have succeeded?
	be,a,pt	%icc, 1b		! yes, try again
	  mov	%o5, %o4		! %o4 = old value
2:
	retl
	srl	%o4, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_cas_uchar)
	SET_SIZE(atomic_cas_8)
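
/*
 * atomic_cas_8 likewise emulates a byte-sized cas with a 32-bit cas on
 * the enclosing word.  A failure is only reported when the target byte
 * itself differs from the comparison value; if only the neighboring
 * bytes changed, the loop retries.  Rough C sketch (illustrative only,
 * not part of the original source):
 *
 *	uint8_t
 *	atomic_cas_8_sketch(volatile uint8_t *addr, uint8_t cmp, uint8_t new)
 *	{
 *		volatile uint32_t *word =
 *		    (volatile uint32_t *)((uintptr_t)addr & ~3UL);
 *		uint32_t shift = (((uintptr_t)addr & 3) ^ 3) * 8;
 *		uint32_t mask = 0xffU << shift;
 *		uint32_t c = ((uint32_t)cmp << shift) & mask;
 *		uint32_t n = ((uint32_t)new << shift) & mask;
 *		uint32_t old = *word, exp, got;
 *
 *		for (;;) {
 *			exp = (old & ~mask) | c;	// expected word
 *			got = atomic_cas_32(word, exp, (old & ~mask) | n);
 *			if (got == exp)
 *				break;			// succeeded
 *			if ((got & mask) != c)
 *				break;			// byte really differed
 *			old = got;			// other bytes moved; retry
 *		}
 *		return ((uint8_t)((got & mask) >> shift));
 *	}
 */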

	ENTRY(atomic_cas_16)
	ALTENTRY(atomic_cas_ushort)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	sll	%o2, %g1, %o2		! %o2 = shifted to bit offset
	and	%o2, %o3, %o2		! %o2 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o4		! read old value
1:
	andn	%o4, %o3, %o4		! clear target bits
	or	%o4, %o2, %o5		! insert the new value
	or	%o4, %o1, %o4		! insert the comparison value
	cas	[%o0], %o4, %o5
	cmp	%o4, %o5		! did we succeed?
	be,pt	%icc, 2f
	  and	%o5, %o3, %o4		! isolate the old value
	cmp	%o1, %o4		! should we have succeeded?
	be,a,pt	%icc, 1b		! yes, try again
	  mov	%o5, %o4		! %o4 = old value
2:
	retl
	srl	%o4, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_cas_ushort)
	SET_SIZE(atomic_cas_16)

	ENTRY(atomic_cas_32)
	ALTENTRY(atomic_cas_uint)
	cas	[%o0], %o1, %o2
	retl
	mov	%o2, %o0
	SET_SIZE(atomic_cas_uint)
	SET_SIZE(atomic_cas_32)

	ENTRY(atomic_cas_64)
	ALTENTRY(atomic_cas_ptr)
	ALTENTRY(atomic_cas_ulong)
	casx	[%o0], %o1, %o2
	retl
	mov	%o2, %o0
	SET_SIZE(atomic_cas_ulong)
	SET_SIZE(atomic_cas_ptr)
	SET_SIZE(atomic_cas_64)

	ENTRY(atomic_swap_8)
	ALTENTRY(atomic_swap_uchar)
	and	%o0, 0x3, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x3, %g1		! %g1 = byte offset, right-to-left
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	set	0xff, %o3		! %o3 = mask
	sll	%o3, %g1, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single byte value
	andn	%o0, 0x3, %o0		! %o0 = word address
	ld	[%o0], %o2		! read old value
1:
	andn	%o2, %o3, %o5		! clear target bits
	or	%o5, %o1, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_swap_uchar)
	SET_SIZE(atomic_swap_8)

	ENTRY(atomic_swap_16)
	ALTENTRY(atomic_swap_ushort)
	and	%o0, 0x2, %o4		! %o4 = byte offset, left-to-right
	xor	%o4, 0x2, %g1		! %g1 = byte offset, right-to-left
	sll	%o4, 3, %o4		! %o4 = bit offset, left-to-right
	sll	%g1, 3, %g1		! %g1 = bit offset, right-to-left
	sethi	%hi(0xffff0000), %o3	! %o3 = mask
	srl	%o3, %o4, %o3		! %o3 = shifted to bit offset
	sll	%o1, %g1, %o1		! %o1 = shifted to bit offset
	and	%o1, %o3, %o1		! %o1 = single short value
	andn	%o0, 0x2, %o0		! %o0 = word address
	! if low-order bit is 1, we will properly get an alignment fault here
	ld	[%o0], %o2		! read old value
1:
	andn	%o2, %o3, %o5		! clear target bits
	or	%o5, %o1, %o5		! insert the new value
	cas	[%o0], %o2, %o5
	cmp	%o2, %o5
	bne,a,pn %icc, 1b
	  mov	%o5, %o2		! %o2 = old value
	and	%o5, %o3, %o5
	retl
	srl	%o5, %g1, %o0		! %o0 = old value
	SET_SIZE(atomic_swap_ushort)
	SET_SIZE(atomic_swap_16)

	ENTRY(atomic_swap_32)
	ALTENTRY(atomic_swap_uint)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ld	[%o0], %o2
1:
	mov	%o1, %o3
	cas	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%icc, 2f, 1b)
	  mov	%o3, %o2
	retl
	mov	%o3, %o0
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap32, 0b)
	SET_SIZE(atomic_swap_uint)
	SET_SIZE(atomic_swap_32)

	ENTRY(atomic_swap_64)
	ALTENTRY(atomic_swap_ptr)
	ALTENTRY(atomic_swap_ulong)
	ATOMIC_BACKOFF_INIT(%o4, %g4, %g5)
0:
	ldx	[%o0], %o2
1:
	mov	%o1, %o3
	casx	[%o0], %o2, %o3
	cmp	%o2, %o3
	ATOMIC_BACKOFF_BRANCH(%xcc, 2f, 1b)
	  mov	%o3, %o2
	retl
	mov	%o3, %o0
2:
	ATOMIC_BACKOFF_BACKOFF(%o4, %o5, %g4, %g5, swap64, 0b)
	SET_SIZE(atomic_swap_ulong)
	SET_SIZE(atomic_swap_ptr)
	SET_SIZE(atomic_swap_64)

	ENTRY(atomic_set_long_excl)
	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
	mov	1, %o3
	slln	%o3, %o1, %o3
0:
	ldn	[%o0], %o2
1:
	andcc	%o2, %o3, %g0		! test if the bit is set
	bnz,a,pn %ncc, 2f		! if so, then fail out
	  mov	-1, %o0
	or	%o2, %o3, %o4		! set the bit, and try to commit it
	casn	[%o0], %o2, %o4
	cmp	%o2, %o4
	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
	  mov	%o4, %o2
	mov	%g0, %o0
2:
	retl
	nop
5:
	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, setlongexcl, 0b)
	SET_SIZE(atomic_set_long_excl)
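
/*
 * Rough C sketch of atomic_set_long_excl (illustrative only, not part of
 * the original source): the bit is set atomically, but only if it was not
 * already set; -1 reports that the bit was already owned.
 * atomic_clear_long_excl below is the mirror image for clearing a bit.
 *
 *	int
 *	atomic_set_long_excl_sketch(volatile ulong_t *addr, uint_t bit)
 *	{
 *		ulong_t mask = 1UL << bit;
 *		ulong_t old;
 *
 *		do {
 *			old = *addr;
 *			if (old & mask)
 *				return (-1);
 *		} while (atomic_cas_ulong(addr, old, old | mask) != old);
 *		return (0);
 *	}
 */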

	ENTRY(atomic_clear_long_excl)
	ATOMIC_BACKOFF_INIT(%o5, %g4, %g5)
	mov	1, %o3
	slln	%o3, %o1, %o3
0:
	ldn	[%o0], %o2
1:
	andncc	%o3, %o2, %g0		! test if the bit is clear
	bnz,a,pn %ncc, 2f		! if so, then fail out
	  mov	-1, %o0
	andn	%o2, %o3, %o4		! clear the bit, and try to commit it
	casn	[%o0], %o2, %o4
	cmp	%o2, %o4
	ATOMIC_BACKOFF_BRANCH(%ncc, 5f, 1b)
	  mov	%o4, %o2
	mov	%g0, %o0
2:
	retl
	nop
5:
	ATOMIC_BACKOFF_BACKOFF(%o5, %g1, %g4, %g5, clrlongexcl, 0b)
	SET_SIZE(atomic_clear_long_excl)

#if !defined(_KERNEL)

	/*
	 * Spitfires and Blackbirds have a problem with membars in the
	 * delay slot (SF_ERRATA_51).  For safety's sake, we assume
	 * that the whole world needs the workaround.
	 */
	ENTRY(membar_enter)
	membar	#StoreLoad|#StoreStore
	retl
	nop
	SET_SIZE(membar_enter)

	ENTRY(membar_exit)
	membar	#LoadStore|#StoreStore
	retl
	nop
	SET_SIZE(membar_exit)

	ENTRY(membar_producer)
	membar	#StoreStore
	retl
	nop
	SET_SIZE(membar_producer)

	ENTRY(membar_consumer)
	membar	#LoadLoad
	retl
	nop
	SET_SIZE(membar_consumer)
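
/*
 * Typical pairing of the producer and consumer barriers above
 * (illustrative C; "data", "flag" and use() are hypothetical):
 *
 *	// producer: publish data, then the flag
 *	data = compute();
 *	membar_producer();
 *	flag = 1;
 *
 *	// consumer: see the flag, then safely read data
 *	while (flag == 0)
 *		;
 *	membar_consumer();
 *	use(data);
 */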

#endif	/* !_KERNEL */