xref: /freebsd/share/man/man9/atomic.9 (revision ca987d4641cdcd7f27e153db17c5bf064934faf5)
1.\" Copyright (c) 2000-2001 John H. Baldwin <jhb@FreeBSD.org>
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR
14.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16.\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
17.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23.\"
24.\" $FreeBSD$
25.\"
26.Dd March 23, 2017
27.Dt ATOMIC 9
28.Os
29.Sh NAME
30.Nm atomic_add ,
31.Nm atomic_clear ,
32.Nm atomic_cmpset ,
33.Nm atomic_fcmpset ,
34.Nm atomic_fetchadd ,
35.Nm atomic_load ,
36.Nm atomic_readandclear ,
37.Nm atomic_set ,
38.Nm atomic_subtract ,
39.Nm atomic_store
40.Nd atomic operations
41.Sh SYNOPSIS
42.In sys/types.h
43.In machine/atomic.h
44.Ft void
45.Fn atomic_add_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
46.Ft void
47.Fn atomic_clear_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
48.Ft int
49.Fo atomic_cmpset_[acq_|rel_]<type>
50.Fa "volatile <type> *dst"
51.Fa "<type> old"
52.Fa "<type> new"
53.Fc
54.Ft int
55.Fo atomic_fcmpset_[acq_|rel_]<type>
56.Fa "volatile <type> *dst"
57.Fa "<type> *old"
58.Fa "<type> new"
59.Fc
60.Ft <type>
61.Fn atomic_fetchadd_<type> "volatile <type> *p" "<type> v"
62.Ft <type>
63.Fn atomic_load_acq_<type> "volatile <type> *p"
64.Ft <type>
65.Fn atomic_readandclear_<type> "volatile <type> *p"
66.Ft void
67.Fn atomic_set_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
68.Ft void
69.Fn atomic_subtract_[acq_|rel_]<type> "volatile <type> *p" "<type> v"
70.Ft void
71.Fn atomic_store_rel_<type> "volatile <type> *p" "<type> v"
72.Ft <type>
73.Fn atomic_swap_<type> "volatile <type> *p" "<type> v"
74.Ft int
75.Fn atomic_testandclear_<type> "volatile <type> *p" "u_int v"
76.Ft int
77.Fn atomic_testandset_<type> "volatile <type> *p" "u_int v"
78.Sh DESCRIPTION
79All of these operations are performed atomically across multiple
80threads and in the presence of interrupts, meaning that they are
81performed in an indivisible manner from the perspective of concurrently
82running threads and interrupt handlers.
83.Pp
84When atomic operations are performed on cache-coherent memory, all
85operations on the same location are totally ordered.
86.Pp
87When an atomic load is performed on a location in cache-coherent memory,
88it reads the entire value that was defined by the last atomic store to
89each byte of the location.
90An atomic load will never return a value out of thin air.
91When an atomic store is performed on a location, no other thread or
92interrupt handler will observe a
93.Em torn write ,
94or partial modification of the location.
95.Pp
96On all architectures supported by
97.Fx ,
98ordinary loads and stores of naturally aligned integer types
99are atomic, as executed by the processor.
100.Pp
101Atomic operations can be used to implement reference counts or as
102building blocks for synchronization primitives such as mutexes.
103.Pp
104The semantics of
105.Fx Ns 's
106atomic operations are almost identical to those of the similarly named
107C11 operations.
108The one important difference is that the C11 standard does not
109require ordinary loads and stores to ever be atomic.
110This is why the
111.Fn atomic_load_explicit memory_order_relaxed
112operation exists in the C11 standard, but is not provided by
113.In machine/atomic.h .
114.Ss Types
115Each atomic operation operates on a specific
116.Fa type .
117The type to use is indicated in the function name.
118The available types that can be used are:
119.Pp
120.Bl -tag -offset indent -width short -compact
121.It Li int
122unsigned integer
123.It Li long
124unsigned long integer
125.It Li ptr
126unsigned integer the size of a pointer
127.It Li 32
128unsigned 32-bit integer
129.It Li 64
130unsigned 64-bit integer
131.El
132.Pp
133For example, the function to atomically add two integers is called
134.Fn atomic_add_int .
135.Pp
136Certain architectures also provide operations for types smaller than
137.Dq Li int .
138.Pp
139.Bl -tag -offset indent -width short -compact
140.It Li char
141unsigned character
142.It Li short
143unsigned short integer
144.It Li 8
145unsigned 8-bit integer
146.It Li 16
147unsigned 16-bit integer
148.El
149.Pp
150These must not be used in machine-independent (MI) code because the instructions to implement them
151efficiently might not be available.
152.Ss Acquire and Release Operations
153By default, a thread's accesses to different memory locations might not be
154performed in
155.Em program order ,
156that is, the order in which the accesses appear in the source code.
157To optimize the program's execution, both the compiler and processor might
158reorder the thread's accesses.
159However, both ensure that their reordering of the accesses is not visible to
160the thread.
161Otherwise, the traditional memory model that is expected by single-threaded
162programs would be violated.
163Nonetheless, other threads in a multithreaded program, such as the
164.Fx
165kernel, might observe the reordering.
166Moreover, in some cases, such as the implementation of synchronization between
167threads, arbitrary reordering might result in the incorrect execution of the
168program.
169To constrain the reordering that both the compiler and processor might perform
170on a thread's accesses, the thread should use atomic operations with
171.Em acquire
172and
173.Em release
174semantics.
175.Pp
176Most of the atomic operations on memory have three variants.
177The first variant performs the operation without imposing any ordering
178constraints on memory accesses to other locations.
179The second variant has acquire semantics, and the third variant has release
180semantics.
181In effect, operations with acquire and release semantics establish one-way
182barriers to reordering.
183.Pp
184When an atomic operation has acquire semantics, the effects of the operation
185must have completed before any subsequent load or store (by program order) is
186performed.
187Conversely, acquire semantics do not require that prior loads or stores have
188completed before the atomic operation is performed.
189To denote acquire semantics, the suffix
190.Dq Li _acq
191is inserted into the function name immediately prior to the
192.Dq Li _ Ns Aq Fa type
193suffix.
194For example, to subtract two integers ensuring that subsequent loads and
195stores happen after the subtraction is performed, use
196.Fn atomic_subtract_acq_int .
197.Pp
198When an atomic operation has release semantics, the effects of all prior
199loads or stores (by program order) must have completed before the operation
200is performed.
201Conversely, release semantics do not require that the effects of the
202atomic operation must have completed before any subsequent load or store is
203performed.
204To denote release semantics, the suffix
205.Dq Li _rel
206is inserted into the function name immediately prior to the
207.Dq Li _ Ns Aq Fa type
208suffix.
209For example, to add two long integers ensuring that all prior loads and
210stores happen before the addition, use
211.Fn atomic_add_rel_long .
212.Pp
213The one-way barriers provided by acquire and release operations allow the
214implementations of common synchronization primitives to express their
215ordering requirements without also imposing unnecessary ordering.
216For example, for a critical section guarded by a mutex, an acquire operation
217when the mutex is locked and a release operation when the mutex is unlocked
218will prevent any loads or stores from moving outside of the critical
219section.
220However, they will not prevent the compiler or processor from moving loads
221or stores into the critical section, which does not violate the semantics of
222a mutex.
223.Ss Multiple Processors
224In multiprocessor systems, the atomicity of the atomic operations on memory
225depends on support for cache coherence in the underlying architecture.
226In general, cache coherence on the default memory type,
227.Dv VM_MEMATTR_DEFAULT ,
228is guaranteed by all architectures that are supported by
229.Fx .
230For example, cache coherence is guaranteed on write-back memory by the
231.Tn amd64
232and
233.Tn i386
234architectures.
235However, on some architectures, cache coherence might not be enabled on all
236memory types.
237To determine if cache coherence is enabled for a non-default memory type,
238consult the architecture's documentation.
239.Ss Semantics
240This section describes the semantics of each operation using a C-like notation.
241.Bl -hang
242.It Fn atomic_add p v
243.Bd -literal -compact
244*p += v;
245.Ed
246.It Fn atomic_clear p v
247.Bd -literal -compact
248*p &= ~v;
249.Ed
250.It Fn atomic_cmpset dst old new
251.Bd -literal -compact
252if (*dst == old) {
253	*dst = new;
254	return (1);
255} else
256	return (0);
257.Ed
258.El
259.Pp
260Some architectures do not implement the
261.Fn atomic_cmpset
262functions for the types
263.Dq Li char ,
264.Dq Li short ,
265.Dq Li 8 ,
266and
267.Dq Li 16 .
268.Bl -hang
269.It Fn atomic_fcmpset dst *old new
270.El
271.Pp
272On architectures implementing the
273.Em Compare And Swap
274operation in hardware, the functionality can be described as
275.Bd -literal -offset indent -compact
276if (*dst == *old) {
277	*dst = new;
278	return (1);
279} else {
280	*old = *dst;
281	return (0);
282}
283.Ed
284On architectures which provide the
285.Em Load Linked/Store Conditional
286primitive, the write to
287.Dv *dst
288might also fail for several reasons, the most important of which
289is a parallel write to the
290.Dv *dst
291cache line by another CPU.
292In this case, the
293.Fn atomic_fcmpset
294function also returns
295.Dv false ,
296despite
297.Dl *old == *dst .
298.Pp
299Some architectures do not implement the
300.Fn atomic_fcmpset
301functions for the types
302.Dq Li char ,
303.Dq Li short ,
304.Dq Li 8 ,
305and
306.Dq Li 16 .
307.Bl -hang
308.It Fn atomic_fetchadd p v
309.Bd -literal -compact
310tmp = *p;
311*p += v;
312return (tmp);
313.Ed
314.El
315.Pp
316The
317.Fn atomic_fetchadd
318functions are only implemented for the types
319.Dq Li int ,
320.Dq Li long
321and
322.Dq Li 32
323and do not have any variants with memory barriers at this time.
324.Bl -hang
325.It Fn atomic_load p
326.Bd -literal -compact
327return (*p);
328.Ed
329.El
330.Pp
331The
332.Fn atomic_load
333functions are only provided with acquire memory barriers.
334.Bl -hang
335.It Fn atomic_readandclear p
336.Bd -literal -compact
337tmp = *p;
338*p = 0;
339return (tmp);
340.Ed
341.El
342.Pp
343The
344.Fn atomic_readandclear
345functions are not implemented for the types
346.Dq Li char ,
347.Dq Li short ,
348.Dq Li ptr ,
349.Dq Li 8 ,
350and
351.Dq Li 16
352and do not have any variants with memory barriers at this time.
353.Bl -hang
354.It Fn atomic_set p v
355.Bd -literal -compact
356*p |= v;
357.Ed
358.It Fn atomic_subtract p v
359.Bd -literal -compact
360*p -= v;
361.Ed
362.It Fn atomic_store p v
363.Bd -literal -compact
364*p = v;
365.Ed
366.El
367.Pp
368The
369.Fn atomic_store
370functions are only provided with release memory barriers.
371.Bl -hang
372.It Fn atomic_swap p v
373.Bd -literal -compact
374tmp = *p;
375*p = v;
376return (tmp);
377.Ed
378.El
379.Pp
380The
381.Fn atomic_swap
382functions are not implemented for the types
383.Dq Li char ,
384.Dq Li short ,
385.Dq Li ptr ,
386.Dq Li 8 ,
387and
388.Dq Li 16
389and do not have any variants with memory barriers at this time.
390.Bl -hang
391.It Fn atomic_testandclear p v
392.Bd -literal -compact
393bit = 1 << (v % (sizeof(*p) * NBBY));
394tmp = (*p & bit) != 0;
395*p &= ~bit;
396return (tmp);
397.Ed
398.El
399.Bl -hang
400.It Fn atomic_testandset p v
401.Bd -literal -compact
402bit = 1 << (v % (sizeof(*p) * NBBY));
403tmp = (*p & bit) != 0;
404*p |= bit;
405return (tmp);
406.Ed
407.El
408.Pp
409The
410.Fn atomic_testandset
411and
412.Fn atomic_testandclear
413functions are only implemented for the types
414.Dq Li int ,
415.Dq Li long
416and
417.Dq Li 32
418and do not have any variants with memory barriers at this time.
419.Pp
420The type
421.Dq Li 64
422is currently not implemented for any of the atomic operations on the
423.Tn arm ,
424.Tn i386 ,
425and
426.Tn powerpc
427architectures.
428.Sh RETURN VALUES
429The
430.Fn atomic_cmpset
431function returns the result of the compare operation.
432The
433.Fn atomic_fcmpset
434function returns
435.Dv true
436if the operation succeeded.
437Otherwise it returns
438.Dv false
439and sets
440.Va *old
441to the found value.
442The
443.Fn atomic_fetchadd ,
444.Fn atomic_load ,
445.Fn atomic_readandclear ,
446and
447.Fn atomic_swap
448functions return the value at the specified address.
449The
450.Fn atomic_testandset
451and
452.Fn atomic_testandclear
453functions return the result of the test operation.
454.Sh EXAMPLES
455This example uses the
456.Fn atomic_cmpset_acq_ptr
457and
458.Fn atomic_set_ptr
459functions to obtain a sleep mutex and handle recursion.
460Since the
461.Va mtx_lock
462member of a
463.Vt "struct mtx"
464is a pointer, the
465.Dq Li ptr
466type is used.
467.Bd -literal
468/* Try to obtain mtx_lock once. */
469#define _obtain_lock(mp, tid)						\\
470	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
471
472/* Get a sleep lock, deal with recursion inline. */
473#define _get_sleep_lock(mp, tid, opts, file, line) do {			\\
474	uintptr_t _tid = (uintptr_t)(tid);				\\
475									\\
476	if (!_obtain_lock(mp, tid)) {					\\
477		if (((mp)->mtx_lock & MTX_FLAGMASK) != _tid)		\\
478			_mtx_lock_sleep((mp), _tid, (opts), (file), (line));\\
479		else {							\\
480			atomic_set_ptr(&(mp)->mtx_lock, MTX_RECURSE);	\\
481			(mp)->mtx_recurse++;				\\
482		}							\\
483	}								\\
484} while (0)
485.Ed
486.Sh HISTORY
487The
488.Fn atomic_add ,
489.Fn atomic_clear ,
490.Fn atomic_set ,
491and
492.Fn atomic_subtract
493operations were first introduced in
494.Fx 3.0 .
495This first set only supported the types
496.Dq Li char ,
497.Dq Li short ,
498.Dq Li int ,
499and
500.Dq Li long .
501The
502.Fn atomic_cmpset ,
503.Fn atomic_load ,
504.Fn atomic_readandclear ,
505and
506.Fn atomic_store
507operations were added in
508.Fx 5.0 .
509The types
510.Dq Li 8 ,
511.Dq Li 16 ,
512.Dq Li 32 ,
513.Dq Li 64 ,
514and
515.Dq Li ptr
516and all of the acquire and release variants
517were added in
518.Fx 5.0
519as well.
520The
521.Fn atomic_fetchadd
522operations were added in
523.Fx 6.0 .
524The
525.Fn atomic_swap
526and
527.Fn atomic_testandset
528operations were added in
529.Fx 10.0 .
The
530.Fn atomic_testandclear
531operation was added in
532.Fx 11.0 .
533