/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#define mb()    __asm __volatile("mfence;" : : : "memory")
#define wmb()   __asm __volatile("sfence;" : : : "memory")
#define rmb()   __asm __volatile("lfence;" : : : "memory")

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)        (*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)      (*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)        (*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)   (*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)       (*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)     (*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)       (*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)  (*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)         (*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)       (*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)         (*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)    (*(u_int *)(P) -= (V))
 * atomic_readandclear_int(P)   (return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)        (*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)      (*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)        (*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)   (*(u_long *)(P) -= (V))
 * atomic_readandclear_long(P)  (return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */

/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
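/*
 * Illustrative sketch only (not part of the original interface): how
 * the flag-style operations documented above are typically used.  The
 * IO_BUSY flag and the example_* names are hypothetical, invented for
 * this example.
 */
#if 0
#define IO_BUSY 0x0001

static __inline void
example_mark_busy(volatile u_int *flags)
{
        atomic_set_int(flags, IO_BUSY);         /* *flags |= IO_BUSY, atomically */
}

static __inline void
example_mark_idle(volatile u_int *flags)
{
        atomic_clear_int(flags, IO_BUSY);       /* *flags &= ~IO_BUSY, atomically */
}
#endif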
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)                     \
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);  \
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int     atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int     atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
u_int   atomic_fetchadd_int(volatile u_int *p, u_int v);
u_long  atomic_fetchadd_long(volatile u_long *p, u_long v);

#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)                       \
u_##TYPE        atomic_load_acq_##TYPE(volatile u_##TYPE *p);   \
void            atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define MPLOCKED        "lock ; "
#else
#define MPLOCKED
#endif

/*
 * The assembly is marked volatile so that the compiler cannot discard
 * it.  GCC aggressively reorders operations, so the barrier variants
 * also need a memory clobber to keep memory accesses from being moved
 * across them.
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)                     \
static __inline void                                            \
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)        \
{                                                               \
        __asm __volatile(MPLOCKED OP                            \
        : "=m" (*p)                                             \
        : CONS (V), "m" (*p));                                  \
}                                                               \
                                                                \
static __inline void                                            \
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)   \
{                                                               \
        __asm __volatile(MPLOCKED OP                            \
        : "=m" (*p)                                             \
        : CONS (V), "m" (*p)                                    \
        : "memory");                                            \
}                                                               \
struct __hack

/*
 * Atomic compare and set, used by the mutex functions.
 *
 *      if (*dst == expect) *dst = src
 *
 * The comparison and the store happen as a single atomic operation
 * (32-bit words for the int variant, 64-bit for the long variant).
 * Returns 0 on failure, non-zero on success.
 */

static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
        u_char res;

        __asm __volatile(
        "       " MPLOCKED "            "
        "       cmpxchgl %2,%1 ;        "
        "       sete    %0 ;            "
        "1:                             "
        "# atomic_cmpset_int"
        : "=a" (res),                   /* 0 */
          "=m" (*dst)                   /* 1 */
        : "r" (src),                    /* 2 */
          "a" (expect),                 /* 3 */
          "m" (*dst)                    /* 4 */
        : "memory");

        return (res);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{
        u_char res;

        __asm __volatile(
        "       " MPLOCKED "            "
        "       cmpxchgq %2,%1 ;        "
        "       sete    %0 ;            "
        "1:                             "
        "# atomic_cmpset_long"
        : "=a" (res),                   /* 0 */
          "=m" (*dst)                   /* 1 */
        : "r" (src),                    /* 2 */
          "a" (expect),                 /* 3 */
          "m" (*dst)                    /* 4 */
        : "memory");

        return (res);
}

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

        __asm __volatile(
        "       " MPLOCKED "            "
        "       xaddl   %0, %1 ;        "
        "# atomic_fetchadd_int"
        : "+r" (v),                     /* 0 (result) */
          "=m" (*p)                     /* 1 */
        : "m" (*p));                    /* 2 */

        return (v);
}
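/*
 * Illustrative sketch only (not part of the original interface): the
 * compare-and-set and fetch-and-add primitives above are the usual
 * building blocks for try-locks and reference counts.  The example_*
 * names are hypothetical.
 */
#if 0
static __inline int
example_trylock(volatile u_int *lockp)
{
        /* Succeeds (returns non-zero) only if *lockp was 0. */
        return (atomic_cmpset_int(lockp, 0, 1));
}

static __inline u_int
example_ref_acquire(volatile u_int *countp)
{
        /* xadd hands back the value *countp held before the increment. */
        return (atomic_fetchadd_int(countp, 1));
}
#endif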
/*
 * Atomically add the value of v to the long integer pointed to by p and
 * return the previous value of *p.
 */
static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

        __asm __volatile(
        "       " MPLOCKED "            "
        "       xaddq   %0, %1 ;        "
        "# atomic_fetchadd_long"
        : "+r" (v),                     /* 0 (result) */
          "=m" (*p)                     /* 1 */
        : "m" (*p));                    /* 2 */

        return (v);
}

#if defined(_KERNEL) && !defined(SMP)

/*
 * We assume that a = b will do atomic loads and stores.  However, on a
 * PentiumPro or higher, reads may pass writes, so for that case we have
 * to use a serializing instruction (i.e. with LOCK) to do the load in
 * SMP kernels.  For UP kernels, however, the cache of the single processor
 * is always consistent, so we only need to take care of the compiler.
 */
#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)               \
static __inline u_##TYPE                                \
atomic_load_acq_##TYPE(volatile u_##TYPE *p)            \
{                                                       \
        u_##TYPE tmp;                                   \
                                                        \
        tmp = *p;                                       \
        __asm __volatile("" : : : "memory");            \
        return (tmp);                                   \
}                                                       \
                                                        \
static __inline void                                    \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{                                                       \
        __asm __volatile("" : : : "memory");            \
        *p = v;                                         \
}                                                       \
struct __hack

#else /* !(_KERNEL && !SMP) */

#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP)               \
static __inline u_##TYPE                                \
atomic_load_acq_##TYPE(volatile u_##TYPE *p)            \
{                                                       \
        u_##TYPE res;                                   \
                                                        \
        __asm __volatile(MPLOCKED LOP                   \
        : "=a" (res),                   /* 0 */         \
          "=m" (*p)                     /* 1 */         \
        : "m" (*p)                      /* 2 */         \
        : "memory");                                    \
                                                        \
        return (res);                                   \
}                                                       \
                                                        \
/*                                                      \
 * The XCHG instruction asserts LOCK automagically.     \
 */                                                     \
static __inline void                                    \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{                                                       \
        __asm __volatile(SOP                            \
        : "=m" (*p),                    /* 0 */         \
          "+r" (v)                      /* 1 */         \
        : "m" (*p)                      /* 2 */         \
        : "memory");                                    \
}                                                       \
struct __hack

#endif /* _KERNEL && !SMP */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,      char,  "orb %b1,%0",  "iq",  v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,      char,  "addb %b1,%0", "iq",  v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);

ATOMIC_ASM(set,      short, "orw %w1,%0",  "ir",  v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,      short, "addw %w1,%0", "ir",  v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);

ATOMIC_ASM(set,      int,   "orl %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,      int,   "addl %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);

ATOMIC_ASM(set,      long,  "orq %1,%0",   "ir",  v);
ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir", ~v);
ATOMIC_ASM(add,      long,  "addq %1,%0",  "ir",  v);
ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);

ATOMIC_STORE_LOAD(char,  "cmpxchgb %b0,%1", "xchgb %b1,%0");
ATOMIC_STORE_LOAD(short, "cmpxchgw %w0,%1", "xchgw %w1,%0");
ATOMIC_STORE_LOAD(int,   "cmpxchgl %0,%1",  "xchgl %1,%0");
ATOMIC_STORE_LOAD(long,  "cmpxchgq %0,%1",  "xchgq %1,%0");

#undef ATOMIC_ASM
#undef ATOMIC_STORE_LOAD
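/*
 * Illustrative sketch only (not part of the original interface): the
 * acquire/release pairs generated above make the classic publish/consume
 * pattern safe.  The example_* names are hypothetical.
 */
#if 0
static volatile u_int example_data;
static volatile u_int example_ready;

static __inline void
example_publish(u_int d)
{
        example_data = d;
        /* Release: the data write above cannot be moved past this store. */
        atomic_store_rel_int(&example_ready, 1);
}

static __inline u_int
example_consume(void)
{
        /* Acquire: the data read below cannot be moved before this load. */
        while (atomic_load_acq_int(&example_ready) == 0)
                ;                       /* spin until published */
        return (example_data);
}
#endif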
#ifndef WANT_FUNCTIONS

/* Read the current value and store a zero in the destination. */
#ifdef __GNUCLIKE_ASM

static __inline u_int
atomic_readandclear_int(volatile u_int *addr)
{
        u_int res;

        res = 0;
        __asm __volatile(
        "       xchgl   %1,%0 ;         "
        "# atomic_readandclear_int"
        : "+r" (res),                   /* 0 */
          "=m" (*addr)                  /* 1 */
        : "m" (*addr));

        return (res);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *addr)
{
        u_long res;

        res = 0;
        __asm __volatile(
        "       xchgq   %1,%0 ;         "
        "# atomic_readandclear_long"
        : "+r" (res),                   /* 0 */
          "=m" (*addr)                  /* 1 */
        : "m" (*addr));

        return (res);
}

#else /* !__GNUCLIKE_ASM */

u_int   atomic_readandclear_int(volatile u_int *addr);
u_long  atomic_readandclear_long(volatile u_long *addr);

#endif /* __GNUCLIKE_ASM */

#define atomic_set_acq_char             atomic_set_barr_char
#define atomic_set_rel_char             atomic_set_barr_char
#define atomic_clear_acq_char           atomic_clear_barr_char
#define atomic_clear_rel_char           atomic_clear_barr_char
#define atomic_add_acq_char             atomic_add_barr_char
#define atomic_add_rel_char             atomic_add_barr_char
#define atomic_subtract_acq_char        atomic_subtract_barr_char
#define atomic_subtract_rel_char        atomic_subtract_barr_char

#define atomic_set_acq_short            atomic_set_barr_short
#define atomic_set_rel_short            atomic_set_barr_short
#define atomic_clear_acq_short          atomic_clear_barr_short
#define atomic_clear_rel_short          atomic_clear_barr_short
#define atomic_add_acq_short            atomic_add_barr_short
#define atomic_add_rel_short            atomic_add_barr_short
#define atomic_subtract_acq_short       atomic_subtract_barr_short
#define atomic_subtract_rel_short       atomic_subtract_barr_short

#define atomic_set_acq_int              atomic_set_barr_int
#define atomic_set_rel_int              atomic_set_barr_int
#define atomic_clear_acq_int            atomic_clear_barr_int
#define atomic_clear_rel_int            atomic_clear_barr_int
#define atomic_add_acq_int              atomic_add_barr_int
#define atomic_add_rel_int              atomic_add_barr_int
#define atomic_subtract_acq_int         atomic_subtract_barr_int
#define atomic_subtract_rel_int         atomic_subtract_barr_int
#define atomic_cmpset_acq_int           atomic_cmpset_int
#define atomic_cmpset_rel_int           atomic_cmpset_int

#define atomic_set_acq_long             atomic_set_barr_long
#define atomic_set_rel_long             atomic_set_barr_long
#define atomic_clear_acq_long           atomic_clear_barr_long
#define atomic_clear_rel_long           atomic_clear_barr_long
#define atomic_add_acq_long             atomic_add_barr_long
#define atomic_add_rel_long             atomic_add_barr_long
#define atomic_subtract_acq_long        atomic_subtract_barr_long
#define atomic_subtract_rel_long        atomic_subtract_barr_long
#define atomic_cmpset_acq_long          atomic_cmpset_long
#define atomic_cmpset_rel_long          atomic_cmpset_long
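/*
 * Illustrative sketch only (not part of the original interface): the
 * acquire and release aliases above combine into the usual spin lock
 * protocol.  The example_* names are hypothetical.
 */
#if 0
static __inline void
example_spin_lock(volatile u_int *lockp)
{
        /* Acquire semantics: later accesses stay after taking the lock. */
        while (atomic_cmpset_acq_int(lockp, 0, 1) == 0)
                ;                       /* spin */
}

static __inline void
example_spin_unlock(volatile u_int *lockp)
{
        /* Release semantics: earlier accesses stay before the unlock. */
        atomic_store_rel_int(lockp, 0);
}
#endif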
/* Operations on 8-bit bytes. */
#define atomic_set_8            atomic_set_char
#define atomic_set_acq_8        atomic_set_acq_char
#define atomic_set_rel_8        atomic_set_rel_char
#define atomic_clear_8          atomic_clear_char
#define atomic_clear_acq_8      atomic_clear_acq_char
#define atomic_clear_rel_8      atomic_clear_rel_char
#define atomic_add_8            atomic_add_char
#define atomic_add_acq_8        atomic_add_acq_char
#define atomic_add_rel_8        atomic_add_rel_char
#define atomic_subtract_8       atomic_subtract_char
#define atomic_subtract_acq_8   atomic_subtract_acq_char
#define atomic_subtract_rel_8   atomic_subtract_rel_char
#define atomic_load_acq_8       atomic_load_acq_char
#define atomic_store_rel_8      atomic_store_rel_char

/* Operations on 16-bit words. */
#define atomic_set_16           atomic_set_short
#define atomic_set_acq_16       atomic_set_acq_short
#define atomic_set_rel_16       atomic_set_rel_short
#define atomic_clear_16         atomic_clear_short
#define atomic_clear_acq_16     atomic_clear_acq_short
#define atomic_clear_rel_16     atomic_clear_rel_short
#define atomic_add_16           atomic_add_short
#define atomic_add_acq_16       atomic_add_acq_short
#define atomic_add_rel_16       atomic_add_rel_short
#define atomic_subtract_16      atomic_subtract_short
#define atomic_subtract_acq_16  atomic_subtract_acq_short
#define atomic_subtract_rel_16  atomic_subtract_rel_short
#define atomic_load_acq_16      atomic_load_acq_short
#define atomic_store_rel_16     atomic_store_rel_short

/* Operations on 32-bit double words. */
#define atomic_set_32           atomic_set_int
#define atomic_set_acq_32       atomic_set_acq_int
#define atomic_set_rel_32       atomic_set_rel_int
#define atomic_clear_32         atomic_clear_int
#define atomic_clear_acq_32     atomic_clear_acq_int
#define atomic_clear_rel_32     atomic_clear_rel_int
#define atomic_add_32           atomic_add_int
#define atomic_add_acq_32       atomic_add_acq_int
#define atomic_add_rel_32       atomic_add_rel_int
#define atomic_subtract_32      atomic_subtract_int
#define atomic_subtract_acq_32  atomic_subtract_acq_int
#define atomic_subtract_rel_32  atomic_subtract_rel_int
#define atomic_load_acq_32      atomic_load_acq_int
#define atomic_store_rel_32     atomic_store_rel_int
#define atomic_cmpset_32        atomic_cmpset_int
#define atomic_cmpset_acq_32    atomic_cmpset_acq_int
#define atomic_cmpset_rel_32    atomic_cmpset_rel_int
#define atomic_readandclear_32  atomic_readandclear_int
#define atomic_fetchadd_32      atomic_fetchadd_int

/* Operations on 64-bit quad words. */
#define atomic_set_64           atomic_set_long
#define atomic_set_acq_64       atomic_set_acq_long
#define atomic_set_rel_64       atomic_set_rel_long
#define atomic_clear_64         atomic_clear_long
#define atomic_clear_acq_64     atomic_clear_acq_long
#define atomic_clear_rel_64     atomic_clear_rel_long
#define atomic_add_64           atomic_add_long
#define atomic_add_acq_64       atomic_add_acq_long
#define atomic_add_rel_64       atomic_add_rel_long
#define atomic_subtract_64      atomic_subtract_long
#define atomic_subtract_acq_64  atomic_subtract_acq_long
#define atomic_subtract_rel_64  atomic_subtract_rel_long
#define atomic_load_acq_64      atomic_load_acq_long
#define atomic_store_rel_64     atomic_store_rel_long
#define atomic_cmpset_64        atomic_cmpset_long
#define atomic_cmpset_acq_64    atomic_cmpset_acq_long
#define atomic_cmpset_rel_64    atomic_cmpset_rel_long
#define atomic_readandclear_64  atomic_readandclear_long
/* Operations on pointers. */
#define atomic_set_ptr          atomic_set_long
#define atomic_set_acq_ptr      atomic_set_acq_long
#define atomic_set_rel_ptr      atomic_set_rel_long
#define atomic_clear_ptr        atomic_clear_long
#define atomic_clear_acq_ptr    atomic_clear_acq_long
#define atomic_clear_rel_ptr    atomic_clear_rel_long
#define atomic_add_ptr          atomic_add_long
#define atomic_add_acq_ptr      atomic_add_acq_long
#define atomic_add_rel_ptr      atomic_add_rel_long
#define atomic_subtract_ptr     atomic_subtract_long
#define atomic_subtract_acq_ptr atomic_subtract_acq_long
#define atomic_subtract_rel_ptr atomic_subtract_rel_long
#define atomic_load_acq_ptr     atomic_load_acq_long
#define atomic_store_rel_ptr    atomic_store_rel_long
#define atomic_cmpset_ptr       atomic_cmpset_long
#define atomic_cmpset_acq_ptr   atomic_cmpset_acq_long
#define atomic_cmpset_rel_ptr   atomic_cmpset_rel_long
#define atomic_readandclear_ptr atomic_readandclear_long

#endif /* !WANT_FUNCTIONS */

#endif /* !_MACHINE_ATOMIC_H_ */