/*-
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

#define mb()    __asm __volatile("mfence;" : : : "memory")
#define wmb()   __asm __volatile("sfence;" : : : "memory")
#define rmb()   __asm __volatile("lfence;" : : : "memory")

/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)        (*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)      (*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)        (*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)   (*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)       (*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)     (*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)       (*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)  (*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)         (*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)       (*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)         (*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)    (*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)        (return (*(u_int *)(P)); *(u_int *)(P) = (V);)
 * atomic_readandclear_int(P)   (return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)        (*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)      (*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)        (*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)   (*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)       (return (*(u_long *)(P)); *(u_long *)(P) = (V);)
 * atomic_readandclear_long(P)  (return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */

/*
 * The above functions are expanded inline in the statically-linked
 * kernel.  Lock prefixes are generated if an SMP kernel is being
 * built.
 *
 * Kernel modules call real functions which are built into the kernel.
 * This allows kernel modules to be portable between UP and SMP systems.
 */
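/*
 * Illustrative usage sketch (not part of this interface): callers operate
 * on a shared counter or flags word with these primitives instead of plain
 * assignments.  The names 'foo_count', 'foo_flags' and 'FOO_DYING' below
 * are hypothetical.
 *
 *      static volatile u_int foo_count;
 *      static volatile u_int foo_flags;
 *
 *      atomic_add_int(&foo_count, 1);                  atomic increment
 *      atomic_set_int(&foo_flags, FOO_DYING);          atomic *p |= V
 *      atomic_clear_int(&foo_flags, FOO_DYING);        atomic *p &= ~V
 */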
#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)                     \
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);  \
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

int     atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int     atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
u_int   atomic_fetchadd_int(volatile u_int *p, u_int v);
u_long  atomic_fetchadd_long(volatile u_long *p, u_long v);
int     atomic_testandset_int(volatile u_int *p, u_int v);
int     atomic_testandset_long(volatile u_long *p, u_int v);

#define ATOMIC_LOAD(TYPE, LOP)                                  \
u_##TYPE        atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE)                                      \
void            atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)

#else /* !KLD_MODULE && __GNUCLIKE_ASM */

/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.
 */
#if defined(SMP) || !defined(_KERNEL)
#define MPLOCKED        "lock ; "
#else
#define MPLOCKED
#endif

/*
 * The assembly is volatilized to avoid code chunk removal by the compiler.
 * GCC aggressively reorders operations and memory clobbering is necessary
 * in order to avoid that for memory barriers.
 */
#define ATOMIC_ASM(NAME, TYPE, OP, CONS, V)             \
static __inline void                                    \
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{                                                       \
        __asm __volatile(MPLOCKED OP                    \
        : "+m" (*p)                                     \
        : CONS (V)                                      \
        : "cc");                                        \
}                                                       \
                                                        \
static __inline void                                    \
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{                                                       \
        __asm __volatile(MPLOCKED OP                    \
        : "+m" (*p)                                     \
        : CONS (V)                                      \
        : "memory", "cc");                              \
}                                                       \
struct __hack

/*
 * Atomic compare and set, used by the mutex functions
 *
 * if (*dst == expect) *dst = src (all 32 bit words)
 *
 * Returns 0 on failure, non-zero on success
 */

static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
{
        u_char res;

        __asm __volatile(
        "       " MPLOCKED "            "
        "       cmpxchgl %3,%1 ;        "
        "       sete    %0 ;            "
        "# atomic_cmpset_int"
        : "=q" (res),                   /* 0 */
          "+m" (*dst),                  /* 1 */
          "+a" (expect)                 /* 2 */
        : "r" (src)                     /* 3 */
        : "memory", "cc");
        return (res);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
{
        u_char res;

        __asm __volatile(
        "       " MPLOCKED "            "
        "       cmpxchgq %3,%1 ;        "
        "       sete    %0 ;            "
        "# atomic_cmpset_long"
        : "=q" (res),                   /* 0 */
          "+m" (*dst),                  /* 1 */
          "+a" (expect)                 /* 2 */
        : "r" (src)                     /* 3 */
        : "memory", "cc");
        return (res);
}

/*
 * Atomically add the value of v to the integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_int
atomic_fetchadd_int(volatile u_int *p, u_int v)
{

        __asm __volatile(
        "       " MPLOCKED "            "
        "       xaddl   %0,%1 ;         "
        "# atomic_fetchadd_int"
        : "+r" (v),                     /* 0 */
          "+m" (*p)                     /* 1 */
        : : "cc");
        return (v);
}

/*
 * Atomically add the value of v to the long integer pointed to by p and return
 * the previous value of *p.
 */
static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

        __asm __volatile(
        "       " MPLOCKED "            "
        "       xaddq   %0,%1 ;         "
        "# atomic_fetchadd_long"
        : "+r" (v),                     /* 0 */
          "+m" (*p)                     /* 1 */
        : : "cc");
        return (v);
}
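/*
 * Illustrative sketch (not part of this interface): atomic_cmpset_* is the
 * usual building block for lock-free read-modify-write loops, and
 * atomic_fetchadd_* returns the value *p held before the addition.  The
 * helper and variable names below are hypothetical.
 *
 *      static __inline u_int
 *      example_saturating_inc(volatile u_int *p, u_int limit)
 *      {
 *              u_int old;
 *
 *              do {
 *                      old = *p;
 *                      if (old == limit)
 *                              return (old);
 *              } while (atomic_cmpset_int(p, old, old + 1) == 0);
 *              return (old + 1);
 *      }
 *
 *      ticket = atomic_fetchadd_int(&next_ticket, 1);   returns old value
 */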
static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{
        u_char res;

        __asm __volatile(
        "       " MPLOCKED "            "
        "       btsl    %2,%1 ;         "
        "       setc    %0 ;            "
        "# atomic_testandset_int"
        : "=q" (res),                   /* 0 */
          "+m" (*p)                     /* 1 */
        : "Ir" (v & 0x1f)               /* 2 */
        : "cc");
        return (res);
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{
        u_char res;

        __asm __volatile(
        "       " MPLOCKED "            "
        "       btsq    %2,%1 ;         "
        "       setc    %0 ;            "
        "# atomic_testandset_long"
        : "=q" (res),                   /* 0 */
          "+m" (*p)                     /* 1 */
        : "Jr" ((u_long)(v & 0x3f))     /* 2 */
        : "cc");
        return (res);
}

/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, loads may pass stores, so for atomic_load_acq we have to
 * ensure a Store/Load barrier to do the load in SMP kernels.  We use
 * "lock cmpxchg" as recommended by the AMD Software Optimization
 * Guide, and not mfence.  For UP kernels, however, the cache of the
 * single processor is always consistent, so we only need to take care
 * of the compiler.
 */
#define ATOMIC_STORE(TYPE)                              \
static __inline void                                    \
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{                                                       \
        __compiler_membar();                            \
        *p = v;                                         \
}                                                       \
struct __hack

#if defined(_KERNEL) && !defined(SMP)

#define ATOMIC_LOAD(TYPE, LOP)                          \
static __inline u_##TYPE                                \
atomic_load_acq_##TYPE(volatile u_##TYPE *p)            \
{                                                       \
        u_##TYPE tmp;                                   \
                                                        \
        tmp = *p;                                       \
        __compiler_membar();                            \
        return (tmp);                                   \
}                                                       \
struct __hack

#else /* !(_KERNEL && !SMP) */

#define ATOMIC_LOAD(TYPE, LOP)                          \
static __inline u_##TYPE                                \
atomic_load_acq_##TYPE(volatile u_##TYPE *p)            \
{                                                       \
        u_##TYPE res;                                   \
                                                        \
        __asm __volatile(MPLOCKED LOP                   \
        : "=a" (res),                   /* 0 */         \
          "+m" (*p)                     /* 1 */         \
        : : "memory", "cc");                            \
        return (res);                                   \
}                                                       \
struct __hack

#endif /* _KERNEL && !SMP */

#endif /* KLD_MODULE || !__GNUCLIKE_ASM */

ATOMIC_ASM(set,      char,  "orb %b1,%0",  "iq", v);
ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
ATOMIC_ASM(add,      char,  "addb %b1,%0", "iq", v);
ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq", v);

ATOMIC_ASM(set,      short, "orw %w1,%0",  "ir", v);
ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
ATOMIC_ASM(add,      short, "addw %w1,%0", "ir", v);
ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v);

ATOMIC_ASM(set,      int,   "orl %1,%0",   "ir", v);
ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
ATOMIC_ASM(add,      int,   "addl %1,%0",  "ir", v);
ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir", v);

ATOMIC_ASM(set,      long,  "orq %1,%0",   "ir", v);
ATOMIC_ASM(clear,    long,  "andq %1,%0",  "ir", ~v);
ATOMIC_ASM(add,      long,  "addq %1,%0",  "ir", v);
ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir", v);

ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
ATOMIC_LOAD(long,  "cmpxchgq %0,%1");

ATOMIC_STORE(char);
ATOMIC_STORE(short);
ATOMIC_STORE(int);
ATOMIC_STORE(long);
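/*
 * Illustrative sketch (not part of this interface): as described above, a
 * store_rel by one CPU paired with a load_acq by another gives the usual
 * publish/consume ordering.  The 'data' and 'ready' variables and the
 * compute()/use() calls below are hypothetical.
 *
 *      producer:
 *              data = compute();
 *              atomic_store_rel_int(&ready, 1);
 *
 *      consumer:
 *              while (atomic_load_acq_int(&ready) == 0)
 *                      cpu_spinwait();
 *              use(data);      guaranteed to observe the store to 'data'
 */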

#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE

#ifndef WANT_FUNCTIONS

/* Read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM

static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

        __asm __volatile(
        "       xchgl   %1,%0 ;         "
        "# atomic_swap_int"
        : "+r" (v),                     /* 0 */
          "+m" (*p));                   /* 1 */
        return (v);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

        __asm __volatile(
        "       xchgq   %1,%0 ;         "
        "# atomic_swap_long"
        : "+r" (v),                     /* 0 */
          "+m" (*p));                   /* 1 */
        return (v);
}

#else /* !__GNUCLIKE_ASM */

u_int   atomic_swap_int(volatile u_int *p, u_int v);
u_long  atomic_swap_long(volatile u_long *p, u_long v);

#endif /* __GNUCLIKE_ASM */

#define atomic_set_acq_char             atomic_set_barr_char
#define atomic_set_rel_char             atomic_set_barr_char
#define atomic_clear_acq_char           atomic_clear_barr_char
#define atomic_clear_rel_char           atomic_clear_barr_char
#define atomic_add_acq_char             atomic_add_barr_char
#define atomic_add_rel_char             atomic_add_barr_char
#define atomic_subtract_acq_char        atomic_subtract_barr_char
#define atomic_subtract_rel_char        atomic_subtract_barr_char

#define atomic_set_acq_short            atomic_set_barr_short
#define atomic_set_rel_short            atomic_set_barr_short
#define atomic_clear_acq_short          atomic_clear_barr_short
#define atomic_clear_rel_short          atomic_clear_barr_short
#define atomic_add_acq_short            atomic_add_barr_short
#define atomic_add_rel_short            atomic_add_barr_short
#define atomic_subtract_acq_short       atomic_subtract_barr_short
#define atomic_subtract_rel_short       atomic_subtract_barr_short

#define atomic_set_acq_int              atomic_set_barr_int
#define atomic_set_rel_int              atomic_set_barr_int
#define atomic_clear_acq_int            atomic_clear_barr_int
#define atomic_clear_rel_int            atomic_clear_barr_int
#define atomic_add_acq_int              atomic_add_barr_int
#define atomic_add_rel_int              atomic_add_barr_int
#define atomic_subtract_acq_int         atomic_subtract_barr_int
#define atomic_subtract_rel_int         atomic_subtract_barr_int
#define atomic_cmpset_acq_int           atomic_cmpset_int
#define atomic_cmpset_rel_int           atomic_cmpset_int

#define atomic_set_acq_long             atomic_set_barr_long
#define atomic_set_rel_long             atomic_set_barr_long
#define atomic_clear_acq_long           atomic_clear_barr_long
#define atomic_clear_rel_long           atomic_clear_barr_long
#define atomic_add_acq_long             atomic_add_barr_long
#define atomic_add_rel_long             atomic_add_barr_long
#define atomic_subtract_acq_long        atomic_subtract_barr_long
#define atomic_subtract_rel_long        atomic_subtract_barr_long
#define atomic_cmpset_acq_long          atomic_cmpset_long
#define atomic_cmpset_rel_long          atomic_cmpset_long

#define atomic_readandclear_int(p)      atomic_swap_int(p, 0)
#define atomic_readandclear_long(p)     atomic_swap_long(p, 0)
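/*
 * Illustrative example (not part of this interface): atomic_readandclear_*,
 * built on atomic_swap_* above, drains a word of pending event bits in a
 * single step.  The 'pending_events' variable is hypothetical.
 *
 *      u_int events;
 *
 *      events = atomic_readandclear_int(&pending_events);
 *      while (events != 0) {
 *              handle and clear one bit of 'events'
 *      }
 */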

/* Operations on 8-bit bytes. */
#define atomic_set_8            atomic_set_char
#define atomic_set_acq_8        atomic_set_acq_char
#define atomic_set_rel_8        atomic_set_rel_char
#define atomic_clear_8          atomic_clear_char
#define atomic_clear_acq_8      atomic_clear_acq_char
#define atomic_clear_rel_8      atomic_clear_rel_char
#define atomic_add_8            atomic_add_char
#define atomic_add_acq_8        atomic_add_acq_char
#define atomic_add_rel_8        atomic_add_rel_char
#define atomic_subtract_8       atomic_subtract_char
#define atomic_subtract_acq_8   atomic_subtract_acq_char
#define atomic_subtract_rel_8   atomic_subtract_rel_char
#define atomic_load_acq_8       atomic_load_acq_char
#define atomic_store_rel_8      atomic_store_rel_char

/* Operations on 16-bit words. */
#define atomic_set_16           atomic_set_short
#define atomic_set_acq_16       atomic_set_acq_short
#define atomic_set_rel_16       atomic_set_rel_short
#define atomic_clear_16         atomic_clear_short
#define atomic_clear_acq_16     atomic_clear_acq_short
#define atomic_clear_rel_16     atomic_clear_rel_short
#define atomic_add_16           atomic_add_short
#define atomic_add_acq_16       atomic_add_acq_short
#define atomic_add_rel_16       atomic_add_rel_short
#define atomic_subtract_16      atomic_subtract_short
#define atomic_subtract_acq_16  atomic_subtract_acq_short
#define atomic_subtract_rel_16  atomic_subtract_rel_short
#define atomic_load_acq_16      atomic_load_acq_short
#define atomic_store_rel_16     atomic_store_rel_short

/* Operations on 32-bit double words. */
#define atomic_set_32           atomic_set_int
#define atomic_set_acq_32       atomic_set_acq_int
#define atomic_set_rel_32       atomic_set_rel_int
#define atomic_clear_32         atomic_clear_int
#define atomic_clear_acq_32     atomic_clear_acq_int
#define atomic_clear_rel_32     atomic_clear_rel_int
#define atomic_add_32           atomic_add_int
#define atomic_add_acq_32       atomic_add_acq_int
#define atomic_add_rel_32       atomic_add_rel_int
#define atomic_subtract_32      atomic_subtract_int
#define atomic_subtract_acq_32  atomic_subtract_acq_int
#define atomic_subtract_rel_32  atomic_subtract_rel_int
#define atomic_load_acq_32      atomic_load_acq_int
#define atomic_store_rel_32     atomic_store_rel_int
#define atomic_cmpset_32        atomic_cmpset_int
#define atomic_cmpset_acq_32    atomic_cmpset_acq_int
#define atomic_cmpset_rel_32    atomic_cmpset_rel_int
#define atomic_swap_32          atomic_swap_int
#define atomic_readandclear_32  atomic_readandclear_int
#define atomic_fetchadd_32      atomic_fetchadd_int
#define atomic_testandset_32    atomic_testandset_int

/* Operations on 64-bit quad words. */
#define atomic_set_64           atomic_set_long
#define atomic_set_acq_64       atomic_set_acq_long
#define atomic_set_rel_64       atomic_set_rel_long
#define atomic_clear_64         atomic_clear_long
#define atomic_clear_acq_64     atomic_clear_acq_long
#define atomic_clear_rel_64     atomic_clear_rel_long
#define atomic_add_64           atomic_add_long
#define atomic_add_acq_64       atomic_add_acq_long
#define atomic_add_rel_64       atomic_add_rel_long
#define atomic_subtract_64      atomic_subtract_long
#define atomic_subtract_acq_64  atomic_subtract_acq_long
#define atomic_subtract_rel_64  atomic_subtract_rel_long
#define atomic_load_acq_64      atomic_load_acq_long
#define atomic_store_rel_64     atomic_store_rel_long
#define atomic_cmpset_64        atomic_cmpset_long
#define atomic_cmpset_acq_64    atomic_cmpset_acq_long
#define atomic_cmpset_rel_64    atomic_cmpset_rel_long
#define atomic_swap_64          atomic_swap_long
#define atomic_readandclear_64  atomic_readandclear_long
#define atomic_testandset_64    atomic_testandset_long

/* Operations on pointers. */
#define atomic_set_ptr          atomic_set_long
#define atomic_set_acq_ptr      atomic_set_acq_long
#define atomic_set_rel_ptr      atomic_set_rel_long
#define atomic_clear_ptr        atomic_clear_long
#define atomic_clear_acq_ptr    atomic_clear_acq_long
#define atomic_clear_rel_ptr    atomic_clear_rel_long
#define atomic_add_ptr          atomic_add_long
#define atomic_add_acq_ptr      atomic_add_acq_long
#define atomic_add_rel_ptr      atomic_add_rel_long
#define atomic_subtract_ptr     atomic_subtract_long
#define atomic_subtract_acq_ptr atomic_subtract_acq_long
#define atomic_subtract_rel_ptr atomic_subtract_rel_long
#define atomic_load_acq_ptr     atomic_load_acq_long
#define atomic_store_rel_ptr    atomic_store_rel_long
#define atomic_cmpset_ptr       atomic_cmpset_long
#define atomic_cmpset_acq_ptr   atomic_cmpset_acq_long
#define atomic_cmpset_rel_ptr   atomic_cmpset_rel_long
#define atomic_swap_ptr         atomic_swap_long
#define atomic_readandclear_ptr atomic_readandclear_long

#endif /* !WANT_FUNCTIONS */

#endif /* !_MACHINE_ATOMIC_H_ */