/*-
 * Copyright (c) 2015 Ruslan Bukin <br@bsdpad.com>
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

/*
 * Memory barriers.  All of them are implemented with a single "fence"
 * instruction plus a compiler-level "memory" clobber.
 *
 * fence() deliberately expands to an expression WITHOUT a trailing
 * semicolon so that "fence();" is exactly one statement and is safe in
 * unbraced if/else bodies.
 */
#define	fence()	__asm __volatile("fence" ::: "memory")
#define	mb()	fence()
#define	rmb()	fence()
#define	wmb()	fence()

/*
 * Generate the _acq_ and _rel_ flavours of a void read-modify-write
 * operation atomic_<NAME>_<WIDTH>():
 *   - the _acq_ variant performs the operation and then issues a fence;
 *   - the _rel_ variant issues a fence and then performs the operation.
 */
#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline  void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	fence();							\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	fence();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}

/*
 * 32-bit operations.
 */

/* *p += val, using a single amoadd.w; the old value is discarded. */
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{

	__asm __volatile("amoadd.w zero, %1, %0"
			: "+A" (*p)
			: "r" (val)
			: "memory");
}

/* *p -= val, implemented as an atomic add of the negated value. */
static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{

	__asm __volatile("amoadd.w zero, %1, %0"
			: "+A" (*p)
			: "r" (-val)
			: "memory");
}

/* *p |= val (set the bits given in val), using amoor.w. */
static __inline void
atomic_set_32(volatile uint32_t *p, uint32_t val)
{

	__asm __volatile("amoor.w zero, %1, %0"
			: "+A" (*p)
			: "r" (val)
			: "memory");
}

/* *p &= ~val (clear the bits given in val), using amoand.w. */
static __inline void
atomic_clear_32(volatile uint32_t *p, uint32_t val)
{

	__asm __volatile("amoand.w zero, %1, %0"
			: "+A" (*p)
			: "r" (~val)
			: "memory");
}

/*
 * Compare-and-set: if *p == cmpval, store newval into *p.
 *
 * The lr.w/sc.w sequence is retried while the store-conditional fails.
 * Returns non-zero if newval was stored, zero if *p did not match cmpval.
 *
 * lr.w sign-extends the loaded word to register width, and bne compares
 * whole registers, so the comparison operand is explicitly sign-extended
 * as well; otherwise a compiler that zero-extends uint32_t operands would
 * never see a match for values with bit 31 set.
 */
static __inline int
atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	res = 0;

	__asm __volatile(
		"0:"
			"li   %1, 1\n" /* Preset to fail */
			"lr.w %0, %2\n"
			"bne  %0, %z3, 1f\n"
			"sc.w %1, %z4, %2\n"
			"bnez %1, 0b\n"
		"1:"
			: "=&r" (tmp), "=&r" (res), "+A" (*p)
			: "rJ" ((long)(int32_t)cmpval), "rJ" (newval)
			: "memory");

	return (!res);
}

/*
 * Compare-and-set that reports the observed value on mismatch.
 *
 * If *p == *cmpval, a single store-conditional of newval is attempted
 * (no retry loop).  If the values differ, the value read from *p is
 * written back to *cmpval.  Returns non-zero only if newval was stored;
 * a failed store-conditional returns zero and leaves *cmpval untouched.
 *
 * The comparison operand is sign-extended to match lr.w (see
 * atomic_cmpset_32()).
 */
static __inline int
atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	res = 0;

	__asm __volatile(
		"0:"
			"li   %1, 1\n"		/* Preset to fail */
			"lr.w %0, %2\n"		/* Load old value */
			"bne  %0, %z4, 1f\n"	/* Compare */
			"sc.w %1, %z5, %2\n"	/* Try to store new value */
			"j 2f\n"
		"1:"
			"sw   %0, %3\n"		/* Save old value */
		"2:"
			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
			: "rJ" ((long)(int32_t)*cmpval), "rJ" (newval)
			: "memory");

	return (!res);
}

/* *p += val; returns the value *p held before the addition. */
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t ret;

	__asm __volatile("amoadd.w %0, %2, %1"
			: "=&r" (ret), "+A" (*p)
			: "r" (val)
			: "memory");

	return (ret);
}

/* Swap zero into *p and return the value it held before. */
static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t ret;
	uint32_t val;

	val = 0;

	__asm __volatile("amoswap.w %0, %2, %1"
			: "=&r"(ret), "+A" (*p)
			: "r" (val)
			: "memory");

	return (ret);
}

/* "int" operations map onto the 32-bit primitives. */
#define	atomic_add_int		atomic_add_32
#define	atomic_clear_int	atomic_clear_32
#define	atomic_cmpset_int	atomic_cmpset_32
#define	atomic_fcmpset_int	atomic_fcmpset_32
#define	atomic_fetchadd_int	atomic_fetchadd_32
#define	atomic_readandclear_int	atomic_readandclear_32
#define	atomic_set_int		atomic_set_32
#define	atomic_subtract_int	atomic_subtract_32

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)

/* atomic_cmpset_32() followed by a fence. */
static __inline int
atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	int res;

	res = atomic_cmpset_32(p, cmpval, newval);

	fence();

	return (res);
}

/* A fence followed by atomic_cmpset_32(). */
static __inline int
atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{

	fence();

	return (atomic_cmpset_32(p, cmpval, newval));
}

/* atomic_fcmpset_32() followed by a fence. */
static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
	int res;

	res = atomic_fcmpset_32(p, cmpval, newval);

	fence();

	return (res);
}

/* A fence followed by atomic_fcmpset_32(). */
static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{

	fence();

	return (atomic_fcmpset_32(p, cmpval, newval));
}

/* Plain load of *p followed by a fence. */
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t ret;

	ret = *p;

	fence();

	return (ret);
}

/* A fence followed by a plain store of val to *p. */
static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{

	fence();

	*p = val;
}

#define	atomic_add_acq_int	atomic_add_acq_32
#define	atomic_clear_acq_int	atomic_clear_acq_32
#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
#define	atomic_fcmpset_acq_int	atomic_fcmpset_acq_32
#define	atomic_load_acq_int	atomic_load_acq_32
#define	atomic_set_acq_int	atomic_set_acq_32
#define	atomic_subtract_acq_int	atomic_subtract_acq_32

#define	atomic_add_rel_int	atomic_add_rel_32
#define	atomic_clear_rel_int	atomic_clear_rel_32
#define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
#define	atomic_fcmpset_rel_int	atomic_fcmpset_rel_32
#define	atomic_set_rel_int	atomic_set_rel_32
#define	atomic_subtract_rel_int	atomic_subtract_rel_32
#define	atomic_store_rel_int	atomic_store_rel_32

/*
 * 64-bit operations.  These mirror the 32-bit versions above using the
 * doubleword (.d) forms of the instructions.
 */

/* *p += val, using a single amoadd.d; the old value is discarded. */
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{

	__asm __volatile("amoadd.d zero, %1, %0"
			: "+A" (*p)
			: "r" (val)
			: "memory");
}

/* *p -= val, implemented as an atomic add of the negated value. */
static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{

	__asm __volatile("amoadd.d zero, %1, %0"
			: "+A" (*p)
			: "r" (-val)
			: "memory");
}

/* *p |= val (set the bits given in val), using amoor.d. */
static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{

	__asm __volatile("amoor.d zero, %1, %0"
			: "+A" (*p)
			: "r" (val)
			: "memory");
}

/* *p &= ~val (clear the bits given in val), using amoand.d. */
static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{

	__asm __volatile("amoand.d zero, %1, %0"
			: "+A" (*p)
			: "r" (~val)
			: "memory");
}

/*
 * Compare-and-set: if *p == cmpval, store newval into *p.
 *
 * The lr.d/sc.d sequence is retried while the store-conditional fails.
 * Returns non-zero if newval was stored, zero if *p did not match cmpval.
 */
static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	res = 0;

	__asm __volatile(
		"0:"
			"li   %1, 1\n" /* Preset to fail */
			"lr.d %0, %2\n"
			"bne  %0, %z3, 1f\n"
			"sc.d %1, %z4, %2\n"
			"bnez %1, 0b\n"
		"1:"
			: "=&r" (tmp), "=&r" (res), "+A" (*p)
			: "rJ" (cmpval), "rJ" (newval)
			: "memory");

	return (!res);
}

/*
 * Compare-and-set that reports the observed value on mismatch.
 *
 * If *p == *cmpval, a single store-conditional of newval is attempted
 * (no retry loop).  If the values differ, the value read from *p is
 * written back to *cmpval.  Returns non-zero only if newval was stored;
 * a failed store-conditional returns zero and leaves *cmpval untouched.
 */
static __inline int
atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	res = 0;

	__asm __volatile(
		"0:"
			"li   %1, 1\n"		/* Preset to fail */
			"lr.d %0, %2\n"		/* Load old value */
			"bne  %0, %z4, 1f\n"	/* Compare */
			"sc.d %1, %z5, %2\n"	/* Try to store new value */
			"j 2f\n"
		"1:"
			"sd   %0, %3\n"		/* Save old value */
		"2:"
			: "=&r" (tmp), "=&r" (res), "+A" (*p), "+A" (*cmpval)
			: "rJ" (*cmpval), "rJ" (newval)
			: "memory");

	return (!res);
}

/* *p += val; returns the value *p held before the addition. */
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret;

	__asm __volatile("amoadd.d %0, %2, %1"
			: "=&r" (ret), "+A" (*p)
			: "r" (val)
			: "memory");

	return (ret);
}

/* Swap zero into *p and return the value it held before. */
static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret;
	uint64_t val;

	val = 0;

	__asm __volatile("amoswap.d %0, %2, %1"
			: "=&r"(ret), "+A" (*p)
			: "r" (val)
			: "memory");

	return (ret);
}

/* Store val into *p and return the value it held before (amoswap.w). */
static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t old;

	__asm __volatile("amoswap.w %0, %2, %1"
			: "=&r"(old), "+A" (*p)
			: "r" (val)
			: "memory");

	return (old);
}

/* Store val into *p and return the value it held before (amoswap.d). */
static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t old;

	__asm __volatile("amoswap.d %0, %2, %1"
			: "=&r"(old), "+A" (*p)
			: "r" (val)
			: "memory");

	return (old);
}

/* "long" and pointer-sized operations map onto the 64-bit primitives. */
#define	atomic_add_long			atomic_add_64
#define	atomic_clear_long		atomic_clear_64
#define	atomic_cmpset_long		atomic_cmpset_64
#define	atomic_fcmpset_long		atomic_fcmpset_64
#define	atomic_fetchadd_long		atomic_fetchadd_64
#define	atomic_readandclear_long	atomic_readandclear_64
#define	atomic_set_long			atomic_set_64
#define	atomic_subtract_long		atomic_subtract_64

#define	atomic_add_ptr			atomic_add_64
#define	atomic_clear_ptr		atomic_clear_64
#define	atomic_cmpset_ptr		atomic_cmpset_64
#define	atomic_fcmpset_ptr		atomic_fcmpset_64
#define	atomic_fetchadd_ptr		atomic_fetchadd_64
#define	atomic_readandclear_ptr		atomic_readandclear_64
#define	atomic_set_ptr			atomic_set_64
#define	atomic_subtract_ptr		atomic_subtract_64

ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)

/* atomic_cmpset_64() followed by a fence. */
static __inline int
atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	int res;

	res = atomic_cmpset_64(p, cmpval, newval);

	fence();

	return (res);
}

/* A fence followed by atomic_cmpset_64(). */
static __inline int
atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{

	fence();

	return (atomic_cmpset_64(p, cmpval, newval));
}

/* atomic_fcmpset_64() followed by a fence. */
static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
	int res;

	res = atomic_fcmpset_64(p, cmpval, newval);

	fence();

	return (res);
}

/* A fence followed by atomic_fcmpset_64(). */
static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{

	fence();

	return (atomic_fcmpset_64(p, cmpval, newval));
}

/* Plain load of *p followed by a fence. */
static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = *p;

	fence();

	return (ret);
}

/* A fence followed by a plain store of val to *p. */
static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	fence();

	*p = val;
}

#define	atomic_add_acq_long		atomic_add_acq_64
#define	atomic_clear_acq_long		atomic_clear_acq_64
#define	atomic_cmpset_acq_long		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_long		atomic_fcmpset_acq_64
#define	atomic_load_acq_long		atomic_load_acq_64
#define	atomic_set_acq_long		atomic_set_acq_64
#define	atomic_subtract_acq_long	atomic_subtract_acq_64

#define	atomic_add_acq_ptr		atomic_add_acq_64
#define	atomic_clear_acq_ptr		atomic_clear_acq_64
#define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_64
#define	atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_64
#define	atomic_load_acq_ptr		atomic_load_acq_64
#define	atomic_set_acq_ptr		atomic_set_acq_64
#define	atomic_subtract_acq_ptr		atomic_subtract_acq_64

#undef ATOMIC_ACQ_REL

/*
 * Stand-alone thread fences.  Every ordering flavour is implemented with
 * the same full fence().
 */
static __inline void
atomic_thread_fence_acq(void)
{

	fence();
}

static __inline void
atomic_thread_fence_rel(void)
{

	fence();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	fence();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	fence();
}

#define	atomic_add_rel_long		atomic_add_rel_64
#define	atomic_clear_rel_long		atomic_clear_rel_64
#define	atomic_cmpset_rel_long		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_long		atomic_fcmpset_rel_64
#define	atomic_set_rel_long		atomic_set_rel_64
#define	atomic_subtract_rel_long	atomic_subtract_rel_64
#define	atomic_store_rel_long		atomic_store_rel_64

#define	atomic_add_rel_ptr		atomic_add_rel_64
#define	atomic_clear_rel_ptr		atomic_clear_rel_64
#define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_64
#define	atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_64
#define	atomic_set_rel_ptr		atomic_set_rel_64
#define	atomic_subtract_rel_ptr		atomic_subtract_rel_64
#define	atomic_store_rel_ptr		atomic_store_rel_64

#endif /* _MACHINE_ATOMIC_H_ */