/*
 * Copyright 2009-2015 Samy Al Bahra.
 * Copyright 2011 Devon H. O'Dell <devon.odell@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef CK_PR_X86_H
#define CK_PR_X86_H

#ifndef CK_PR_H
#error Do not include this file directly, use ck_pr.h
#endif

#include <ck_cc.h>
#include <ck_md.h>
#include <ck_stdint.h>

/*
 * The following represent supported atomic operations.
 * These operations may be emulated.
 */
#include "ck_f_pr.h"

/* Minimum requirements for the CK_PR interface are met. */
#define CK_F_PR

/*
 * Prevent speculative execution in busy-wait loops (Pentium 4 and earlier)
 * or induce a predefined delay.
 */
CK_CC_INLINE static void
ck_pr_stall(void)
{
	__asm__ __volatile__("pause" ::: "memory");
	return;
}

#ifdef CK_MD_UMP
#define CK_PR_LOCK_PREFIX
#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__("" ::: "memory");	\
		return;					\
	}
#else
#define CK_PR_LOCK_PREFIX "lock "
#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__(I ::: "memory");	\
		return;					\
	}
#endif /* CK_MD_UMP */
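/*
 * Usage sketch (illustrative only, not part of this header): ck_pr_stall()
 * belongs in the body of busy-wait loops. The spin_until_ready() helper
 * below is hypothetical; "ready" stands in for any flag published by
 * another thread, read through the ck_pr_load_uint wrapper exposed by
 * ck_pr.h.
 *
 *	static void
 *	spin_until_ready(unsigned int *ready)
 *	{
 *
 *		while (ck_pr_load_uint(ready) == 0)
 *			ck_pr_stall();
 *		return;
 *	}
 */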
#if defined(CK_MD_SSE_DISABLE)
/* If SSE is disabled, then use atomic operations for serialization. */
#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)"
#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE
#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE
#else
#define CK_MD_X86_SFENCE "sfence"
#define CK_MD_X86_LFENCE "lfence"
#define CK_MD_X86_MFENCE "mfence"
#endif /* !CK_MD_SSE_DISABLE */

CK_PR_FENCE(atomic, "")
CK_PR_FENCE(atomic_store, "")
CK_PR_FENCE(atomic_load, "")
CK_PR_FENCE(store_atomic, "")
CK_PR_FENCE(load_atomic, "")
CK_PR_FENCE(load, CK_MD_X86_LFENCE)
CK_PR_FENCE(load_store, CK_MD_X86_MFENCE)
CK_PR_FENCE(store, CK_MD_X86_SFENCE)
CK_PR_FENCE(store_load, CK_MD_X86_MFENCE)
CK_PR_FENCE(memory, CK_MD_X86_MFENCE)
CK_PR_FENCE(release, CK_MD_X86_MFENCE)
CK_PR_FENCE(acquire, CK_MD_X86_MFENCE)
CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE)
CK_PR_FENCE(lock, CK_MD_X86_MFENCE)
CK_PR_FENCE(unlock, CK_MD_X86_MFENCE)

#undef CK_PR_FENCE

/*
 * Atomic fetch-and-store operations.
 */
#define CK_PR_FAS(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_fas_##S(M *target, T v)				\
	{							\
		__asm__ __volatile__(I " %0, %1"		\
					: "+m" (*(C *)target),	\
					  "+q" (v)		\
					:			\
					: "memory");		\
		return v;					\
	}

CK_PR_FAS(ptr, void, void *, uint32_t, "xchgl")

#define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I)

CK_PR_FAS_S(char, char, "xchgb")
CK_PR_FAS_S(uint, unsigned int, "xchgl")
CK_PR_FAS_S(int, int, "xchgl")
CK_PR_FAS_S(32, uint32_t, "xchgl")
CK_PR_FAS_S(16, uint16_t, "xchgw")
CK_PR_FAS_S(8, uint8_t, "xchgb")

#undef CK_PR_FAS_S
#undef CK_PR_FAS

#define CK_PR_LOAD(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_md_load_##S(const M *target)			\
	{							\
		T r;						\
		__asm__ __volatile__(I " %1, %0"		\
					: "=q" (r)		\
					: "m" (*(const C *)target) \
					: "memory");		\
		return (r);					\
	}

CK_PR_LOAD(ptr, void, void *, uint32_t, "movl")

#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)

CK_PR_LOAD_S(char, char, "movb")
CK_PR_LOAD_S(uint, unsigned int, "movl")
CK_PR_LOAD_S(int, int, "movl")
CK_PR_LOAD_S(32, uint32_t, "movl")
CK_PR_LOAD_S(16, uint16_t, "movw")
CK_PR_LOAD_S(8, uint8_t, "movb")

#undef CK_PR_LOAD_S
#undef CK_PR_LOAD

#define CK_PR_STORE(S, M, T, C, I)				\
	CK_CC_INLINE static void				\
	ck_pr_md_store_##S(M *target, T v)			\
	{							\
		__asm__ __volatile__(I " %1, %0"		\
					: "=m" (*(C *)target)	\
					: CK_CC_IMM "q" (v)	\
					: "memory");		\
		return;						\
	}

CK_PR_STORE(ptr, void, const void *, uint32_t, "movl")

#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)

CK_PR_STORE_S(char, char, "movb")
CK_PR_STORE_S(uint, unsigned int, "movl")
CK_PR_STORE_S(int, int, "movl")
CK_PR_STORE_S(32, uint32_t, "movl")
CK_PR_STORE_S(16, uint16_t, "movw")
CK_PR_STORE_S(8, uint8_t, "movb")

#undef CK_PR_STORE_S
#undef CK_PR_STORE
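/*
 * Usage sketch (illustrative only): a minimal test-and-set spinlock built
 * on the fetch-and-store and store operations above, assuming the
 * ck_pr_fas_uint/ck_pr_store_uint and ck_pr_fence_lock/ck_pr_fence_unlock
 * wrappers exposed through ck_pr.h. The lock_word variable and the
 * lock()/unlock() helpers are hypothetical.
 *
 *	static unsigned int lock_word = 0;
 *
 *	static void
 *	lock(void)
 *	{
 *
 *		while (ck_pr_fas_uint(&lock_word, 1) == 1)
 *			ck_pr_stall();
 *		ck_pr_fence_lock();
 *		return;
 *	}
 *
 *	static void
 *	unlock(void)
 *	{
 *
 *		ck_pr_fence_unlock();
 *		ck_pr_store_uint(&lock_word, 0);
 *		return;
 *	}
 */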
/*
 * Atomic fetch-and-add operations.
 */
#define CK_PR_FAA(S, M, T, C, I)					\
	CK_CC_INLINE static T						\
	ck_pr_faa_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target),		\
					  "+q" (d)			\
					:				\
					: "memory", "cc");		\
		return (d);						\
	}

CK_PR_FAA(ptr, void, uintptr_t, uint32_t, "xaddl")

#define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I)

CK_PR_FAA_S(char, char, "xaddb")
CK_PR_FAA_S(uint, unsigned int, "xaddl")
CK_PR_FAA_S(int, int, "xaddl")
CK_PR_FAA_S(32, uint32_t, "xaddl")
CK_PR_FAA_S(16, uint16_t, "xaddw")
CK_PR_FAA_S(8, uint8_t, "xaddb")

#undef CK_PR_FAA_S
#undef CK_PR_FAA

/*
 * Atomic store-only unary operations.
 */
#define CK_PR_UNARY(K, S, T, C, I)	\
	CK_PR_UNARY_R(K, S, T, C, I)	\
	CK_PR_UNARY_V(K, S, T, C, I)

#define CK_PR_UNARY_R(K, S, T, C, I)				\
	CK_CC_INLINE static void				\
	ck_pr_##K##_##S(T *target)				\
	{							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0"	\
					: "+m" (*(C *)target)	\
					:			\
					: "memory", "cc");	\
		return;						\
	}

#define CK_PR_UNARY_V(K, S, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_##K##_##S##_is_zero(T *target)				\
	{								\
		bool ret;						\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"	\
					: "+m" (*(C *)target),		\
					  "=qm" (ret)			\
					:				\
					: "memory", "cc");		\
		return ret;						\
	}

#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)

#define CK_PR_GENERATE(K)					\
	CK_PR_UNARY(K, ptr, void, uint32_t, #K "l")		\
	CK_PR_UNARY_S(K, char, char, #K "b")			\
	CK_PR_UNARY_S(K, int, int, #K "l")			\
	CK_PR_UNARY_S(K, uint, unsigned int, #K "l")		\
	CK_PR_UNARY_S(K, 32, uint32_t, #K "l")			\
	CK_PR_UNARY_S(K, 16, uint16_t, #K "w")			\
	CK_PR_UNARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(inc)
CK_PR_GENERATE(dec)
CK_PR_GENERATE(neg)

/* not does not affect condition flags, so no _is_zero variant is generated. */
#undef CK_PR_UNARY_V
#define CK_PR_UNARY_V(a, b, c, d, e)
CK_PR_GENERATE(not)

#undef CK_PR_GENERATE
#undef CK_PR_UNARY_S
#undef CK_PR_UNARY_V
#undef CK_PR_UNARY_R
#undef CK_PR_UNARY

/*
 * Atomic store-only binary operations.
 */
#define CK_PR_BINARY(K, S, M, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target)		\
					: CK_CC_IMM "q" (d)		\
					: "memory", "cc");		\
		return;							\
	}

#define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I)

#define CK_PR_GENERATE(K)						\
	CK_PR_BINARY(K, ptr, void, uintptr_t, uint32_t, #K "l")	\
	CK_PR_BINARY_S(K, char, char, #K "b")				\
	CK_PR_BINARY_S(K, int, int, #K "l")				\
	CK_PR_BINARY_S(K, uint, unsigned int, #K "l")			\
	CK_PR_BINARY_S(K, 32, uint32_t, #K "l")				\
	CK_PR_BINARY_S(K, 16, uint16_t, #K "w")				\
	CK_PR_BINARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(add)
CK_PR_GENERATE(sub)
CK_PR_GENERATE(and)
CK_PR_GENERATE(or)
CK_PR_GENERATE(xor)

#undef CK_PR_GENERATE
#undef CK_PR_BINARY_S
#undef CK_PR_BINARY
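/*
 * Usage sketch (illustrative only): reference counting with the
 * fetch-and-add and store-only unary operations above. ck_pr_inc_uint and
 * ck_pr_dec_uint_is_zero are generated by the macros in this section; the
 * struct object type and the destroy() helper are hypothetical.
 *
 *	struct object {
 *		unsigned int refcnt;
 *	};
 *
 *	static void
 *	object_ref(struct object *o)
 *	{
 *
 *		ck_pr_inc_uint(&o->refcnt);
 *		return;
 *	}
 *
 *	static void
 *	object_unref(struct object *o)
 *	{
 *
 *		if (ck_pr_dec_uint_is_zero(&o->refcnt) == true)
 *			destroy(o);
 *		return;
 *	}
 */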
/*
 * Atomic compare and swap, with a variant that sets *v to the old value
 * of target.
 */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
#define CK_PR_CAS(S, M, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S(M *target, T compare, T set)			\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0"	\
					: "+m" (*(C *)target),		\
					  "=@ccz" (z),			\
					  /* EAX is clobbered by cmpxchg. */ \
					  "+a" (compare)		\
					: "q" (set)			\
					: "memory", "cc");		\
		return z;						\
	}								\
									\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)	\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"	\
					: "+m" (*(C *)target),		\
					  "=@ccz" (z),			\
					  "+a" (compare)		\
					: "q" (set)			\
					: "memory", "cc");		\
		*(T *)v = compare;					\
		return z;						\
	}
#else
#define CK_PR_CAS(S, M, T, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S(M *target, T compare, T set)			\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1" \
					: "+m" (*(C *)target),		\
					  "=a" (z)			\
					: "q" (set),			\
					  "a" (compare)			\
					: "memory", "cc");		\
		return z;						\
	}								\
									\
	CK_CC_INLINE static bool					\
	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)	\
	{								\
		bool z;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;"	\
				     "setz %1;"				\
					: "+m" (*(C *)target),		\
					  "=q" (z),			\
					  "+a" (compare)		\
					: "q" (set)			\
					: "memory", "cc");		\
		*(T *)v = compare;					\
		return z;						\
	}
#endif

CK_PR_CAS(ptr, void, void *, uint32_t, "cmpxchgl")

#define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I)

CK_PR_CAS_S(char, char, "cmpxchgb")
CK_PR_CAS_S(int, int, "cmpxchgl")
CK_PR_CAS_S(uint, unsigned int, "cmpxchgl")
CK_PR_CAS_S(32, uint32_t, "cmpxchgl")
CK_PR_CAS_S(16, uint16_t, "cmpxchgw")
CK_PR_CAS_S(8, uint8_t, "cmpxchgb")

#undef CK_PR_CAS_S
#undef CK_PR_CAS

/*
 * Atomic bit test operations.
 */
#define CK_PR_BT(K, S, T, P, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_##K##_##S(T *target, unsigned int b)			\
	{								\
		bool c;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1"	\
					: "+m" (*(C *)target),		\
					  "=q" (c)			\
					: "q" ((P)b)			\
					: "memory", "cc");		\
		return (bool)c;						\
	}

#define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I)

#define CK_PR_GENERATE(K)						\
	CK_PR_BT(K, ptr, void, uint32_t, uint32_t, #K "l %2, %0")	\
	CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0")		\
	CK_PR_BT_S(K, int, int, #K "l %2, %0")				\
	CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0")			\
	CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0")

CK_PR_GENERATE(btc)
CK_PR_GENERATE(bts)
CK_PR_GENERATE(btr)

#undef CK_PR_GENERATE
#undef CK_PR_BT

#endif /* CK_PR_X86_H */
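/*
 * Usage sketch (illustrative only): a compare-and-swap retry loop that
 * atomically stores the maximum of the current value and a candidate,
 * assuming the ck_pr_load_uint/ck_pr_cas_uint_value wrappers exposed
 * through ck_pr.h. The atomic_max_uint() helper is hypothetical. On
 * failure, ck_pr_cas_uint_value refreshes snapshot with the observed
 * value, so the loop re-examines it without an extra load.
 *
 *	static void
 *	atomic_max_uint(unsigned int *target, unsigned int candidate)
 *	{
 *		unsigned int snapshot;
 *
 *		snapshot = ck_pr_load_uint(target);
 *		while (snapshot < candidate &&
 *		    ck_pr_cas_uint_value(target, snapshot, candidate,
 *		    &snapshot) == false)
 *			ck_pr_stall();
 *		return;
 *	}
 */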