/*
 * Copyright 2009-2015 Samy Al Bahra.
 * Copyright 2011 Devon H. O'Dell <devon.odell@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef CK_PR_X86_H
#define CK_PR_X86_H

#ifndef CK_PR_H
#error Do not include this file directly, use ck_pr.h
#endif

#include <ck_cc.h>
#include <ck_md.h>
#include <ck_stdint.h>

/*
 * The following represent supported atomic operations.
 * These operations may be emulated.
 */
#include "ck_f_pr.h"

/* Minimum requirements for the CK_PR interface are met. */
#define CK_F_PR

#ifdef CK_MD_UMP
#define CK_PR_LOCK_PREFIX
#else
#define CK_PR_LOCK_PREFIX "lock "
#endif

/*
 * Prevent speculative execution in busy-wait loops (P4 <=)
 * or "predefined delay".
 */
CK_CC_INLINE static void
ck_pr_stall(void)
{
	__asm__ __volatile__("pause" ::: "memory");
	return;
}

#define CK_PR_FENCE(T, I)				\
	CK_CC_INLINE static void			\
	ck_pr_fence_strict_##T(void)			\
	{						\
		__asm__ __volatile__(I ::: "memory");	\
	}

CK_PR_FENCE(atomic, "sfence")
CK_PR_FENCE(atomic_store, "sfence")
CK_PR_FENCE(atomic_load, "mfence")
CK_PR_FENCE(store_atomic, "sfence")
CK_PR_FENCE(load_atomic, "mfence")
CK_PR_FENCE(load, "lfence")
CK_PR_FENCE(load_store, "mfence")
CK_PR_FENCE(store, "sfence")
CK_PR_FENCE(store_load, "mfence")
CK_PR_FENCE(memory, "mfence")
CK_PR_FENCE(release, "mfence")
CK_PR_FENCE(acquire, "mfence")
CK_PR_FENCE(acqrel, "mfence")
CK_PR_FENCE(lock, "mfence")
CK_PR_FENCE(unlock, "mfence")

#undef CK_PR_FENCE

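/*
 * Illustrative sketch (not part of this header): a typical spin-wait pairs
 * ck_pr_stall() with the generic load and fence wrappers exposed through
 * ck_pr.h. The variable "ready" below is hypothetical.
 *
 *	while (ck_pr_load_uint(&ready) == 0)
 *		ck_pr_stall();
 *	ck_pr_fence_acquire();
 */
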
/*
 * Atomic fetch-and-store operations.
 */
#define CK_PR_FAS(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_fas_##S(M *target, T v)				\
	{							\
		__asm__ __volatile__(I " %0, %1"		\
					: "+m" (*(C *)target),	\
					  "+q" (v)		\
					:			\
					: "memory");		\
		return v;					\
	}

CK_PR_FAS(ptr, void, void *, char, "xchgl")

#define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I)

CK_PR_FAS_S(char, char, "xchgb")
CK_PR_FAS_S(uint, unsigned int, "xchgl")
CK_PR_FAS_S(int, int, "xchgl")
CK_PR_FAS_S(32, uint32_t, "xchgl")
CK_PR_FAS_S(16, uint16_t, "xchgw")
CK_PR_FAS_S(8, uint8_t, "xchgb")

#undef CK_PR_FAS_S
#undef CK_PR_FAS

#define CK_PR_LOAD(S, M, T, C, I)				\
	CK_CC_INLINE static T					\
	ck_pr_md_load_##S(const M *target)			\
	{							\
		T r;						\
		__asm__ __volatile__(I " %1, %0"		\
					: "=q" (r)		\
					: "m" (*(const C *)target) \
					: "memory");		\
		return (r);					\
	}

CK_PR_LOAD(ptr, void, void *, char, "movl")

#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)

CK_PR_LOAD_S(char, char, "movb")
CK_PR_LOAD_S(uint, unsigned int, "movl")
CK_PR_LOAD_S(int, int, "movl")
CK_PR_LOAD_S(32, uint32_t, "movl")
CK_PR_LOAD_S(16, uint16_t, "movw")
CK_PR_LOAD_S(8, uint8_t, "movb")

#undef CK_PR_LOAD_S
#undef CK_PR_LOAD

#define CK_PR_STORE(S, M, T, C, I)				\
	CK_CC_INLINE static void				\
	ck_pr_md_store_##S(M *target, T v)			\
	{							\
		__asm__ __volatile__(I " %1, %0"		\
					: "=m" (*(C *)target)	\
					: CK_CC_IMM "q" (v)	\
					: "memory");		\
		return;						\
	}

CK_PR_STORE(ptr, void, const void *, char, "movl")

#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I)

CK_PR_STORE_S(char, char, "movb")
CK_PR_STORE_S(uint, unsigned int, "movl")
CK_PR_STORE_S(int, int, "movl")
CK_PR_STORE_S(32, uint32_t, "movl")
CK_PR_STORE_S(16, uint16_t, "movw")
CK_PR_STORE_S(8, uint8_t, "movb")

#undef CK_PR_STORE_S
#undef CK_PR_STORE

/*
 * Atomic fetch-and-add operations.
 */
#define CK_PR_FAA(S, M, T, C, I)					\
	CK_CC_INLINE static T						\
	ck_pr_faa_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target),		\
					  "+q" (d)			\
					:				\
					: "memory", "cc");		\
		return (d);						\
	}

CK_PR_FAA(ptr, void, uintptr_t, char, "xaddl")

#define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I)

CK_PR_FAA_S(char, char, "xaddb")
CK_PR_FAA_S(uint, unsigned int, "xaddl")
CK_PR_FAA_S(int, int, "xaddl")
CK_PR_FAA_S(32, uint32_t, "xaddl")
CK_PR_FAA_S(16, uint16_t, "xaddw")
CK_PR_FAA_S(8, uint8_t, "xaddb")

#undef CK_PR_FAA_S
#undef CK_PR_FAA

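/*
 * Illustrative sketch (hypothetical variables, not part of this header):
 * ck_pr_faa_* returns the value observed immediately before the addition,
 * and ck_pr_fas_* returns the value displaced by the store.
 *
 *	unsigned int ticket = ck_pr_faa_uint(&next_ticket, 1);
 *	unsigned int previous = ck_pr_fas_uint(&flag, 1);
 */
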
/*
 * Atomic store-only unary operations.
 */
#define CK_PR_UNARY(K, S, T, C, I)	\
	CK_PR_UNARY_R(K, S, T, C, I)	\
	CK_PR_UNARY_V(K, S, T, C, I)

#define CK_PR_UNARY_R(K, S, T, C, I)				\
	CK_CC_INLINE static void				\
	ck_pr_##K##_##S(T *target)				\
	{							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0"	\
					: "+m" (*(C *)target)	\
					:			\
					: "memory", "cc");	\
		return;						\
	}

#define CK_PR_UNARY_V(K, S, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S##_zero(T *target, bool *r)			\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1"	\
					: "+m" (*(C *)target),		\
					  "=m" (*r)			\
					:				\
					: "memory", "cc");		\
		return;							\
	}

#define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)

#define CK_PR_GENERATE(K)					\
	CK_PR_UNARY(K, ptr, void, char, #K "l")			\
	CK_PR_UNARY_S(K, char, char, #K "b")			\
	CK_PR_UNARY_S(K, int, int, #K "l")			\
	CK_PR_UNARY_S(K, uint, unsigned int, #K "l")		\
	CK_PR_UNARY_S(K, 32, uint32_t, #K "l")			\
	CK_PR_UNARY_S(K, 16, uint16_t, #K "w")			\
	CK_PR_UNARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(inc)
CK_PR_GENERATE(dec)
CK_PR_GENERATE(neg)

/* not does not affect condition flags. */
#undef CK_PR_UNARY_V
#define CK_PR_UNARY_V(a, b, c, d, e)
CK_PR_GENERATE(not)

#undef CK_PR_GENERATE
#undef CK_PR_UNARY_S
#undef CK_PR_UNARY_V
#undef CK_PR_UNARY_R
#undef CK_PR_UNARY

/*
 * Atomic store-only binary operations.
 */
#define CK_PR_BINARY(K, S, M, T, C, I)					\
	CK_CC_INLINE static void					\
	ck_pr_##K##_##S(M *target, T d)					\
	{								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0"	\
					: "+m" (*(C *)target)		\
					: CK_CC_IMM "q" (d)		\
					: "memory", "cc");		\
		return;							\
	}

#define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I)

#define CK_PR_GENERATE(K)					\
	CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "l")	\
	CK_PR_BINARY_S(K, char, char, #K "b")			\
	CK_PR_BINARY_S(K, int, int, #K "l")			\
	CK_PR_BINARY_S(K, uint, unsigned int, #K "l")		\
	CK_PR_BINARY_S(K, 32, uint32_t, #K "l")			\
	CK_PR_BINARY_S(K, 16, uint16_t, #K "w")			\
	CK_PR_BINARY_S(K, 8, uint8_t, #K "b")

CK_PR_GENERATE(add)
CK_PR_GENERATE(sub)
CK_PR_GENERATE(and)
CK_PR_GENERATE(or)
CK_PR_GENERATE(xor)

#undef CK_PR_GENERATE
#undef CK_PR_BINARY_S
#undef CK_PR_BINARY

/*
 * Atomic compare and swap.
 */
#define CK_PR_CAS(S, M, T, C, I)						\
	CK_CC_INLINE static bool						\
	ck_pr_cas_##S(M *target, T compare, T set)				\
	{									\
		bool z;								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1"	\
					: "+m" (*(C *)target),			\
					  "=a" (z)				\
					: "q" (set),				\
					  "a" (compare)				\
					: "memory", "cc");			\
		return z;							\
	}

CK_PR_CAS(ptr, void, void *, char, "cmpxchgl")

#define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I)

CK_PR_CAS_S(char, char, "cmpxchgb")
CK_PR_CAS_S(int, int, "cmpxchgl")
CK_PR_CAS_S(uint, unsigned int, "cmpxchgl")
CK_PR_CAS_S(32, uint32_t, "cmpxchgl")
CK_PR_CAS_S(16, uint16_t, "cmpxchgw")
CK_PR_CAS_S(8, uint8_t, "cmpxchgb")

#undef CK_PR_CAS_S
#undef CK_PR_CAS

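/*
 * Illustrative sketch (hypothetical variables, not part of this header):
 * a lock-free increment built on the boolean compare-and-swap, retrying
 * until the snapshot used as the compare value is still current.
 *
 *	unsigned int snapshot;
 *	do {
 *		snapshot = ck_pr_load_uint(&counter);
 *	} while (ck_pr_cas_uint(&counter, snapshot, snapshot + 1) == false);
 */
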
/*
 * Compare and swap, set *v to old value of target.
 */
#define CK_PR_CAS_O(S, M, T, C, I, R)						\
	CK_CC_INLINE static bool						\
	ck_pr_cas_##S##_value(M *target, T compare, T set, M *v)		\
	{									\
		bool z;								\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;"	\
				     "mov %%" R ", %2;"				\
				     "setz %1;"					\
					: "+m" (*(C *)target),			\
					  "=a" (z),				\
					  "=m" (*(C *)v)			\
					: "q" (set),				\
					  "a" (compare)				\
					: "memory", "cc");			\
		return (bool)z;							\
	}

CK_PR_CAS_O(ptr, void, void *, char, "l", "eax")

#define CK_PR_CAS_O_S(S, T, I, R)	\
	CK_PR_CAS_O(S, T, T, T, I, R)

CK_PR_CAS_O_S(char, char, "b", "al")
CK_PR_CAS_O_S(int, int, "l", "eax")
CK_PR_CAS_O_S(uint, unsigned int, "l", "eax")
CK_PR_CAS_O_S(32, uint32_t, "l", "eax")
CK_PR_CAS_O_S(16, uint16_t, "w", "ax")
CK_PR_CAS_O_S(8, uint8_t, "b", "al")

#undef CK_PR_CAS_O_S
#undef CK_PR_CAS_O

/*
 * Atomic bit test operations.
 */
#define CK_PR_BT(K, S, T, P, C, I)					\
	CK_CC_INLINE static bool					\
	ck_pr_##K##_##S(T *target, unsigned int b)			\
	{								\
		bool c;							\
		__asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1"	\
					: "+m" (*(C *)target),		\
					  "=q" (c)			\
					: "q" ((P)b)			\
					: "memory", "cc");		\
		return (bool)c;						\
	}

#define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I)

#define CK_PR_GENERATE(K)					\
	CK_PR_BT(K, ptr, void, uint32_t, char, #K "l %2, %0")	\
	CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0")	\
	CK_PR_BT_S(K, int, int, #K "l %2, %0")			\
	CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0")		\
	CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0")

CK_PR_GENERATE(btc)
CK_PR_GENERATE(bts)
CK_PR_GENERATE(btr)

#undef CK_PR_GENERATE
#undef CK_PR_BT

#endif /* CK_PR_X86_H */
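
/*
 * Illustrative sketch (hypothetical variables, not part of the interface
 * above): the _value variants report the previously observed word through
 * their final argument, which suits CAS retry loops, and the bit test
 * operations return the prior state of the selected bit.
 *
 *	unsigned int expected = ck_pr_load_uint(&word), previous;
 *	while (ck_pr_cas_uint_value(&word, expected, expected | 0x1, &previous) == false)
 *		expected = previous;
 *
 *	bool was_set = ck_pr_bts_uint(&bitmap, 3);
 */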