/*
 * Copyright 2009-2015 Samy Al Bahra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

271fb62fb0SOlivier Houchard #ifndef CK_PR_X86_64_H
281fb62fb0SOlivier Houchard #define CK_PR_X86_64_H
291fb62fb0SOlivier Houchard
301fb62fb0SOlivier Houchard #ifndef CK_PR_H
311fb62fb0SOlivier Houchard #error Do not include this file directly, use ck_pr.h
321fb62fb0SOlivier Houchard #endif
331fb62fb0SOlivier Houchard
341fb62fb0SOlivier Houchard #include <ck_cc.h>
351fb62fb0SOlivier Houchard #include <ck_md.h>
361fb62fb0SOlivier Houchard #include <ck_stdint.h>
371fb62fb0SOlivier Houchard
381fb62fb0SOlivier Houchard /*
391fb62fb0SOlivier Houchard * The following represent supported atomic operations.
401fb62fb0SOlivier Houchard * These operations may be emulated.
411fb62fb0SOlivier Houchard */
421fb62fb0SOlivier Houchard #include "ck_f_pr.h"
431fb62fb0SOlivier Houchard
441fb62fb0SOlivier Houchard /*
451fb62fb0SOlivier Houchard * Support for TSX extensions.
461fb62fb0SOlivier Houchard */
471fb62fb0SOlivier Houchard #ifdef CK_MD_RTM_ENABLE
481fb62fb0SOlivier Houchard #include "ck_pr_rtm.h"
491fb62fb0SOlivier Houchard #endif
501fb62fb0SOlivier Houchard
/* Minimum requirements for the CK_PR interface are met. */
#define CK_F_PR

/*
 * String pasted in front of the instruction templates below that reference
 * CK_PR_LOCK_PREFIX. It is empty when CK_MD_UMP is defined and "lock "
 * otherwise.
 * NOTE(review): CK_MD_UMP presumably selects a uniprocessor build; confirm
 * against ck_md.h.
 */
#ifdef CK_MD_UMP
#define CK_PR_LOCK_PREFIX
#else
#define CK_PR_LOCK_PREFIX "lock "
#endif

601fb62fb0SOlivier Houchard /*
61271ce402SOlivier Houchard * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined
62271ce402SOlivier Houchard * delay".
631fb62fb0SOlivier Houchard */
641fb62fb0SOlivier Houchard CK_CC_INLINE static void
ck_pr_stall(void)651fb62fb0SOlivier Houchard ck_pr_stall(void)
661fb62fb0SOlivier Houchard {
671fb62fb0SOlivier Houchard __asm__ __volatile__("pause" ::: "memory");
681fb62fb0SOlivier Houchard return;
691fb62fb0SOlivier Houchard }
701fb62fb0SOlivier Houchard
711fb62fb0SOlivier Houchard #define CK_PR_FENCE(T, I) \
721fb62fb0SOlivier Houchard CK_CC_INLINE static void \
731fb62fb0SOlivier Houchard ck_pr_fence_strict_##T(void) \
741fb62fb0SOlivier Houchard { \
751fb62fb0SOlivier Houchard __asm__ __volatile__(I ::: "memory"); \
761fb62fb0SOlivier Houchard }
771fb62fb0SOlivier Houchard
78271ce402SOlivier Houchard /* Atomic operations are always serializing. */
79271ce402SOlivier Houchard CK_PR_FENCE(atomic, "")
80271ce402SOlivier Houchard CK_PR_FENCE(atomic_store, "")
81271ce402SOlivier Houchard CK_PR_FENCE(atomic_load, "")
82271ce402SOlivier Houchard CK_PR_FENCE(store_atomic, "")
83271ce402SOlivier Houchard CK_PR_FENCE(load_atomic, "")
84271ce402SOlivier Houchard
85271ce402SOlivier Houchard /* Traditional fence interface. */
861fb62fb0SOlivier Houchard CK_PR_FENCE(load, "lfence")
871fb62fb0SOlivier Houchard CK_PR_FENCE(load_store, "mfence")
881fb62fb0SOlivier Houchard CK_PR_FENCE(store, "sfence")
891fb62fb0SOlivier Houchard CK_PR_FENCE(store_load, "mfence")
901fb62fb0SOlivier Houchard CK_PR_FENCE(memory, "mfence")
91271ce402SOlivier Houchard
92271ce402SOlivier Houchard /* Below are stdatomic-style fences. */
93271ce402SOlivier Houchard
94271ce402SOlivier Houchard /*
95271ce402SOlivier Houchard * Provides load-store and store-store ordering. However, Intel specifies that
96271ce402SOlivier Houchard * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in
97271ce402SOlivier Houchard * particular, stores are not re-ordered with respect to prior loads and it is
98271ce402SOlivier Houchard * really just the stores that are subject to re-ordering). However, we take
99271ce402SOlivier Houchard * the conservative route as the manuals are too ambiguous for my taste.
100271ce402SOlivier Houchard */
1011fb62fb0SOlivier Houchard CK_PR_FENCE(release, "mfence")
102271ce402SOlivier Houchard
103271ce402SOlivier Houchard /*
104271ce402SOlivier Houchard * Provides load-load and load-store ordering. The lfence instruction ensures
105271ce402SOlivier Houchard * all prior load operations are complete before any subsequent instructions
106271ce402SOlivier Houchard * actually begin execution. However, the manual also ends up going to describe
107271ce402SOlivier Houchard * WC memory as a relaxed model.
108271ce402SOlivier Houchard */
1091fb62fb0SOlivier Houchard CK_PR_FENCE(acquire, "mfence")
110271ce402SOlivier Houchard
1111fb62fb0SOlivier Houchard CK_PR_FENCE(acqrel, "mfence")
1121fb62fb0SOlivier Houchard CK_PR_FENCE(lock, "mfence")
1131fb62fb0SOlivier Houchard CK_PR_FENCE(unlock, "mfence")
1141fb62fb0SOlivier Houchard
1151fb62fb0SOlivier Houchard #undef CK_PR_FENCE
1161fb62fb0SOlivier Houchard
1171fb62fb0SOlivier Houchard /*
1181fb62fb0SOlivier Houchard * Read for ownership. Older compilers will generate the 32-bit
1191fb62fb0SOlivier Houchard * 3DNow! variant which is binary compatible with x86-64 variant
1201fb62fb0SOlivier Houchard * of prefetchw.
1211fb62fb0SOlivier Houchard */
1221fb62fb0SOlivier Houchard #ifndef CK_F_PR_RFO
1231fb62fb0SOlivier Houchard #define CK_F_PR_RFO
1241fb62fb0SOlivier Houchard CK_CC_INLINE static void
ck_pr_rfo(const void * m)1251fb62fb0SOlivier Houchard ck_pr_rfo(const void *m)
1261fb62fb0SOlivier Houchard {
1271fb62fb0SOlivier Houchard
1281fb62fb0SOlivier Houchard __asm__ __volatile__("prefetchw (%0)"
1291fb62fb0SOlivier Houchard :
1301fb62fb0SOlivier Houchard : "r" (m)
1311fb62fb0SOlivier Houchard : "memory");
1321fb62fb0SOlivier Houchard
1331fb62fb0SOlivier Houchard return;
1341fb62fb0SOlivier Houchard }
1351fb62fb0SOlivier Houchard #endif /* CK_F_PR_RFO */
1361fb62fb0SOlivier Houchard
1371fb62fb0SOlivier Houchard /*
1381fb62fb0SOlivier Houchard * Atomic fetch-and-store operations.
1391fb62fb0SOlivier Houchard */
1401fb62fb0SOlivier Houchard #define CK_PR_FAS(S, M, T, C, I) \
1411fb62fb0SOlivier Houchard CK_CC_INLINE static T \
1421fb62fb0SOlivier Houchard ck_pr_fas_##S(M *target, T v) \
1431fb62fb0SOlivier Houchard { \
1441fb62fb0SOlivier Houchard __asm__ __volatile__(I " %0, %1" \
1451fb62fb0SOlivier Houchard : "+m" (*(C *)target), \
1461fb62fb0SOlivier Houchard "+q" (v) \
1471fb62fb0SOlivier Houchard : \
1481fb62fb0SOlivier Houchard : "memory"); \
1491fb62fb0SOlivier Houchard return v; \
1501fb62fb0SOlivier Houchard }
1511fb62fb0SOlivier Houchard
152d75884dfSMark Johnston CK_PR_FAS(ptr, void, void *, uint64_t, "xchgq")
1531fb62fb0SOlivier Houchard
1541fb62fb0SOlivier Houchard #define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I)
1551fb62fb0SOlivier Houchard
1568f87df16SOlivier Houchard #ifndef CK_PR_DISABLE_DOUBLE
1571fb62fb0SOlivier Houchard CK_PR_FAS_S(double, double, "xchgq")
1588f87df16SOlivier Houchard #endif
1591fb62fb0SOlivier Houchard CK_PR_FAS_S(char, char, "xchgb")
1601fb62fb0SOlivier Houchard CK_PR_FAS_S(uint, unsigned int, "xchgl")
1611fb62fb0SOlivier Houchard CK_PR_FAS_S(int, int, "xchgl")
1621fb62fb0SOlivier Houchard CK_PR_FAS_S(64, uint64_t, "xchgq")
1631fb62fb0SOlivier Houchard CK_PR_FAS_S(32, uint32_t, "xchgl")
1641fb62fb0SOlivier Houchard CK_PR_FAS_S(16, uint16_t, "xchgw")
1651fb62fb0SOlivier Houchard CK_PR_FAS_S(8, uint8_t, "xchgb")
1661fb62fb0SOlivier Houchard
1671fb62fb0SOlivier Houchard #undef CK_PR_FAS_S
1681fb62fb0SOlivier Houchard #undef CK_PR_FAS
1691fb62fb0SOlivier Houchard
1701fb62fb0SOlivier Houchard /*
1711fb62fb0SOlivier Houchard * Atomic load-from-memory operations.
1721fb62fb0SOlivier Houchard */
1731fb62fb0SOlivier Houchard #define CK_PR_LOAD(S, M, T, C, I) \
1741fb62fb0SOlivier Houchard CK_CC_INLINE static T \
1751fb62fb0SOlivier Houchard ck_pr_md_load_##S(const M *target) \
1761fb62fb0SOlivier Houchard { \
1771fb62fb0SOlivier Houchard T r; \
1781fb62fb0SOlivier Houchard __asm__ __volatile__(I " %1, %0" \
1791fb62fb0SOlivier Houchard : "=q" (r) \
1801fb62fb0SOlivier Houchard : "m" (*(const C *)target) \
1811fb62fb0SOlivier Houchard : "memory"); \
1821fb62fb0SOlivier Houchard return (r); \
1831fb62fb0SOlivier Houchard }
1841fb62fb0SOlivier Houchard
185d75884dfSMark Johnston CK_PR_LOAD(ptr, void, void *, uint64_t, "movq")
1861fb62fb0SOlivier Houchard
1871fb62fb0SOlivier Houchard #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I)
1881fb62fb0SOlivier Houchard
1891fb62fb0SOlivier Houchard CK_PR_LOAD_S(char, char, "movb")
1901fb62fb0SOlivier Houchard CK_PR_LOAD_S(uint, unsigned int, "movl")
1911fb62fb0SOlivier Houchard CK_PR_LOAD_S(int, int, "movl")
1928f87df16SOlivier Houchard #ifndef CK_PR_DISABLE_DOUBLE
1931fb62fb0SOlivier Houchard CK_PR_LOAD_S(double, double, "movq")
1948f87df16SOlivier Houchard #endif
1951fb62fb0SOlivier Houchard CK_PR_LOAD_S(64, uint64_t, "movq")
1961fb62fb0SOlivier Houchard CK_PR_LOAD_S(32, uint32_t, "movl")
1971fb62fb0SOlivier Houchard CK_PR_LOAD_S(16, uint16_t, "movw")
1981fb62fb0SOlivier Houchard CK_PR_LOAD_S(8, uint8_t, "movb")
1991fb62fb0SOlivier Houchard
2001fb62fb0SOlivier Houchard #undef CK_PR_LOAD_S
2011fb62fb0SOlivier Houchard #undef CK_PR_LOAD
2021fb62fb0SOlivier Houchard
2031fb62fb0SOlivier Houchard CK_CC_INLINE static void
ck_pr_load_64_2(const uint64_t target[2],uint64_t v[2])2041fb62fb0SOlivier Houchard ck_pr_load_64_2(const uint64_t target[2], uint64_t v[2])
2051fb62fb0SOlivier Houchard {
2061fb62fb0SOlivier Houchard __asm__ __volatile__("movq %%rdx, %%rcx;"
2071fb62fb0SOlivier Houchard "movq %%rax, %%rbx;"
2081fb62fb0SOlivier Houchard CK_PR_LOCK_PREFIX "cmpxchg16b %2;"
2091fb62fb0SOlivier Houchard : "=a" (v[0]),
2101fb62fb0SOlivier Houchard "=d" (v[1])
2111fb62fb0SOlivier Houchard : "m" (*(const uint64_t *)target)
2121fb62fb0SOlivier Houchard : "rbx", "rcx", "memory", "cc");
2131fb62fb0SOlivier Houchard return;
2141fb62fb0SOlivier Houchard }
2151fb62fb0SOlivier Houchard
2161fb62fb0SOlivier Houchard CK_CC_INLINE static void
ck_pr_load_ptr_2(const void * t,void * v)2171fb62fb0SOlivier Houchard ck_pr_load_ptr_2(const void *t, void *v)
2181fb62fb0SOlivier Houchard {
2191fb62fb0SOlivier Houchard ck_pr_load_64_2(CK_CPP_CAST(const uint64_t *, t),
2201fb62fb0SOlivier Houchard CK_CPP_CAST(uint64_t *, v));
2211fb62fb0SOlivier Houchard return;
2221fb62fb0SOlivier Houchard }
2231fb62fb0SOlivier Houchard
2241fb62fb0SOlivier Houchard #define CK_PR_LOAD_2(S, W, T) \
2251fb62fb0SOlivier Houchard CK_CC_INLINE static void \
2261fb62fb0SOlivier Houchard ck_pr_md_load_##S##_##W(const T t[2], T v[2]) \
2271fb62fb0SOlivier Houchard { \
2281fb62fb0SOlivier Houchard ck_pr_load_64_2((const uint64_t *)(const void *)t, \
2291fb62fb0SOlivier Houchard (uint64_t *)(void *)v); \
2301fb62fb0SOlivier Houchard return; \
2311fb62fb0SOlivier Houchard }
2321fb62fb0SOlivier Houchard
2331fb62fb0SOlivier Houchard CK_PR_LOAD_2(char, 16, char)
2341fb62fb0SOlivier Houchard CK_PR_LOAD_2(int, 4, int)
2351fb62fb0SOlivier Houchard CK_PR_LOAD_2(uint, 4, unsigned int)
2361fb62fb0SOlivier Houchard CK_PR_LOAD_2(32, 4, uint32_t)
2371fb62fb0SOlivier Houchard CK_PR_LOAD_2(16, 8, uint16_t)
2381fb62fb0SOlivier Houchard CK_PR_LOAD_2(8, 16, uint8_t)
2391fb62fb0SOlivier Houchard
2401fb62fb0SOlivier Houchard #undef CK_PR_LOAD_2
2411fb62fb0SOlivier Houchard
2421fb62fb0SOlivier Houchard /*
2431fb62fb0SOlivier Houchard * Atomic store-to-memory operations.
2441fb62fb0SOlivier Houchard */
2451fb62fb0SOlivier Houchard #define CK_PR_STORE_IMM(S, M, T, C, I, K) \
2461fb62fb0SOlivier Houchard CK_CC_INLINE static void \
2471fb62fb0SOlivier Houchard ck_pr_md_store_##S(M *target, T v) \
2481fb62fb0SOlivier Houchard { \
2491fb62fb0SOlivier Houchard __asm__ __volatile__(I " %1, %0" \
2501fb62fb0SOlivier Houchard : "=m" (*(C *)target) \
2511fb62fb0SOlivier Houchard : K "q" (v) \
2521fb62fb0SOlivier Houchard : "memory"); \
2531fb62fb0SOlivier Houchard return; \
2541fb62fb0SOlivier Houchard }
2551fb62fb0SOlivier Houchard
2561fb62fb0SOlivier Houchard #define CK_PR_STORE(S, M, T, C, I) \
2571fb62fb0SOlivier Houchard CK_CC_INLINE static void \
2581fb62fb0SOlivier Houchard ck_pr_md_store_##S(M *target, T v) \
2591fb62fb0SOlivier Houchard { \
2601fb62fb0SOlivier Houchard __asm__ __volatile__(I " %1, %0" \
2611fb62fb0SOlivier Houchard : "=m" (*(C *)target) \
2621fb62fb0SOlivier Houchard : "q" (v) \
2631fb62fb0SOlivier Houchard : "memory"); \
2641fb62fb0SOlivier Houchard return; \
2651fb62fb0SOlivier Houchard }
2661fb62fb0SOlivier Houchard
267d75884dfSMark Johnston CK_PR_STORE_IMM(ptr, void, const void *, uint64_t, "movq", CK_CC_IMM_U32)
2688f87df16SOlivier Houchard #ifndef CK_PR_DISABLE_DOUBLE
2691fb62fb0SOlivier Houchard CK_PR_STORE(double, double, double, double, "movq")
2708f87df16SOlivier Houchard #endif
2711fb62fb0SOlivier Houchard
2721fb62fb0SOlivier Houchard #define CK_PR_STORE_S(S, T, I, K) CK_PR_STORE_IMM(S, T, T, T, I, K)
2731fb62fb0SOlivier Houchard
2741fb62fb0SOlivier Houchard CK_PR_STORE_S(char, char, "movb", CK_CC_IMM_S32)
2751fb62fb0SOlivier Houchard CK_PR_STORE_S(int, int, "movl", CK_CC_IMM_S32)
2761fb62fb0SOlivier Houchard CK_PR_STORE_S(uint, unsigned int, "movl", CK_CC_IMM_U32)
2771fb62fb0SOlivier Houchard CK_PR_STORE_S(64, uint64_t, "movq", CK_CC_IMM_U32)
2781fb62fb0SOlivier Houchard CK_PR_STORE_S(32, uint32_t, "movl", CK_CC_IMM_U32)
2791fb62fb0SOlivier Houchard CK_PR_STORE_S(16, uint16_t, "movw", CK_CC_IMM_U32)
2801fb62fb0SOlivier Houchard CK_PR_STORE_S(8, uint8_t, "movb", CK_CC_IMM_U32)
2811fb62fb0SOlivier Houchard
2821fb62fb0SOlivier Houchard #undef CK_PR_STORE_S
2831fb62fb0SOlivier Houchard #undef CK_PR_STORE_IMM
2841fb62fb0SOlivier Houchard #undef CK_PR_STORE
2851fb62fb0SOlivier Houchard
2861fb62fb0SOlivier Houchard /*
2871fb62fb0SOlivier Houchard * Atomic fetch-and-add operations.
2881fb62fb0SOlivier Houchard */
2891fb62fb0SOlivier Houchard #define CK_PR_FAA(S, M, T, C, I) \
2901fb62fb0SOlivier Houchard CK_CC_INLINE static T \
2911fb62fb0SOlivier Houchard ck_pr_faa_##S(M *target, T d) \
2921fb62fb0SOlivier Houchard { \
2931fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \
2941fb62fb0SOlivier Houchard : "+m" (*(C *)target), \
2951fb62fb0SOlivier Houchard "+q" (d) \
2961fb62fb0SOlivier Houchard : \
2971fb62fb0SOlivier Houchard : "memory", "cc"); \
2981fb62fb0SOlivier Houchard return (d); \
2991fb62fb0SOlivier Houchard }
3001fb62fb0SOlivier Houchard
301d75884dfSMark Johnston CK_PR_FAA(ptr, void, uintptr_t, uint64_t, "xaddq")
3021fb62fb0SOlivier Houchard
3031fb62fb0SOlivier Houchard #define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I)
3041fb62fb0SOlivier Houchard
3051fb62fb0SOlivier Houchard CK_PR_FAA_S(char, char, "xaddb")
3061fb62fb0SOlivier Houchard CK_PR_FAA_S(uint, unsigned int, "xaddl")
3071fb62fb0SOlivier Houchard CK_PR_FAA_S(int, int, "xaddl")
3081fb62fb0SOlivier Houchard CK_PR_FAA_S(64, uint64_t, "xaddq")
3091fb62fb0SOlivier Houchard CK_PR_FAA_S(32, uint32_t, "xaddl")
3101fb62fb0SOlivier Houchard CK_PR_FAA_S(16, uint16_t, "xaddw")
3111fb62fb0SOlivier Houchard CK_PR_FAA_S(8, uint8_t, "xaddb")
3121fb62fb0SOlivier Houchard
3131fb62fb0SOlivier Houchard #undef CK_PR_FAA_S
3141fb62fb0SOlivier Houchard #undef CK_PR_FAA
3151fb62fb0SOlivier Houchard
3161fb62fb0SOlivier Houchard /*
3171fb62fb0SOlivier Houchard * Atomic store-only unary operations.
3181fb62fb0SOlivier Houchard */
3191fb62fb0SOlivier Houchard #define CK_PR_UNARY(K, S, T, C, I) \
3201fb62fb0SOlivier Houchard CK_PR_UNARY_R(K, S, T, C, I) \
3211fb62fb0SOlivier Houchard CK_PR_UNARY_V(K, S, T, C, I)
3221fb62fb0SOlivier Houchard
3231fb62fb0SOlivier Houchard #define CK_PR_UNARY_R(K, S, T, C, I) \
3241fb62fb0SOlivier Houchard CK_CC_INLINE static void \
3251fb62fb0SOlivier Houchard ck_pr_##K##_##S(T *target) \
3261fb62fb0SOlivier Houchard { \
3271fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0" \
3281fb62fb0SOlivier Houchard : "+m" (*(C *)target) \
3291fb62fb0SOlivier Houchard : \
3301fb62fb0SOlivier Houchard : "memory", "cc"); \
3311fb62fb0SOlivier Houchard return; \
3321fb62fb0SOlivier Houchard }
3331fb62fb0SOlivier Houchard
3341fb62fb0SOlivier Houchard #define CK_PR_UNARY_V(K, S, T, C, I) \
335725de581SAndriy Gapon CK_CC_INLINE static bool \
336725de581SAndriy Gapon ck_pr_##K##_##S##_is_zero(T *target) \
3371fb62fb0SOlivier Houchard { \
338725de581SAndriy Gapon bool ret; \
3391fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \
3401fb62fb0SOlivier Houchard : "+m" (*(C *)target), \
341725de581SAndriy Gapon "=rm" (ret) \
3421fb62fb0SOlivier Houchard : \
3431fb62fb0SOlivier Houchard : "memory", "cc"); \
344725de581SAndriy Gapon return ret; \
3451fb62fb0SOlivier Houchard }
3461fb62fb0SOlivier Houchard
3471fb62fb0SOlivier Houchard #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I)
3481fb62fb0SOlivier Houchard
3491fb62fb0SOlivier Houchard #define CK_PR_GENERATE(K) \
350d75884dfSMark Johnston CK_PR_UNARY(K, ptr, void, uint64_t, #K "q") \
3511fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, char, char, #K "b") \
3521fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, int, int, #K "l") \
3531fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, uint, unsigned int, #K "l") \
3541fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, 64, uint64_t, #K "q") \
3551fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, 32, uint32_t, #K "l") \
3561fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, 16, uint16_t, #K "w") \
3571fb62fb0SOlivier Houchard CK_PR_UNARY_S(K, 8, uint8_t, #K "b")
3581fb62fb0SOlivier Houchard
CK_PR_GENERATE(inc)3591fb62fb0SOlivier Houchard CK_PR_GENERATE(inc)
3601fb62fb0SOlivier Houchard CK_PR_GENERATE(dec)
3611fb62fb0SOlivier Houchard CK_PR_GENERATE(neg)
3621fb62fb0SOlivier Houchard
3631fb62fb0SOlivier Houchard /* not does not affect condition flags. */
3641fb62fb0SOlivier Houchard #undef CK_PR_UNARY_V
3651fb62fb0SOlivier Houchard #define CK_PR_UNARY_V(a, b, c, d, e)
3661fb62fb0SOlivier Houchard CK_PR_GENERATE(not)
3671fb62fb0SOlivier Houchard
3681fb62fb0SOlivier Houchard #undef CK_PR_GENERATE
3691fb62fb0SOlivier Houchard #undef CK_PR_UNARY_S
3701fb62fb0SOlivier Houchard #undef CK_PR_UNARY_V
3711fb62fb0SOlivier Houchard #undef CK_PR_UNARY_R
3721fb62fb0SOlivier Houchard #undef CK_PR_UNARY
3731fb62fb0SOlivier Houchard
3741fb62fb0SOlivier Houchard /*
3751fb62fb0SOlivier Houchard * Atomic store-only binary operations.
3761fb62fb0SOlivier Houchard */
3771fb62fb0SOlivier Houchard #define CK_PR_BINARY(K, S, M, T, C, I, O) \
3781fb62fb0SOlivier Houchard CK_CC_INLINE static void \
3791fb62fb0SOlivier Houchard ck_pr_##K##_##S(M *target, T d) \
3801fb62fb0SOlivier Houchard { \
3811fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \
3821fb62fb0SOlivier Houchard : "+m" (*(C *)target) \
3831fb62fb0SOlivier Houchard : O "q" (d) \
3841fb62fb0SOlivier Houchard : "memory", "cc"); \
3851fb62fb0SOlivier Houchard return; \
3861fb62fb0SOlivier Houchard }
3871fb62fb0SOlivier Houchard
3881fb62fb0SOlivier Houchard #define CK_PR_BINARY_S(K, S, T, I, O) CK_PR_BINARY(K, S, T, T, T, I, O)
3891fb62fb0SOlivier Houchard
3901fb62fb0SOlivier Houchard #define CK_PR_GENERATE(K) \
391d75884dfSMark Johnston CK_PR_BINARY(K, ptr, void, uintptr_t, uint64_t, #K "q", CK_CC_IMM_U32) \
3921fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, char, char, #K "b", CK_CC_IMM_S32) \
3931fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, int, int, #K "l", CK_CC_IMM_S32) \
3941fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, uint, unsigned int, #K "l", CK_CC_IMM_U32) \
3951fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, 64, uint64_t, #K "q", CK_CC_IMM_U32) \
3961fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, 32, uint32_t, #K "l", CK_CC_IMM_U32) \
3971fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, 16, uint16_t, #K "w", CK_CC_IMM_U32) \
3981fb62fb0SOlivier Houchard CK_PR_BINARY_S(K, 8, uint8_t, #K "b", CK_CC_IMM_U32)
3991fb62fb0SOlivier Houchard
4001fb62fb0SOlivier Houchard CK_PR_GENERATE(add)
4011fb62fb0SOlivier Houchard CK_PR_GENERATE(sub)
4021fb62fb0SOlivier Houchard CK_PR_GENERATE(and)
4031fb62fb0SOlivier Houchard CK_PR_GENERATE(or)
4041fb62fb0SOlivier Houchard CK_PR_GENERATE(xor)
4051fb62fb0SOlivier Houchard
4061fb62fb0SOlivier Houchard #undef CK_PR_GENERATE
4071fb62fb0SOlivier Houchard #undef CK_PR_BINARY_S
4081fb62fb0SOlivier Houchard #undef CK_PR_BINARY
4091fb62fb0SOlivier Houchard
4101fb62fb0SOlivier Houchard /*
411*74e9b5f2SOlivier Houchard * Atomic compare and swap, with a variant that sets *v to the old value of target.
4121fb62fb0SOlivier Houchard */
413*74e9b5f2SOlivier Houchard #ifdef __GCC_ASM_FLAG_OUTPUTS__
414*74e9b5f2SOlivier Houchard #define CK_PR_CAS(S, M, T, C, I) \
415*74e9b5f2SOlivier Houchard CK_CC_INLINE static bool \
416*74e9b5f2SOlivier Houchard ck_pr_cas_##S(M *target, T compare, T set) \
417*74e9b5f2SOlivier Houchard { \
418*74e9b5f2SOlivier Houchard bool z; \
419*74e9b5f2SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \
420*74e9b5f2SOlivier Houchard : "+m" (*(C *)target), \
421*74e9b5f2SOlivier Houchard "=@ccz" (z), \
422*74e9b5f2SOlivier Houchard /* RAX is clobbered by cmpxchg. */ \
423*74e9b5f2SOlivier Houchard "+a" (compare) \
424*74e9b5f2SOlivier Houchard : "q" (set) \
425*74e9b5f2SOlivier Houchard : "memory", "cc"); \
426*74e9b5f2SOlivier Houchard return z; \
427*74e9b5f2SOlivier Houchard } \
428*74e9b5f2SOlivier Houchard \
429*74e9b5f2SOlivier Houchard CK_CC_INLINE static bool \
430*74e9b5f2SOlivier Houchard ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
431*74e9b5f2SOlivier Houchard { \
432*74e9b5f2SOlivier Houchard bool z; \
433*74e9b5f2SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \
434*74e9b5f2SOlivier Houchard : "+m" (*(C *)target), \
435*74e9b5f2SOlivier Houchard "=@ccz" (z), \
436*74e9b5f2SOlivier Houchard "+a" (compare) \
437*74e9b5f2SOlivier Houchard : "q" (set) \
438*74e9b5f2SOlivier Houchard : "memory", "cc"); \
439*74e9b5f2SOlivier Houchard *(T *)v = compare; \
440*74e9b5f2SOlivier Houchard return z; \
441*74e9b5f2SOlivier Houchard }
442*74e9b5f2SOlivier Houchard #else
4431fb62fb0SOlivier Houchard #define CK_PR_CAS(S, M, T, C, I) \
4441fb62fb0SOlivier Houchard CK_CC_INLINE static bool \
4451fb62fb0SOlivier Houchard ck_pr_cas_##S(M *target, T compare, T set) \
4461fb62fb0SOlivier Houchard { \
4471fb62fb0SOlivier Houchard bool z; \
4481fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1" \
4491fb62fb0SOlivier Houchard : "+m" (*(C *)target), \
4501fb62fb0SOlivier Houchard "=a" (z) \
4511fb62fb0SOlivier Houchard : "q" (set), \
4521fb62fb0SOlivier Houchard "a" (compare) \
4531fb62fb0SOlivier Houchard : "memory", "cc"); \
4541fb62fb0SOlivier Houchard return z; \
455*74e9b5f2SOlivier Houchard } \
456*74e9b5f2SOlivier Houchard \
457*74e9b5f2SOlivier Houchard CK_CC_INLINE static bool \
458*74e9b5f2SOlivier Houchard ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \
459*74e9b5f2SOlivier Houchard { \
460*74e9b5f2SOlivier Houchard bool z; \
461*74e9b5f2SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \
462*74e9b5f2SOlivier Houchard "setz %1;" \
463*74e9b5f2SOlivier Houchard : "+m" (*(C *)target), \
464*74e9b5f2SOlivier Houchard "=q" (z), \
465*74e9b5f2SOlivier Houchard "+a" (compare) \
466*74e9b5f2SOlivier Houchard : "q" (set) \
467*74e9b5f2SOlivier Houchard : "memory", "cc"); \
468*74e9b5f2SOlivier Houchard *(T *)v = compare; \
469*74e9b5f2SOlivier Houchard return z; \
4701fb62fb0SOlivier Houchard }
471*74e9b5f2SOlivier Houchard #endif
4721fb62fb0SOlivier Houchard
473d75884dfSMark Johnston CK_PR_CAS(ptr, void, void *, uint64_t, "cmpxchgq")
4741fb62fb0SOlivier Houchard
4751fb62fb0SOlivier Houchard #define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I)
4761fb62fb0SOlivier Houchard
4771fb62fb0SOlivier Houchard CK_PR_CAS_S(char, char, "cmpxchgb")
4781fb62fb0SOlivier Houchard CK_PR_CAS_S(int, int, "cmpxchgl")
4791fb62fb0SOlivier Houchard CK_PR_CAS_S(uint, unsigned int, "cmpxchgl")
4808f87df16SOlivier Houchard #ifndef CK_PR_DISABLE_DOUBLE
4811fb62fb0SOlivier Houchard CK_PR_CAS_S(double, double, "cmpxchgq")
4828f87df16SOlivier Houchard #endif
4831fb62fb0SOlivier Houchard CK_PR_CAS_S(64, uint64_t, "cmpxchgq")
4841fb62fb0SOlivier Houchard CK_PR_CAS_S(32, uint32_t, "cmpxchgl")
4851fb62fb0SOlivier Houchard CK_PR_CAS_S(16, uint16_t, "cmpxchgw")
4861fb62fb0SOlivier Houchard CK_PR_CAS_S(8, uint8_t, "cmpxchgb")
4871fb62fb0SOlivier Houchard
4881fb62fb0SOlivier Houchard #undef CK_PR_CAS_S
4891fb62fb0SOlivier Houchard #undef CK_PR_CAS
4901fb62fb0SOlivier Houchard
4911fb62fb0SOlivier Houchard /*
4921fb62fb0SOlivier Houchard * Contrary to C-interface, alignment requirements are that of uint64_t[2].
4931fb62fb0SOlivier Houchard */
4941fb62fb0SOlivier Houchard CK_CC_INLINE static bool
4951fb62fb0SOlivier Houchard ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2])
4961fb62fb0SOlivier Houchard {
4971fb62fb0SOlivier Houchard bool z;
4981fb62fb0SOlivier Houchard
4991fb62fb0SOlivier Houchard __asm__ __volatile__("movq 0(%4), %%rax;"
5001fb62fb0SOlivier Houchard "movq 8(%4), %%rdx;"
5011fb62fb0SOlivier Houchard CK_PR_LOCK_PREFIX "cmpxchg16b %0; setz %1"
5021fb62fb0SOlivier Houchard : "+m" (*target),
5031fb62fb0SOlivier Houchard "=q" (z)
5041fb62fb0SOlivier Houchard : "b" (set[0]),
5051fb62fb0SOlivier Houchard "c" (set[1]),
5061fb62fb0SOlivier Houchard "q" (compare)
5071fb62fb0SOlivier Houchard : "memory", "cc", "%rax", "%rdx");
5081fb62fb0SOlivier Houchard return z;
5091fb62fb0SOlivier Houchard }
5101fb62fb0SOlivier Houchard
5111fb62fb0SOlivier Houchard CK_CC_INLINE static bool
ck_pr_cas_ptr_2(void * t,void * c,void * s)5121fb62fb0SOlivier Houchard ck_pr_cas_ptr_2(void *t, void *c, void *s)
5131fb62fb0SOlivier Houchard {
5141fb62fb0SOlivier Houchard return ck_pr_cas_64_2(CK_CPP_CAST(uint64_t *, t),
5151fb62fb0SOlivier Houchard CK_CPP_CAST(uint64_t *, c),
5161fb62fb0SOlivier Houchard CK_CPP_CAST(uint64_t *, s));
5171fb62fb0SOlivier Houchard }
5181fb62fb0SOlivier Houchard
5191fb62fb0SOlivier Houchard CK_CC_INLINE static bool
ck_pr_cas_64_2_value(uint64_t target[2],uint64_t compare[2],uint64_t set[2],uint64_t v[2])5201fb62fb0SOlivier Houchard ck_pr_cas_64_2_value(uint64_t target[2],
5211fb62fb0SOlivier Houchard uint64_t compare[2],
5221fb62fb0SOlivier Houchard uint64_t set[2],
5231fb62fb0SOlivier Houchard uint64_t v[2])
5241fb62fb0SOlivier Houchard {
5251fb62fb0SOlivier Houchard bool z;
5261fb62fb0SOlivier Houchard
5271fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg16b %0;"
5281fb62fb0SOlivier Houchard "setz %3"
5291fb62fb0SOlivier Houchard : "+m" (*target),
5301fb62fb0SOlivier Houchard "=a" (v[0]),
5311fb62fb0SOlivier Houchard "=d" (v[1]),
5321fb62fb0SOlivier Houchard "=q" (z)
5331fb62fb0SOlivier Houchard : "a" (compare[0]),
5341fb62fb0SOlivier Houchard "d" (compare[1]),
5351fb62fb0SOlivier Houchard "b" (set[0]),
5361fb62fb0SOlivier Houchard "c" (set[1])
5371fb62fb0SOlivier Houchard : "memory", "cc");
5381fb62fb0SOlivier Houchard return z;
5391fb62fb0SOlivier Houchard }
5401fb62fb0SOlivier Houchard
5411fb62fb0SOlivier Houchard CK_CC_INLINE static bool
ck_pr_cas_ptr_2_value(void * t,void * c,void * s,void * v)5421fb62fb0SOlivier Houchard ck_pr_cas_ptr_2_value(void *t, void *c, void *s, void *v)
5431fb62fb0SOlivier Houchard {
5441fb62fb0SOlivier Houchard return ck_pr_cas_64_2_value(CK_CPP_CAST(uint64_t *,t),
5451fb62fb0SOlivier Houchard CK_CPP_CAST(uint64_t *,c),
5461fb62fb0SOlivier Houchard CK_CPP_CAST(uint64_t *,s),
5471fb62fb0SOlivier Houchard CK_CPP_CAST(uint64_t *,v));
5481fb62fb0SOlivier Houchard }
5491fb62fb0SOlivier Houchard
5501fb62fb0SOlivier Houchard #define CK_PR_CAS_V(S, W, T) \
5511fb62fb0SOlivier Houchard CK_CC_INLINE static bool \
5521fb62fb0SOlivier Houchard ck_pr_cas_##S##_##W(T t[W], T c[W], T s[W]) \
5531fb62fb0SOlivier Houchard { \
5541fb62fb0SOlivier Houchard return ck_pr_cas_64_2((uint64_t *)(void *)t, \
5551fb62fb0SOlivier Houchard (uint64_t *)(void *)c, \
5561fb62fb0SOlivier Houchard (uint64_t *)(void *)s); \
5571fb62fb0SOlivier Houchard } \
5581fb62fb0SOlivier Houchard CK_CC_INLINE static bool \
5591fb62fb0SOlivier Houchard ck_pr_cas_##S##_##W##_value(T *t, T c[W], T s[W], T *v) \
5601fb62fb0SOlivier Houchard { \
5611fb62fb0SOlivier Houchard return ck_pr_cas_64_2_value((uint64_t *)(void *)t, \
5621fb62fb0SOlivier Houchard (uint64_t *)(void *)c, \
5631fb62fb0SOlivier Houchard (uint64_t *)(void *)s, \
5641fb62fb0SOlivier Houchard (uint64_t *)(void *)v); \
5651fb62fb0SOlivier Houchard }
5661fb62fb0SOlivier Houchard
5678f87df16SOlivier Houchard #ifndef CK_PR_DISABLE_DOUBLE
5681fb62fb0SOlivier Houchard CK_PR_CAS_V(double, 2, double)
5698f87df16SOlivier Houchard #endif
5701fb62fb0SOlivier Houchard CK_PR_CAS_V(char, 16, char)
5711fb62fb0SOlivier Houchard CK_PR_CAS_V(int, 4, int)
5721fb62fb0SOlivier Houchard CK_PR_CAS_V(uint, 4, unsigned int)
5731fb62fb0SOlivier Houchard CK_PR_CAS_V(32, 4, uint32_t)
5741fb62fb0SOlivier Houchard CK_PR_CAS_V(16, 8, uint16_t)
5751fb62fb0SOlivier Houchard CK_PR_CAS_V(8, 16, uint8_t)
5761fb62fb0SOlivier Houchard
5771fb62fb0SOlivier Houchard #undef CK_PR_CAS_V
5781fb62fb0SOlivier Houchard
5791fb62fb0SOlivier Houchard /*
5801fb62fb0SOlivier Houchard * Atomic bit test operations.
5811fb62fb0SOlivier Houchard */
5821fb62fb0SOlivier Houchard #define CK_PR_BT(K, S, T, P, C, I) \
5831fb62fb0SOlivier Houchard CK_CC_INLINE static bool \
5841fb62fb0SOlivier Houchard ck_pr_##K##_##S(T *target, unsigned int b) \
5851fb62fb0SOlivier Houchard { \
5861fb62fb0SOlivier Houchard bool c; \
5871fb62fb0SOlivier Houchard __asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1" \
5881fb62fb0SOlivier Houchard : "+m" (*(C *)target), \
5891fb62fb0SOlivier Houchard "=q" (c) \
5901fb62fb0SOlivier Houchard : "q" ((P)b) \
5911fb62fb0SOlivier Houchard : "memory", "cc"); \
5921fb62fb0SOlivier Houchard return c; \
5931fb62fb0SOlivier Houchard }
5941fb62fb0SOlivier Houchard
5951fb62fb0SOlivier Houchard #define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I)
5961fb62fb0SOlivier Houchard
5971fb62fb0SOlivier Houchard #define CK_PR_GENERATE(K) \
598d75884dfSMark Johnston CK_PR_BT(K, ptr, void, uint64_t, uint64_t, #K "q %2, %0") \
5991fb62fb0SOlivier Houchard CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \
6001fb62fb0SOlivier Houchard CK_PR_BT_S(K, int, int, #K "l %2, %0") \
6011fb62fb0SOlivier Houchard CK_PR_BT_S(K, 64, uint64_t, #K "q %2, %0") \
6021fb62fb0SOlivier Houchard CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \
6031fb62fb0SOlivier Houchard CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0")
6041fb62fb0SOlivier Houchard
6051fb62fb0SOlivier Houchard CK_PR_GENERATE(btc)
6061fb62fb0SOlivier Houchard CK_PR_GENERATE(bts)
6071fb62fb0SOlivier Houchard CK_PR_GENERATE(btr)
6081fb62fb0SOlivier Houchard
6091fb62fb0SOlivier Houchard #undef CK_PR_GENERATE
6101fb62fb0SOlivier Houchard #undef CK_PR_BT
6111fb62fb0SOlivier Houchard
6121fb62fb0SOlivier Houchard #endif /* CK_PR_X86_64_H */
6131fb62fb0SOlivier Houchard
614