
 * ck_ec implements 32- and 64-bit event counts. Event counts let us
 * easily integrate OS-level blocking (e.g., futexes) in lock-free
 * protocols. Waiters block conditionally, if the event count's value
 * is still equal to some caller-supplied old value.
 * especially in single producer mode. The 64 bit variants are larger,
 * but practically invulnerable to ABA.
 * available if CK supports 64-bit atomic operations. Currently, the
 * single producer specialization is only implemented for x86 and
 * x86-64, on compilers that support GCC extended inline assembly;
 * other platforms fall back to the multiple producer code path.
 * A typical usage pattern:
 *
 *  1. On the producer side:
 *
 *    - Make changes to some shared data structure, without involving
 *      the event count at all.
 *    - After each change, call ck_ec_inc on the event count. The call
 *      acts as a write-write barrier, and wakes up any consumer blocked
 *      on the event count.
 *
 *  2. On the consumer side:
 *
 *    - Snapshot ck_ec_value of the event count. The call acts as a
 *      read barrier.
 *    - Read and process the shared data structure.
 *    - Wait for new changes by calling ck_ec_wait with the snapshot
 *      value, as sketched below.
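 *
 * A minimal sketch of that protocol, using the type-generic names
 * from the API reference below (`ec`, `mode`, and the shared-data
 * helpers are hypothetical and assumed to be defined elsewhere):
 *
 *    Producer:
 *	update_shared_data();
 *	ck_ec_inc(&ec, &mode);
 *
 *    Consumer:
 *	for (;;) {
 *		uint32_t snapshot = ck_ec_value(&ec);
 *
 *		read_and_process_shared_data();
 *		ck_ec_wait(&ec, &mode, snapshot, NULL);
 *	}
 *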
 * For tighter integration, a data structure like an SPMC ring buffer
 * can use the event count's value as the write pointer. If the buffer
 * is regularly full, it may also make sense to store the read pointer
 * in an MP event count.
 * Consumers may also pass a predicate to ck_ec_wait_pred. The predicate
 * can make `ck_ec_wait_pred` return early, before the event count's
 * value has even changed, and can adjust the deadline used for each
 * backoff iteration.
 * When compiled as C11 or later, this header defines type-generic
 * macros over ck_ec32 and ck_ec64; this reference describes that
 * type-generic API.
 * mode is a struct ck_ec_mode *.
 * `void ck_ec_inc(ec, mode)`: increments the value of the event
 * counter by one. The write acts as a write barrier, and wakes up
 * any waiters.
 * `value ck_ec_add(ec, mode, value)`: increments the event counter by
 * `value`, and returns the event counter's previous value. This
 * write also acts as a write barrier, and wakes up any waiters.
 * `int ck_ec_deadline(new_deadline, mode, timeout)`: computes a
 * deadline `timeout` away from the current time and stores it in
 * `new_deadline`; a NULL `timeout` yields a deadline in the infinite
 * future. Returns 0 on success, and -1 if ops->gettime failed
 * (without touching errno).
 * `int ck_ec_wait(ec, mode, value, deadline)`: waits until the event
 * counter's value differs from `value`, or, if `deadline` is
 * provided and non-NULL, until the current time is after that
 * deadline. Use a deadline with tv_sec = 0 for a non-blocking
 * execution. Returns 0 if the event counter has changed, and -1 on
 * timeout. This function acts as a read (acquire) barrier.
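 *
 * For example, to wait for a change for at most one millisecond (a
 * sketch; `ec`, `mode`, and `snapshot` are as in the protocol sketch
 * above):
 *
 *    struct timespec deadline;
 *    const struct timespec timeout = { .tv_sec = 0, .tv_nsec = 1000 * 1000 };
 *
 *    if (ck_ec_deadline(&deadline, &mode, &timeout) == 0)
 *	    (void)ck_ec_wait(&ec, &mode, snapshot, &deadline);
 *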
 * `int ck_ec_wait_pred(ec, mode, value, pred, data, deadline)`: waits
 * until the event counter's value differs from `value`, or until
 * `pred` returns non-zero, or, if `deadline` is provided and
 * non-NULL, until the current time is after that deadline. Use a
 * deadline with tv_sec = 0 for a non-blocking execution. Returns 0 if
 * the event counter has changed, `pred`'s return value if non-zero,
 * and -1 on timeout. This function acts as a read (acquire) barrier.
 * `pred` is called with the current wait state and the deadline for
 * the current backoff iteration, which it may tighten. If
 * `pred` returns a non-zero value, that value is immediately returned
 * by ck_ec_wait_pred.
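 *
 * For example, a consumer could use a predicate to also poll a
 * cancellation flag (a sketch: the wait-state-based signature matches
 * the pred declarations later in this header, `state->data` is the
 * opaque `data` pointer, and -2 is an arbitrary non-zero return
 * value; the `stop` flag itself is hypothetical):
 *
 *    static int stop_pred(const struct ck_ec_wait_state *state,
 *			   struct timespec *deadline)
 *    {
 *	    (void)deadline;
 *
 *	    return ck_pr_load_int((int *)state->data) != 0 ? -2 : 0;
 *    }
 *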
 * The single producer specialization is heavily tied to
 * [x86-TSO](https://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf), and
 * to non-atomic read-modify-write instructions (e.g., `inc mem`);
 * these non-atomic RMW let us write to the same memory locations with
 * atomic and non-atomic instructions, without suffering from process
 * scheduling stalls.
 * The reason we can mix atomic and non-atomic writes to the `counter`
 * word is that every non-atomic write obviates the need for the
 * atomically flipped flag bit: we only use non-atomic writes to
 * update the counter's value, and a change in that value is exactly
 * what blocked waiters are spinning on.
 * We only require the non-atomic RMW counter update to prevent
 * pre-emption from introducing arbitrarily long worst-case delays.
 *
 * Correctness does not hinge on any ordering between atomic and
 * non-atomic writes. The key is instead x86-TSO's guarantee that a
 * read is satisfied from the most recent write to the same location
 * in the local store queue, if there is one, and from memory
 * otherwise.
 *
 * x86-TSO's constraint on reads suffices to guarantee that the
 * producer never loses one of its own counter updates: each
 * read-modify-write sees the freshest local value.
 *
 * When the producer forwards the counter's value from its store
 * queue, however, the forwarded value may lack a flag flip that a
 * waiter published to memory in the meantime; waiters must thus
 * periodically re-check whether they still have to sleep.
 * In reality, the store queue in x86-TSO stands for in-flight
 * instructions in the chip's out-of-order backend. In the vast
 * majority of cases, instructions only stay in flight for a
 * few hundred or thousand cycles. That's why ck_ec_wait spins on
 * the counter word for a while after flipping the flag bit: if the
 * counter still has not changed after that spin loop, it is
 * very likely that the producer's next counter update will observe
 * the flag flip.
 * That's still not a hard guarantee of correctness. Conservatively,
 * we can assume no instruction stays in flight for more than, say,
 * one second, if only because an interrupt will eventually force the
 * chip to commit or roll back its in-flight state. Interrupts,
 * particularly the pre-emption timer, are why single-producer updates
 * must happen in a single non-atomic read-modify-write instruction.
 * With a single instruction as the critical section, we only
 * have to consider the worst-case execution time for that
 * instruction. That's easier than doing the same for a pair of
 * instructions, which an unlucky pre-emption could delay for
 * arbitrarily long.
 * The 64 bit ck_ec_wait pulls another trick: futexes only handle 32
 * bit ints, so we must treat the 64 bit counter's low 32 bits as an
 * int in futex_wait. That's a bit dodgy, but fine in practice, given
 * that the OS's futex code will always read whatever value is
 * currently in memory: by the time the kernel compares the futex
 * word, entering the kernel has long drained
 * the store queue (the out-of-order execution backend).
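 *
 * Concretely, the 64 bit wait only needs the address of the counter's
 * low half (a sketch; the cast is only valid on little endian, which
 * is part of why this specialization is x86-64 only):
 *
 *    uint32_t *futex_word = (uint32_t *)&ec->counter;
 *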
 * Finally, what happens when the producer is migrated to another core
 * or otherwise pre-empted? Migration must already incur a barrier, so
 * that thread always sees its own writes, and that case is safe. As for
 * pre-emption, that requires storing the architectural state, which
 * means every instruction either executes fully or not at
 * all when pre-emption happens.
 * GCC inline assembly lets us exploit non-atomic read-modify-write
 * instructions on x86/x86_64 for a fast single-producer mode.
	void *data; /* Opaque pointer for the predicate's internal state. */
 * ck_ec_ops defines system-specific functions to get the current time,
 * to wait on an address, and to wake all threads waiting on an address.
	/* Populates out with the current time. Returns non-zero on failure. */
	/*
	 * Waits on an address until its value changes. If the
	 * deadline is non-NULL, stops waiting once that deadline is
	 * past.
	 */
 * The mode struct wraps the ops and tells ck_ec whether
 * it should attempt to specialize for single producer mode.
 *
 * mode structs are expected to be exposed by value, e.g.,
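 *
 *    (A sketch; `system_ec_ops` stands for whatever ck_ec_ops the
 *    program defines. The .ops and .single_producer fields match the
 *    accesses in the code below.)
 *
 *    extern const struct ck_ec_ops system_ec_ops;
 *
 *    static const struct ck_ec_mode ec_sp = {
 *	    .ops = &system_ec_ops,
 *	    .single_producer = true
 *    };
 *
 *    static const struct ck_ec_mode ec_mp = {
 *	    .ops = &system_ec_ops,
 *	    .single_producer = false
 *    };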
	 * ... this overlap between the futex word and the counter only
	 * works on x86-64 (i.e., little endian), so the futex int
	 * occupies the counter's bottom four bytes.
static void ck_ec32_inc(struct ck_ec32 *ec, const struct ck_ec_mode *mode);

static void ck_ec64_inc(struct ck_ec64 *ec, const struct ck_ec_mode *mode);
#define ck_ec_inc(EC, MODE)						\
	(_Generic(*(EC),						\
		  struct ck_ec32 : ck_ec32_inc,				\
		  struct ck_ec64 : ck_ec64_inc)((EC), (MODE)))
static uint32_t ck_ec32_add(struct ck_ec32 *ec,
			    const struct ck_ec_mode *mode,
			    uint32_t delta);

static uint64_t ck_ec64_add(struct ck_ec64 *ec,
			    const struct ck_ec_mode *mode,
			    uint64_t delta);
#define ck_ec_add(EC, MODE, DELTA)					\
	(_Generic(*(EC),						\
		  struct ck_ec32 : ck_ec32_add,				\
		  struct ck_ec64 : ck_ec64_add)((EC), (MODE), (DELTA)))
/*
 * Computes a deadline `timeout` away from the current time.
 *
 * Returns 0 on success, and -1 if clock_gettime failed, in which
 * case errno is left untouched.
 */
static int ck_ec_deadline(struct timespec *new_deadline,
			  const struct ck_ec_mode *mode,
			  const struct timespec *timeout);

/*
 * Waits until the event counter's value differs from
 * old_value, or, if deadline is non-NULL, until CLOCK_MONOTONIC is
 * past the deadline.
 *
 * Returns 0 on success, and -1 on timeout.
 */
static int ck_ec32_wait(struct ck_ec32 *ec,
			const struct ck_ec_mode *mode,
			uint32_t old_value,
			const struct timespec *deadline);

static int ck_ec64_wait(struct ck_ec64 *ec,
			const struct ck_ec_mode *mode,
			uint64_t old_value,
			const struct timespec *deadline);
#define ck_ec_wait(EC, MODE, OLD_VALUE, DEADLINE)			\
	(_Generic(*(EC),						\
		  struct ck_ec32 : ck_ec32_wait,			\
		  struct ck_ec64 : ck_ec64_wait)((EC), (MODE),		\
						 (OLD_VALUE), (DEADLINE)))
/*
 * Waits until the event counter's value differs from
 * old_value, pred returns non-zero, or, if deadline is non-NULL,
 * until CLOCK_MONOTONIC is past the deadline.
 *
 * Returns 0 on success, -1 on timeout, and the return value of pred
 * if it returns non-zero.
 */
static int ck_ec32_wait_pred(struct ck_ec32 *ec,
			     const struct ck_ec_mode *mode,
			     uint32_t old_value,
			     int (*pred)(const struct ck_ec_wait_state *state,
					 struct timespec *deadline),
			     void *data,
			     const struct timespec *deadline);

static int ck_ec64_wait_pred(struct ck_ec64 *ec,
			     const struct ck_ec_mode *mode,
			     uint64_t old_value,
			     int (*pred)(const struct ck_ec_wait_state *state,
					 struct timespec *deadline),
			     void *data,
			     const struct timespec *deadline);
#define ck_ec_wait_pred(EC, MODE, OLD_VALUE, PRED, DATA, DEADLINE)	\
	(_Generic(*(EC),						\
		  struct ck_ec32 : ck_ec32_wait_pred,			\
		  struct ck_ec64 : ck_ec64_wait_pred)			\
	 ((EC), (MODE), (OLD_VALUE), (PRED), (DATA), (DEADLINE)))
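
/*
 * Usage sketch for the type-generic macros above (C11 or later);
 * `ec32`, `ec64`, and `mode` are assumed to be defined and
 * initialized elsewhere:
 *
 *	uint32_t snap = ck_ec32_value(&ec32);
 *
 *	ck_ec_inc(&ec32, &mode);
 *	ck_ec_inc(&ec64, &mode);
 *	(void)ck_ec_wait(&ec32, &mode, snap, NULL);
 *
 * The same macro dispatches to the 32 or 64 bit implementation based
 * on the event count's type.
 */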
static void ck_ec32_init(struct ck_ec32 *ec, uint32_t value)
{
	ec->counter = value & ~(1UL << 31);
}

static uint32_t ck_ec32_value(const struct ck_ec32 *ec)
{
	uint32_t ret = ck_pr_load_32(&ec->counter) & ~(1UL << 31);

	ck_pr_fence_acquire();
	return ret;
}

static bool ck_ec32_has_waiters(const struct ck_ec32 *ec)
{
	return ck_pr_load_32(&ec->counter) & (1UL << 31);
}
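
/*
 * Layout note, as implied by the accessors above: bit 31 of `counter`
 * is the waiters flag, and bits 0-30 hold the value, so the 32 bit
 * event count effectively counts modulo 2^31. For example, a counter
 * word of (1UL << 31) | 42 decodes as value 42, with a wake-up pending.
 */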
static void ck_ec32_inc(struct ck_ec32 *ec,
			const struct ck_ec_mode *mode)
{
	/* Without the single producer specialization, inc is a plain add. */
	ck_ec32_add(ec, mode, 1);

	/*
	 * ... the wake flag can only
	 * flip from 0 to 1 (that's once per 2^31 increments).
	 * -------------------------------------------
	 */

	/* Constraint lists for the two asm variants (newer GCCs can
	 * use flag outputs): */
			 : "+m"(ec->counter), "=@ccle"(flagged)	\
			 : "+m"(ec->counter), "=r"(flagged)	\

	if (mode->single_producer == true) {
		/* ... non-atomic RMW increment of the counter word ... */
	}
	/* ... */
	ck_ec32_wake(ec, mode->ops);
}
static uint32_t ck_ec32_add_epilogue(struct ck_ec32 *ec,
				     const struct ck_ec_mode *mode,
				     uint32_t old)
{
	/* ... if the wake flag was set in `old`, wake up waiters: */
	ck_ec32_wake(ec, mode->ops);
	/* ... */
}
static uint32_t ck_ec32_add_mp(struct ck_ec32 *ec,
			       const struct ck_ec_mode *mode,
			       uint32_t delta)
{
	uint32_t old;

	/* Multiple producers: atomic fetch-and-add on the counter word. */
	old = ck_pr_faa_32(&ec->counter, delta);
	return ck_ec32_add_epilogue(ec, mode, old);
}
static uint32_t ck_ec32_add_sp(struct ck_ec32 *ec,
			       const struct ck_ec_mode *mode,
			       uint32_t delta)
{
	uint32_t old = delta;

	/*
	 * ... non-atomic exchange-add; adding zero
	 * is a no-op. ...
	 */
	__asm__ volatile("xaddl %1, %0"
		: "+m"(ec->counter), "+r"(old)
		:: "cc", "memory");
	return ck_ec32_add_epilogue(ec, mode, old);
}
static uint32_t ck_ec32_add(struct ck_ec32 *ec,
			    const struct ck_ec_mode *mode,
			    uint32_t delta)
{
	if (mode->single_producer == true) {
		return ck_ec32_add_sp(ec, mode, delta);
	}

	return ck_ec32_add_mp(ec, mode, delta);
}
static int ck_ec_deadline(struct timespec *new_deadline,
			  const struct ck_ec_mode *mode,
			  const struct timespec *timeout)
{
	return ck_ec_deadline_impl(new_deadline, mode->ops, timeout);
}
static int ck_ec32_wait(struct ck_ec32 *ec,
			const struct ck_ec_mode *mode,
			uint32_t old_value,
			const struct timespec *deadline)
{
	/* ... fast path: return 0 immediately if the value differs ... */
	return ck_ec32_wait_slow(ec, mode->ops, old_value, deadline);
}
static int ck_ec32_wait_pred(struct ck_ec32 *ec,
			     const struct ck_ec_mode *mode,
			     uint32_t old_value,
			     int (*pred)(const struct ck_ec_wait_state *state,
					 struct timespec *deadline),
			     void *data,
			     const struct timespec *deadline)
{
	/* ... fast path elided ... */
	return ck_ec32_wait_pred_slow(ec, mode->ops, old_value,
				      pred, data, deadline);
}
static void ck_ec64_init(struct ck_ec64 *ec, uint64_t value)
{
	ec->counter = value << 1;
}

static uint64_t ck_ec64_value(const struct ck_ec64 *ec)
{
	uint64_t ret = ck_pr_load_64(&ec->counter) >> 1;

	ck_pr_fence_acquire();
	return ret;
}

static bool ck_ec64_has_waiters(const struct ck_ec64 *ec)
{
	return ck_pr_load_64(&ec->counter) & 1;
}
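
/*
 * Layout note, as implied by the accessors above: the 64 bit variant
 * keeps its waiters flag in bit 0 and stores the value shifted left
 * by one, so a counter word of (42 << 1) | 1 decodes as value 42,
 * with a wake-up pending. This is also why the add paths below
 * increment the word by 2 * delta.
 */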
static void ck_ec64_inc(struct ck_ec64 *ec,
			const struct ck_ec_mode *mode)
{
	/* We always xadd, so there's no special optimization here. */
	(void)ck_ec64_add(ec, mode, 1);
}
static uint64_t ck_ec_add64_epilogue(struct ck_ec64 *ec,
				     const struct ck_ec_mode *mode,
				     uint64_t old)
{
	/* ... if the wake flag (bit 0) was set in `old`, wake waiters: */
	ck_ec64_wake(ec, mode->ops);
	/* ... */
}
static uint64_t ck_ec64_add_mp(struct ck_ec64 *ec,
			       const struct ck_ec_mode *mode,
			       uint64_t delta)
{
	uint64_t inc = 2 * delta;	/* The value lives in bits 1..63. */

	return ck_ec_add64_epilogue(ec, mode, ck_pr_faa_64(&ec->counter, inc));
}
/* Single-producer specialisation. */
static uint64_t ck_ec64_add_sp(struct ck_ec64 *ec,
			       const struct ck_ec_mode *mode,
			       uint64_t delta)
{
	uint64_t old = 2 * delta;

	/*
	 * ... non-atomic exchange-add; adding zero
	 * is a no-op. ...
	 */
	__asm__ volatile("xaddq %1, %0"
		: "+m"(ec->counter), "+r"(old)
		:: "cc", "memory");
	return ck_ec_add64_epilogue(ec, mode, old);
}
/*
 * Dispatch on mode->single_producer in this FORCE_INLINE function:
 * since mode structs are exposed by value, the compiler can fold the
 * branch away at each call site.
 */
static uint64_t ck_ec64_add(struct ck_ec64 *ec,
			    const struct ck_ec_mode *mode,
			    uint64_t delta)
{
	if (mode->single_producer == true) {
		return ck_ec64_add_sp(ec, mode, delta);
	}

	return ck_ec64_add_mp(ec, mode, delta);
}
static int ck_ec64_wait(struct ck_ec64 *ec,
			const struct ck_ec_mode *mode,
			uint64_t old_value,
			const struct timespec *deadline)
{
	/* ... fast path: return 0 immediately if the value differs ... */
	return ck_ec64_wait_slow(ec, mode->ops, old_value, deadline);
}
static int ck_ec64_wait_pred(struct ck_ec64 *ec,
			     const struct ck_ec_mode *mode,
			     uint64_t old_value,
			     int (*pred)(const struct ck_ec_wait_state *state,
					 struct timespec *deadline),
			     void *data,
			     const struct timespec *deadline)
{
	/* ... fast path elided ... */
	return ck_ec64_wait_pred_slow(ec, mode->ops, old_value,
				      pred, data, deadline);
}