1c942fddfSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
222e4ebb9SMathieu Desnoyers /*
322e4ebb9SMathieu Desnoyers * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
422e4ebb9SMathieu Desnoyers *
522e4ebb9SMathieu Desnoyers * membarrier system call
622e4ebb9SMathieu Desnoyers */
722e4ebb9SMathieu Desnoyers
822e4ebb9SMathieu Desnoyers /*
925595eb6SMathieu Desnoyers * For documentation purposes, here are some membarrier ordering
1025595eb6SMathieu Desnoyers * scenarios to keep in mind:
1125595eb6SMathieu Desnoyers *
1225595eb6SMathieu Desnoyers * A) Userspace thread execution after IPI vs membarrier's memory
1325595eb6SMathieu Desnoyers * barrier before sending the IPI
1425595eb6SMathieu Desnoyers *
1525595eb6SMathieu Desnoyers * Userspace variables:
1625595eb6SMathieu Desnoyers *
1725595eb6SMathieu Desnoyers * int x = 0, y = 0;
1825595eb6SMathieu Desnoyers *
1925595eb6SMathieu Desnoyers * The memory barrier at the start of membarrier() on CPU0 is necessary in
2025595eb6SMathieu Desnoyers * order to enforce the guarantee that any writes occurring on CPU0 before
2125595eb6SMathieu Desnoyers * the membarrier() is executed will be visible to any code executing on
2225595eb6SMathieu Desnoyers * CPU1 after the IPI-induced memory barrier:
2325595eb6SMathieu Desnoyers *
2425595eb6SMathieu Desnoyers * CPU0 CPU1
2525595eb6SMathieu Desnoyers *
2625595eb6SMathieu Desnoyers * x = 1
2725595eb6SMathieu Desnoyers * membarrier():
2825595eb6SMathieu Desnoyers * a: smp_mb()
2925595eb6SMathieu Desnoyers * b: send IPI IPI-induced mb
3025595eb6SMathieu Desnoyers * c: smp_mb()
3125595eb6SMathieu Desnoyers * r2 = y
3225595eb6SMathieu Desnoyers * y = 1
3325595eb6SMathieu Desnoyers * barrier()
3425595eb6SMathieu Desnoyers * r1 = x
3525595eb6SMathieu Desnoyers *
3625595eb6SMathieu Desnoyers * BUG_ON(r1 == 0 && r2 == 0)
3725595eb6SMathieu Desnoyers *
3825595eb6SMathieu Desnoyers * The write to y and load from x by CPU1 are unordered by the hardware,
3925595eb6SMathieu Desnoyers * so it's possible to have "r1 = x" reordered before "y = 1" at any
4025595eb6SMathieu Desnoyers * point after (b). If the memory barrier at (a) is omitted, then "x = 1"
4125595eb6SMathieu Desnoyers * can be reordered after (a) (although not after (c)), so we get r1 == 0
 * and r2 == 0. This violates the guarantee that membarrier() is
 * supposed to provide.
4425595eb6SMathieu Desnoyers *
4525595eb6SMathieu Desnoyers * The timing of the memory barrier at (a) has to ensure that it executes
4625595eb6SMathieu Desnoyers * before the IPI-induced memory barrier on CPU1.
4725595eb6SMathieu Desnoyers *
4825595eb6SMathieu Desnoyers * B) Userspace thread execution before IPI vs membarrier's memory
4925595eb6SMathieu Desnoyers * barrier after completing the IPI
5025595eb6SMathieu Desnoyers *
5125595eb6SMathieu Desnoyers * Userspace variables:
5225595eb6SMathieu Desnoyers *
5325595eb6SMathieu Desnoyers * int x = 0, y = 0;
5425595eb6SMathieu Desnoyers *
5525595eb6SMathieu Desnoyers * The memory barrier at the end of membarrier() on CPU0 is necessary in
5625595eb6SMathieu Desnoyers * order to enforce the guarantee that any writes occurring on CPU1 before
5725595eb6SMathieu Desnoyers * the membarrier() is executed will be visible to any code executing on
5825595eb6SMathieu Desnoyers * CPU0 after the membarrier():
5925595eb6SMathieu Desnoyers *
6025595eb6SMathieu Desnoyers * CPU0 CPU1
6125595eb6SMathieu Desnoyers *
6225595eb6SMathieu Desnoyers * x = 1
6325595eb6SMathieu Desnoyers * barrier()
6425595eb6SMathieu Desnoyers * y = 1
6525595eb6SMathieu Desnoyers * r2 = y
6625595eb6SMathieu Desnoyers * membarrier():
6725595eb6SMathieu Desnoyers * a: smp_mb()
6825595eb6SMathieu Desnoyers * b: send IPI IPI-induced mb
6925595eb6SMathieu Desnoyers * c: smp_mb()
7025595eb6SMathieu Desnoyers * r1 = x
7125595eb6SMathieu Desnoyers * BUG_ON(r1 == 0 && r2 == 1)
7225595eb6SMathieu Desnoyers *
7325595eb6SMathieu Desnoyers * The writes to x and y are unordered by the hardware, so it's possible to
7425595eb6SMathieu Desnoyers * have "r2 = 1" even though the write to x doesn't execute until (b). If
7525595eb6SMathieu Desnoyers * the memory barrier at (c) is omitted then "r1 = x" can be reordered
7625595eb6SMathieu Desnoyers * before (b) (although not before (a)), so we get "r1 = 0". This violates
7725595eb6SMathieu Desnoyers * the guarantee that membarrier() is supposed to provide.
7825595eb6SMathieu Desnoyers *
7925595eb6SMathieu Desnoyers * The timing of the memory barrier at (c) has to ensure that it executes
8025595eb6SMathieu Desnoyers * after the IPI-induced memory barrier on CPU1.
8125595eb6SMathieu Desnoyers *
8225595eb6SMathieu Desnoyers * C) Scheduling userspace thread -> kthread -> userspace thread vs membarrier
8325595eb6SMathieu Desnoyers *
8425595eb6SMathieu Desnoyers * CPU0 CPU1
8525595eb6SMathieu Desnoyers *
8625595eb6SMathieu Desnoyers * membarrier():
8725595eb6SMathieu Desnoyers * a: smp_mb()
8825595eb6SMathieu Desnoyers * d: switch to kthread (includes mb)
8925595eb6SMathieu Desnoyers * b: read rq->curr->mm == NULL
9025595eb6SMathieu Desnoyers * e: switch to user (includes mb)
9125595eb6SMathieu Desnoyers * c: smp_mb()
9225595eb6SMathieu Desnoyers *
9325595eb6SMathieu Desnoyers * Using the scenario from (A), we can show that (a) needs to be paired
9425595eb6SMathieu Desnoyers * with (e). Using the scenario from (B), we can show that (c) needs to
9525595eb6SMathieu Desnoyers * be paired with (d).
9625595eb6SMathieu Desnoyers *
9725595eb6SMathieu Desnoyers * D) exit_mm vs membarrier
9825595eb6SMathieu Desnoyers *
9925595eb6SMathieu Desnoyers * Two thread groups are created, A and B. Thread group B is created by
10025595eb6SMathieu Desnoyers * issuing clone from group A with flag CLONE_VM set, but not CLONE_THREAD.
10125595eb6SMathieu Desnoyers * Let's assume we have a single thread within each thread group (Thread A
10225595eb6SMathieu Desnoyers * and Thread B). Thread A runs on CPU0, Thread B runs on CPU1.
10325595eb6SMathieu Desnoyers *
10425595eb6SMathieu Desnoyers * CPU0 CPU1
10525595eb6SMathieu Desnoyers *
10625595eb6SMathieu Desnoyers * membarrier():
10725595eb6SMathieu Desnoyers * a: smp_mb()
10825595eb6SMathieu Desnoyers * exit_mm():
10925595eb6SMathieu Desnoyers * d: smp_mb()
11025595eb6SMathieu Desnoyers * e: current->mm = NULL
11125595eb6SMathieu Desnoyers * b: read rq->curr->mm == NULL
11225595eb6SMathieu Desnoyers * c: smp_mb()
11325595eb6SMathieu Desnoyers *
11425595eb6SMathieu Desnoyers * Using scenario (B), we can show that (c) needs to be paired with (d).
11525595eb6SMathieu Desnoyers *
11625595eb6SMathieu Desnoyers * E) kthread_{use,unuse}_mm vs membarrier
11725595eb6SMathieu Desnoyers *
11825595eb6SMathieu Desnoyers * CPU0 CPU1
11925595eb6SMathieu Desnoyers *
12025595eb6SMathieu Desnoyers * membarrier():
12125595eb6SMathieu Desnoyers * a: smp_mb()
12225595eb6SMathieu Desnoyers * kthread_unuse_mm()
12325595eb6SMathieu Desnoyers * d: smp_mb()
12425595eb6SMathieu Desnoyers * e: current->mm = NULL
12525595eb6SMathieu Desnoyers * b: read rq->curr->mm == NULL
12625595eb6SMathieu Desnoyers * kthread_use_mm()
12725595eb6SMathieu Desnoyers * f: current->mm = mm
12825595eb6SMathieu Desnoyers * g: smp_mb()
12925595eb6SMathieu Desnoyers * c: smp_mb()
13025595eb6SMathieu Desnoyers *
13125595eb6SMathieu Desnoyers * Using the scenario from (A), we can show that (a) needs to be paired
13225595eb6SMathieu Desnoyers * with (g). Using the scenario from (B), we can show that (c) needs to
13325595eb6SMathieu Desnoyers * be paired with (d).
13425595eb6SMathieu Desnoyers */
13525595eb6SMathieu Desnoyers
/*
 * Bitmask made from an OR of all commands within enum membarrier_cmd,
 * except MEMBARRIER_CMD_QUERY, used to validate the @cmd argument of
 * sys_membarrier(). Flavors that are compiled out contribute 0 so
 * their commands are rejected with -EINVAL.
 */
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK			\
	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE			\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
#endif

#ifdef CONFIG_RSEQ
#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK		\
	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
#else
#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK	0
#endif

#define MEMBARRIER_CMD_BITMASK						\
	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED	\
	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED			\
	| MEMBARRIER_CMD_PRIVATE_EXPEDITED				\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED			\
	| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK		\
	| MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK			\
	| MEMBARRIER_CMD_GET_REGISTRATIONS)

/*
 * Serializes the IPI-sending sections below: each expedited command
 * takes this mutex (via SERIALIZE_IPI()) before cpus_read_lock(), so
 * only one membarrier caller broadcasts IPIs at a time.
 */
static DEFINE_MUTEX(membarrier_ipi_mutex);
#define SERIALIZE_IPI()	guard(mutex)(&membarrier_ipi_mutex)
167944d5fe5SLinus Torvalds
/* IPI handler: issue a full memory barrier on the interrupted CPU. */
static void ipi_mb(void *info)
{
	smp_mb();	/* IPIs should be serializing but paranoid. */
}
17222e4ebb9SMathieu Desnoyers
/*
 * IPI handler for the SYNC_CORE flavor: full memory barrier plus a
 * core-serializing instruction before returning to user-mode.
 */
static void ipi_sync_core(void *info)
{
	/*
	 * The smp_mb() in membarrier after all the IPIs is supposed to
	 * ensure that memory on remote CPUs that occur before the IPI
	 * become visible to membarrier()'s caller -- see scenario B in
	 * the big comment at the top of this file.
	 *
	 * A sync_core() would provide this guarantee, but
	 * sync_core_before_usermode() might end up being deferred until
	 * after membarrier()'s smp_mb().
	 */
	smp_mb();	/* IPIs should be serializing but paranoid. */

	sync_core_before_usermode();
}
189758c9373SAndy Lutomirski
/*
 * IPI handler for the RSEQ flavor: full memory barrier, then restart
 * (abort) any rseq critical section the interrupted task is in.
 */
static void ipi_rseq(void *info)
{
	/*
	 * Ensure that all stores done by the calling thread are visible
	 * to the current task before the current task resumes. We could
	 * probably optimize this away on most architectures, but by the
	 * time we've already sent an IPI, the cost of the extra smp_mb()
	 * is negligible.
	 */
	smp_mb();
	rseq_preempt(current);
}
2022a36ab71SPeter Oskolkov
/*
 * IPI handler: if this CPU is currently running a thread of the mm
 * passed in @info, copy that mm's membarrier_state into this CPU's
 * runqueue state so the fast-path checks observe the registration.
 */
static void ipi_sync_rq_state(void *info)
{
	struct mm_struct *mm = (struct mm_struct *) info;

	/* The runqueue state only mirrors the mm currently running here. */
	if (current->mm != mm)
		return;
	this_cpu_write(runqueues.membarrier_state,
		       atomic_read(&mm->membarrier_state));
	/*
	 * Issue a memory barrier after setting
	 * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
	 * guarantee that no memory access following registration is reordered
	 * before registration.
	 */
	smp_mb();
}
219227a4aadSMathieu Desnoyers
/*
 * Reset membarrier state at exec: the new image starts with no
 * membarrier registrations, both in the mm and in this CPU's
 * runqueue-cached copy.
 */
void membarrier_exec_mmap(struct mm_struct *mm)
{
	/*
	 * Issue a memory barrier before clearing membarrier_state to
	 * guarantee that no memory access prior to exec is reordered after
	 * clearing this state.
	 */
	smp_mb();
	atomic_set(&mm->membarrier_state, 0);
	/*
	 * Keep the runqueue membarrier_state in sync with this mm
	 * membarrier_state.
	 */
	this_cpu_write(runqueues.membarrier_state, 0);
}
235227a4aadSMathieu Desnoyers
membarrier_update_current_mm(struct mm_struct * next_mm)2365bc78502SMathieu Desnoyers void membarrier_update_current_mm(struct mm_struct *next_mm)
2375bc78502SMathieu Desnoyers {
2385bc78502SMathieu Desnoyers struct rq *rq = this_rq();
2395bc78502SMathieu Desnoyers int membarrier_state = 0;
2405bc78502SMathieu Desnoyers
2415bc78502SMathieu Desnoyers if (next_mm)
2425bc78502SMathieu Desnoyers membarrier_state = atomic_read(&next_mm->membarrier_state);
2435bc78502SMathieu Desnoyers if (READ_ONCE(rq->membarrier_state) == membarrier_state)
2445bc78502SMathieu Desnoyers return;
2455bc78502SMathieu Desnoyers WRITE_ONCE(rq->membarrier_state, membarrier_state);
2465bc78502SMathieu Desnoyers }
2475bc78502SMathieu Desnoyers
/*
 * MEMBARRIER_CMD_GLOBAL_EXPEDITED: IPI every online CPU currently
 * running a user task whose runqueue has the GLOBAL_EXPEDITED state
 * bit set, so each of them executes a full memory barrier before this
 * call returns.
 *
 * Returns 0 on success, -ENOMEM if the temporary cpumask cannot be
 * allocated.
 */
static int membarrier_global_expedited(void)
{
	int cpu;
	cpumask_var_t tmpmask;

	/* Single-CPU system: program order already provides the guarantee. */
	if (num_online_cpus() == 1)
		return 0;

	/*
	 * Matches memory barriers after rq->curr modification in
	 * scheduler.
	 */
	smp_mb();	/* system call entry is not a mb. */

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	SERIALIZE_IPI();
	cpus_read_lock();
	rcu_read_lock();
	for_each_online_cpu(cpu) {
		struct task_struct *p;

		/*
		 * Skipping the current CPU is OK even though we can be
		 * migrated at any point. The current CPU, at the point
		 * where we read raw_smp_processor_id(), is ensured to
		 * be in program order with respect to the caller
		 * thread. Therefore, we can skip this CPU from the
		 * iteration.
		 */
		if (cpu == raw_smp_processor_id())
			continue;

		/* Only target CPUs whose runqueue carries the registration. */
		if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
		    MEMBARRIER_STATE_GLOBAL_EXPEDITED))
			continue;

		/*
		 * Skip the CPU if it runs a kernel thread which is not using
		 * a task mm.
		 */
		p = rcu_dereference(cpu_rq(cpu)->curr);
		if (!p->mm)
			continue;

		__cpumask_set_cpu(cpu, tmpmask);
	}
	rcu_read_unlock();

	preempt_disable();
	smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
	preempt_enable();

	free_cpumask_var(tmpmask);
	cpus_read_unlock();

	/*
	 * Memory barrier on the caller thread _after_ we finished
	 * waiting for the last IPI. Matches memory barriers before
	 * rq->curr modification in scheduler.
	 */
	smp_mb();	/* exit from system call is not a mb */
	return 0;
}
313c5f58bd5SMathieu Desnoyers
/*
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED{,_SYNC_CORE,_RSEQ}: IPI the CPUs
 * currently running threads of the caller's mm (or only @cpu_id when
 * it is >= 0), making each execute a full memory barrier -- and,
 * depending on @flags, a core-serializing instruction or an rseq
 * restart -- before this call returns.
 *
 * @flags:  0, MEMBARRIER_FLAG_SYNC_CORE or MEMBARRIER_FLAG_RSEQ,
 *          selecting the IPI handler and the registration required.
 * @cpu_id: target CPU, or a negative value to target every CPU
 *          running a thread of the current mm.
 *
 * Returns 0 on success, -EINVAL if the requested flavor is not
 * configured in, -EPERM if the process has not registered for it,
 * -ENOMEM if the temporary cpumask cannot be allocated.
 */
static int membarrier_private_expedited(int flags, int cpu_id)
{
	cpumask_var_t tmpmask;
	struct mm_struct *mm = current->mm;
	smp_call_func_t ipi_func = ipi_mb;

	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
			return -EINVAL;
		if (!(atomic_read(&mm->membarrier_state) &
		    MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
			return -EPERM;
		ipi_func = ipi_sync_core;
		prepare_sync_core_cmd(mm);
	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
		if (!IS_ENABLED(CONFIG_RSEQ))
			return -EINVAL;
		if (!(atomic_read(&mm->membarrier_state) &
		    MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
			return -EPERM;
		ipi_func = ipi_rseq;
	} else {
		WARN_ON_ONCE(flags);
		if (!(atomic_read(&mm->membarrier_state) &
		    MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
			return -EPERM;
	}

	/*
	 * Fast path: a single-threaded mm (or single-CPU system) needs no
	 * IPI for the plain/RSEQ flavors. SYNC_CORE must not take it --
	 * see the migration discussion in the comment further down.
	 */
	if (flags != MEMBARRIER_FLAG_SYNC_CORE &&
	    (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1))
		return 0;

	/*
	 * Matches memory barriers after rq->curr modification in
	 * scheduler.
	 *
	 * On RISC-V, this barrier pairing is also needed for the
	 * SYNC_CORE command when switching between processes, cf.
	 * the inline comments in membarrier_arch_switch_mm().
	 */
	smp_mb();	/* system call entry is not a mb. */

	/* The cpumask is only needed when scanning all CPUs. */
	if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	SERIALIZE_IPI();
	cpus_read_lock();

	if (cpu_id >= 0) {
		struct task_struct *p;

		if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
			goto out;
		rcu_read_lock();
		p = rcu_dereference(cpu_rq(cpu_id)->curr);
		if (!p || p->mm != mm) {
			rcu_read_unlock();
			goto out;
		}
		rcu_read_unlock();
	} else {
		int cpu;

		rcu_read_lock();
		for_each_online_cpu(cpu) {
			struct task_struct *p;

			p = rcu_dereference(cpu_rq(cpu)->curr);
			if (p && p->mm == mm)
				__cpumask_set_cpu(cpu, tmpmask);
		}
		rcu_read_unlock();
	}

	if (cpu_id >= 0) {
		/*
		 * smp_call_function_single() will call ipi_func() if cpu_id
		 * is the calling CPU.
		 */
		smp_call_function_single(cpu_id, ipi_func, NULL, 1);
	} else {
		/*
		 * For regular membarrier, we can save a few cycles by
		 * skipping the current cpu -- we're about to do smp_mb()
		 * below, and if we migrate to a different cpu, this cpu
		 * and the new cpu will execute a full barrier in the
		 * scheduler.
		 *
		 * For SYNC_CORE, we do need a barrier on the current cpu --
		 * otherwise, if we are migrated and replaced by a different
		 * task in the same mm just before, during, or after
		 * membarrier, we will end up with some thread in the mm
		 * running without a core sync.
		 *
		 * For RSEQ, don't rseq_preempt() the caller. User code
		 * is not supposed to issue syscalls at all from inside an
		 * rseq critical section.
		 */
		if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
			preempt_disable();
			smp_call_function_many(tmpmask, ipi_func, NULL, true);
			preempt_enable();
		} else {
			on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
		}
	}

out:
	if (cpu_id < 0)
		free_cpumask_var(tmpmask);
	cpus_read_unlock();

	/*
	 * Memory barrier on the caller thread _after_ we finished
	 * waiting for the last IPI. Matches memory barriers before
	 * rq->curr modification in scheduler.
	 */
	smp_mb();	/* exit from system call is not a mb */

	return 0;
}
435a961e409SMathieu Desnoyers
/*
 * Propagate @mm's membarrier_state to the runqueue-cached copies of
 * every CPU currently running a thread of @mm, so that later scheduler
 * fast-path checks observe the new registration.
 *
 * Returns 0 on success, -ENOMEM if the temporary cpumask cannot be
 * allocated.
 */
static int sync_runqueues_membarrier_state(struct mm_struct *mm)
{
	int membarrier_state = atomic_read(&mm->membarrier_state);
	cpumask_var_t tmpmask;
	int cpu;

	if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
		this_cpu_write(runqueues.membarrier_state, membarrier_state);

		/*
		 * For single mm user, we can simply issue a memory barrier
		 * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
		 * mm and in the current runqueue to guarantee that no memory
		 * access following registration is reordered before
		 * registration.
		 */
		smp_mb();
		return 0;
	}

	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * For mm with multiple users, we need to ensure all future
	 * scheduler executions will observe @mm's new membarrier
	 * state.
	 */
	synchronize_rcu();

	/*
	 * For each cpu runqueue, if the task's mm match @mm, ensure that all
	 * @mm's membarrier state set bits are also set in the runqueue's
	 * membarrier state. This ensures that a runqueue scheduling
	 * between threads which are users of @mm has its membarrier state
	 * updated.
	 */
	SERIALIZE_IPI();
	cpus_read_lock();
	rcu_read_lock();
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
		struct task_struct *p;

		p = rcu_dereference(rq->curr);
		if (p && p->mm == mm)
			__cpumask_set_cpu(cpu, tmpmask);
	}
	rcu_read_unlock();

	on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);

	free_cpumask_var(tmpmask);
	cpus_read_unlock();

	return 0;
}
493227a4aadSMathieu Desnoyers
/*
 * MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED: register the current mm
 * for the GLOBAL_EXPEDITED command. The GLOBAL_EXPEDITED bit is set
 * and synced to all runqueues before the READY bit, so the command is
 * only usable once every CPU can observe the registration.
 *
 * Returns 0 on success (including when already registered), or the
 * error from sync_runqueues_membarrier_state().
 */
static int membarrier_register_global_expedited(void)
{
	struct task_struct *p = current;
	struct mm_struct *mm = p->mm;
	int ret;

	/* Idempotent: already fully registered. */
	if (atomic_read(&mm->membarrier_state) &
	    MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
		return 0;
	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
	ret = sync_runqueues_membarrier_state(mm);
	if (ret)
		return ret;
	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
		  &mm->membarrier_state);

	return 0;
}
512c5f58bd5SMathieu Desnoyers
/*
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED{,_SYNC_CORE,_RSEQ}:
 * register the current mm for the selected private expedited flavor.
 * The flavor's state bit is set and synced to all runqueues before the
 * corresponding READY bit, so the command only becomes usable once the
 * registration is fully visible.
 *
 * @flags: 0, MEMBARRIER_FLAG_SYNC_CORE or MEMBARRIER_FLAG_RSEQ.
 *
 * Returns 0 on success (including when already registered), -EINVAL
 * if the flavor is not configured in, or the error from
 * sync_runqueues_membarrier_state().
 */
static int membarrier_register_private_expedited(int flags)
{
	struct task_struct *p = current;
	struct mm_struct *mm = p->mm;
	int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
	    set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
	    ret;

	if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
			return -EINVAL;
		ready_state =
			MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
	} else if (flags == MEMBARRIER_FLAG_RSEQ) {
		if (!IS_ENABLED(CONFIG_RSEQ))
			return -EINVAL;
		ready_state =
			MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
	} else {
		WARN_ON_ONCE(flags);
	}

	/*
	 * We need to consider threads belonging to different thread
	 * groups, which use the same mm. (CLONE_VM but not
	 * CLONE_THREAD).
	 */
	if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
		return 0;
	if (flags & MEMBARRIER_FLAG_SYNC_CORE)
		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
	if (flags & MEMBARRIER_FLAG_RSEQ)
		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
	atomic_or(set_state, &mm->membarrier_state);
	ret = sync_runqueues_membarrier_state(mm);
	if (ret)
		return ret;
	atomic_or(ready_state, &mm->membarrier_state);

	return 0;
}
55422e4ebb9SMathieu Desnoyers
membarrier_get_registrations(void)555544a4f2eSMichal Clapinski static int membarrier_get_registrations(void)
556544a4f2eSMichal Clapinski {
557544a4f2eSMichal Clapinski struct task_struct *p = current;
558544a4f2eSMichal Clapinski struct mm_struct *mm = p->mm;
559544a4f2eSMichal Clapinski int registrations_mask = 0, membarrier_state, i;
560544a4f2eSMichal Clapinski static const int states[] = {
561544a4f2eSMichal Clapinski MEMBARRIER_STATE_GLOBAL_EXPEDITED |
562544a4f2eSMichal Clapinski MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
563544a4f2eSMichal Clapinski MEMBARRIER_STATE_PRIVATE_EXPEDITED |
564544a4f2eSMichal Clapinski MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
565544a4f2eSMichal Clapinski MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE |
566544a4f2eSMichal Clapinski MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY,
567544a4f2eSMichal Clapinski MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ |
568544a4f2eSMichal Clapinski MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY
569544a4f2eSMichal Clapinski };
570544a4f2eSMichal Clapinski static const int registration_cmds[] = {
571544a4f2eSMichal Clapinski MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED,
572544a4f2eSMichal Clapinski MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED,
573544a4f2eSMichal Clapinski MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE,
574544a4f2eSMichal Clapinski MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ
575544a4f2eSMichal Clapinski };
576544a4f2eSMichal Clapinski BUILD_BUG_ON(ARRAY_SIZE(states) != ARRAY_SIZE(registration_cmds));
577544a4f2eSMichal Clapinski
578544a4f2eSMichal Clapinski membarrier_state = atomic_read(&mm->membarrier_state);
579544a4f2eSMichal Clapinski for (i = 0; i < ARRAY_SIZE(states); ++i) {
580544a4f2eSMichal Clapinski if (membarrier_state & states[i]) {
581544a4f2eSMichal Clapinski registrations_mask |= registration_cmds[i];
582544a4f2eSMichal Clapinski membarrier_state &= ~states[i];
583544a4f2eSMichal Clapinski }
584544a4f2eSMichal Clapinski }
585544a4f2eSMichal Clapinski WARN_ON_ONCE(membarrier_state != 0);
586544a4f2eSMichal Clapinski return registrations_mask;
587544a4f2eSMichal Clapinski }
588544a4f2eSMichal Clapinski
58922e4ebb9SMathieu Desnoyers /**
59022e4ebb9SMathieu Desnoyers * sys_membarrier - issue memory barriers on a set of threads
59122e4ebb9SMathieu Desnoyers * @cmd: Takes command values defined in enum membarrier_cmd.
5922a36ab71SPeter Oskolkov * @flags: Currently needs to be 0 for all commands other than
5932a36ab71SPeter Oskolkov * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: in the latter
5942a36ab71SPeter Oskolkov * case it can be MEMBARRIER_CMD_FLAG_CPU, indicating that @cpu_id
5952a36ab71SPeter Oskolkov * contains the CPU on which to interrupt (= restart)
5962a36ab71SPeter Oskolkov * the RSEQ critical section.
5972a36ab71SPeter Oskolkov * @cpu_id: if @flags == MEMBARRIER_CMD_FLAG_CPU, indicates the cpu on which
5982a36ab71SPeter Oskolkov * RSEQ CS should be interrupted (@cmd must be
5992a36ab71SPeter Oskolkov * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ).
60022e4ebb9SMathieu Desnoyers *
60122e4ebb9SMathieu Desnoyers * If this system call is not implemented, -ENOSYS is returned. If the
60222e4ebb9SMathieu Desnoyers * command specified does not exist, not available on the running
60322e4ebb9SMathieu Desnoyers * kernel, or if the command argument is invalid, this system call
60422e4ebb9SMathieu Desnoyers * returns -EINVAL. For a given command, with flags argument set to 0,
605227a4aadSMathieu Desnoyers * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
606227a4aadSMathieu Desnoyers * always return the same value until reboot. In addition, it can return
607227a4aadSMathieu Desnoyers * -ENOMEM if there is not enough memory available to perform the system
608227a4aadSMathieu Desnoyers * call.
60922e4ebb9SMathieu Desnoyers *
 * All memory accesses performed in program order from each targeted thread
 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
61222e4ebb9SMathieu Desnoyers * the semantic "barrier()" to represent a compiler barrier forcing memory
61322e4ebb9SMathieu Desnoyers * accesses to be performed in program order across the barrier, and
61422e4ebb9SMathieu Desnoyers * smp_mb() to represent explicit memory barriers forcing full memory
61522e4ebb9SMathieu Desnoyers * ordering across the barrier, we have the following ordering table for
61622e4ebb9SMathieu Desnoyers * each pair of barrier(), sys_membarrier() and smp_mb():
61722e4ebb9SMathieu Desnoyers *
61822e4ebb9SMathieu Desnoyers * The pair ordering is detailed as (O: ordered, X: not ordered):
61922e4ebb9SMathieu Desnoyers *
62022e4ebb9SMathieu Desnoyers * barrier() smp_mb() sys_membarrier()
62122e4ebb9SMathieu Desnoyers * barrier() X X O
62222e4ebb9SMathieu Desnoyers * smp_mb() X O O
62322e4ebb9SMathieu Desnoyers * sys_membarrier() O O O
62422e4ebb9SMathieu Desnoyers */
SYSCALL_DEFINE3(membarrier, int, cmd, unsigned int, flags, int, cpu_id)
{
	/*
	 * Flag validation pass: MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ is the
	 * only command that accepts a flag (MEMBARRIER_CMD_FLAG_CPU); every
	 * other command requires @flags == 0.
	 */
	switch (cmd) {
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
		if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
			return -EINVAL;
		break;
	default:
		if (unlikely(flags))
			return -EINVAL;
	}

	/*
	 * Without MEMBARRIER_CMD_FLAG_CPU the user-supplied @cpu_id is
	 * meaningless: normalize it to -1 before passing it down
	 * (presumably "no specific target CPU" — see
	 * membarrier_private_expedited() for the exact semantics).
	 */
	if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
		cpu_id = -1;

	/* Command dispatch. */
	switch (cmd) {
	case MEMBARRIER_CMD_QUERY:
	{
		/* Report the set of commands supported by this kernel. */
		int cmd_mask = MEMBARRIER_CMD_BITMASK;

		if (tick_nohz_full_enabled())
			cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
		return cmd_mask;
	}
	case MEMBARRIER_CMD_GLOBAL:
		/* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
		if (tick_nohz_full_enabled())
			return -EINVAL;
		/*
		 * With a single online CPU there is no other CPU to order
		 * against, so the grace period can be skipped entirely.
		 */
		if (num_online_cpus() > 1)
			synchronize_rcu();
		return 0;
	case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
		return membarrier_global_expedited();
	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
		return membarrier_register_global_expedited();
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
		return membarrier_private_expedited(0, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
		return membarrier_register_private_expedited(0);
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
		return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
		return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
		return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
	case MEMBARRIER_CMD_GET_REGISTRATIONS:
		return membarrier_get_registrations();
	default:
		/* Unknown command. */
		return -EINVAL;
	}
}
678