/*
 * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * membarrier system call
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/syscalls.h>
#include <linux/membarrier.h>
#include <linux/tick.h>
#include <linux/cpumask.h>
#include <linux/atomic.h>

#include "sched.h"      /* for cpu_rq(). */

/*
 * Bitmask made from a bitwise-or of all commands within enum membarrier_cmd,
 * except MEMBARRIER_CMD_QUERY.
 */
#define MEMBARRIER_CMD_BITMASK                                          \
        (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED       \
        | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED                     \
        | MEMBARRIER_CMD_PRIVATE_EXPEDITED                             \
        | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)

static void ipi_mb(void *info)
{
        smp_mb();       /* IPIs should be serializing but paranoid. */
}

static int membarrier_global_expedited(void)
{
        int cpu;
        bool fallback = false;
        cpumask_var_t tmpmask;

        if (num_online_cpus() == 1)
                return 0;

        /*
         * Matches memory barriers around rq->curr modification in
         * scheduler.
         */
        smp_mb();       /* system call entry is not a mb. */

        /*
         * Expedited membarrier commands guarantee that they won't
         * block, hence the GFP_NOWAIT allocation flag and fallback
         * implementation.
         */
        if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
                /* Fallback for OOM. */
                fallback = true;
        }

        cpus_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;

                /*
                 * Skipping the current CPU is OK even though we can be
                 * migrated at any point. The current CPU, at the point
                 * where we read raw_smp_processor_id(), is ensured to
                 * be in program order with respect to the caller
                 * thread. Therefore, we can skip this CPU from the
                 * iteration.
                 */
                if (cpu == raw_smp_processor_id())
                        continue;
                rcu_read_lock();
                p = task_rcu_dereference(&cpu_rq(cpu)->curr);
                if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
                                   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
                        if (!fallback)
                                __cpumask_set_cpu(cpu, tmpmask);
                        else
                                smp_call_function_single(cpu, ipi_mb, NULL, 1);
                }
                rcu_read_unlock();
        }
        if (!fallback) {
                preempt_disable();
                smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
                preempt_enable();
                free_cpumask_var(tmpmask);
        }
        cpus_read_unlock();

        /*
         * Memory barrier on the caller thread _after_ we finished
         * waiting for the last IPI. Matches memory barriers around
         * rq->curr modification in scheduler.
         */
        smp_mb();       /* exit from system call is not a mb */
        return 0;
}
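
/*
 * Example (editorial sketch, not part of the kernel sources): how the
 * global expedited command above is expected to be driven from
 * userspace. This assumes the usual raw syscall(2) invocation, since
 * glibc has historically not provided a wrapper for this system call;
 * the membarrier() helper name below is purely illustrative.
 *
 *        #include <linux/membarrier.h>
 *        #include <sys/syscall.h>
 *        #include <unistd.h>
 *
 *        static int membarrier(int cmd, int flags)
 *        {
 *                return syscall(__NR_membarrier, cmd, flags);
 *        }
 *
 * A process that wants to be targeted opts in once:
 *
 *        membarrier(MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0);
 *
 * Any process may then issue the command; it IPIs every CPU currently
 * running a thread of a registered mm:
 *
 *        membarrier(MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0);
 */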
static int membarrier_private_expedited(void)
{
        int cpu;
        bool fallback = false;
        cpumask_var_t tmpmask;

        if (!(atomic_read(&current->mm->membarrier_state)
                        & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
                return -EPERM;

        if (num_online_cpus() == 1)
                return 0;

        /*
         * Matches memory barriers around rq->curr modification in
         * scheduler.
         */
        smp_mb();       /* system call entry is not a mb. */

        /*
         * Expedited membarrier commands guarantee that they won't
         * block, hence the GFP_NOWAIT allocation flag and fallback
         * implementation.
         */
        if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
                /* Fallback for OOM. */
                fallback = true;
        }

        cpus_read_lock();
        for_each_online_cpu(cpu) {
                struct task_struct *p;

                /*
                 * Skipping the current CPU is OK even though we can be
                 * migrated at any point. The current CPU, at the point
                 * where we read raw_smp_processor_id(), is ensured to
                 * be in program order with respect to the caller
                 * thread. Therefore, we can skip this CPU from the
                 * iteration.
                 */
                if (cpu == raw_smp_processor_id())
                        continue;
                rcu_read_lock();
                p = task_rcu_dereference(&cpu_rq(cpu)->curr);
                if (p && p->mm == current->mm) {
                        if (!fallback)
                                __cpumask_set_cpu(cpu, tmpmask);
                        else
                                smp_call_function_single(cpu, ipi_mb, NULL, 1);
                }
                rcu_read_unlock();
        }
        if (!fallback) {
                preempt_disable();
                smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
                preempt_enable();
                free_cpumask_var(tmpmask);
        }
        cpus_read_unlock();

        /*
         * Memory barrier on the caller thread _after_ we finished
         * waiting for the last IPI. Matches memory barriers around
         * rq->curr modification in scheduler.
         */
        smp_mb();       /* exit from system call is not a mb */
        return 0;
}

static int membarrier_register_global_expedited(void)
{
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;

        if (atomic_read(&mm->membarrier_state) &
            MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
                return 0;
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
        if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
                /*
                 * For a single mm user, single threaded process, we can
                 * simply issue a memory barrier after setting
                 * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
                 * no memory access following registration is reordered
                 * before registration.
                 */
                smp_mb();
        } else {
                /*
                 * For processes with multiple mm users or multiple
                 * threads, we need to ensure all future scheduler
                 * executions will observe the new registration state
                 * for this mm.
                 */
                synchronize_sched();
        }
        atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
                  &mm->membarrier_state);
        return 0;
}

static int membarrier_register_private_expedited(void)
{
        struct task_struct *p = current;
        struct mm_struct *mm = p->mm;

        /*
         * We need to consider threads belonging to different thread
         * groups, which use the same mm (CLONE_VM but not
         * CLONE_THREAD).
         */
        if (atomic_read(&mm->membarrier_state)
                        & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
                return 0;
        atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
        if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
                /*
                 * Ensure all future scheduler executions will observe
                 * the new registration state for this process.
                 */
                synchronize_sched();
        }
        atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
                  &mm->membarrier_state);
        return 0;
}
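
/*
 * Example (editorial sketch, not part of the kernel sources): the
 * private expedited registration protocol implemented above, as seen
 * from userspace. The membarrier() helper is the same illustrative
 * wrapper as in the earlier sketch; error handling is elided.
 *
 * Before registration the command is refused:
 *
 *        membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
 *                returns -1 with errno set to EPERM
 *
 * Register once per mm, typically at process startup:
 *
 *        membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);
 *
 * Each subsequent call then IPIs only the CPUs currently running
 * threads of the caller's mm:
 *
 *        membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
 */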
/**
 * sys_membarrier - issue memory barriers on a set of threads
 * @cmd:   Takes command values defined in enum membarrier_cmd.
 * @flags: Currently needs to be 0. For future extensions.
 *
 * If this system call is not implemented, -ENOSYS is returned. If the
 * command specified does not exist, is not available on the running
 * kernel, or if the command argument is invalid, this system call
 * returns -EINVAL. For a given command, with flags argument set to 0,
 * this system call is guaranteed to always return the same value until
 * reboot.
 *
 * All memory accesses performed in program order from each targeted thread
 * are guaranteed to be ordered with respect to sys_membarrier(). If we use
 * the semantic "barrier()" to represent a compiler barrier forcing memory
 * accesses to be performed in program order across the barrier, and
 * smp_mb() to represent explicit memory barriers forcing full memory
 * ordering across the barrier, we have the following ordering table for
 * each pair of barrier(), sys_membarrier() and smp_mb():
 *
 * The pair ordering is detailed as (O: ordered, X: not ordered):
 *
 *                          barrier()   smp_mb()   sys_membarrier()
 *        barrier()             X           X              O
 *        smp_mb()              X           O              O
 *        sys_membarrier()      O           O              O
 */
SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
{
        if (unlikely(flags))
                return -EINVAL;
        switch (cmd) {
        case MEMBARRIER_CMD_QUERY:
        {
                int cmd_mask = MEMBARRIER_CMD_BITMASK;

                if (tick_nohz_full_enabled())
                        cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
                return cmd_mask;
        }
        case MEMBARRIER_CMD_GLOBAL:
                /* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
                if (tick_nohz_full_enabled())
                        return -EINVAL;
                if (num_online_cpus() > 1)
                        synchronize_sched();
                return 0;
        case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
                return membarrier_global_expedited();
        case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
                return membarrier_register_global_expedited();
        case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
                return membarrier_private_expedited();
        case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
                return membarrier_register_private_expedited();
        default:
                return -EINVAL;
        }
}
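
/*
 * Example (editorial sketch, not part of the kernel sources): the
 * asymmetric fence pattern described by the ordering table above,
 * loosely modeled on the membarrier(2) man page example. The fast side
 * replaces a full memory barrier with a plain compiler barrier; the
 * slow side pays for it by calling membarrier() (the same illustrative
 * wrapper as in the earlier sketches). MEMBARRIER_CMD_GLOBAL needs no
 * prior registration, but may block in synchronize_sched().
 *
 *        static volatile int fast_flag, slow_flag;
 *
 *        int fast_path(void)
 *        {
 *                fast_flag = 1;
 *                asm volatile ("" ::: "memory");    (compiler barrier only)
 *                return slow_flag;
 *        }
 *
 *        int slow_path(void)
 *        {
 *                slow_flag = 1;
 *                membarrier(MEMBARRIER_CMD_GLOBAL, 0);
 *                return fast_flag;
 *        }
 *
 * Because the pair (barrier(), sys_membarrier()) is ordered, the two
 * paths behave as if both had issued smp_mb(): they cannot both observe
 * the other side's flag as 0.
 */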