/*
 * Generic helpers for smp ipi calls
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 *
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
static LIST_HEAD(call_function_queue);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);

enum {
	CSD_FLAG_WAIT	= 0x01,		/* caller spins until the callee clears this */
	CSD_FLAG_ALLOC	= 0x02,		/* data was kmalloc'ed; callee frees it */
	CSD_FLAG_LOCK	= 0x04,		/* per-cpu csd_data is in use; callee clears it */
};

struct call_function_data {
	struct call_single_data csd;
	spinlock_t lock;
	unsigned int refs;		/* CPUs still to run ->csd.func */
	struct rcu_head rcu_head;
	unsigned long cpumask_bits[];	/* targeted CPUs, cleared as they run */
};

struct call_single_queue {
	struct list_head list;
	spinlock_t lock;
};

static int __cpuinit init_call_single_data(void)
{
	int i;

	for_each_possible_cpu(i) {
		struct call_single_queue *q = &per_cpu(call_single_queue, i);

		spin_lock_init(&q->lock);
		INIT_LIST_HEAD(&q->list);
	}
	return 0;
}
early_initcall(init_call_single_data);

static void csd_flag_wait(struct call_single_data *data)
{
	/* Wait for response */
	do {
		if (!(data->flags & CSD_FLAG_WAIT))
			break;
		cpu_relax();
	} while (1);
}

/*
 * Insert a previously allocated call_single_data element for execution
 * on the given CPU. data must already have ->func, ->info, and ->flags set.
 */
static void generic_exec_single(int cpu, struct call_single_data *data)
{
	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
	int wait = data->flags & CSD_FLAG_WAIT, ipi;
	unsigned long flags;

	spin_lock_irqsave(&dst->lock, flags);
	ipi = list_empty(&dst->list);
	list_add_tail(&data->list, &dst->list);
	spin_unlock_irqrestore(&dst->lock, flags);

	/*
	 * Make the list addition visible before sending the ipi.
	 */
	smp_mb();

	if (ipi)
		arch_send_call_function_single_ipi(cpu);

	if (wait)
		csd_flag_wait(data);
}

static void rcu_free_call_data(struct rcu_head *head)
{
	struct call_function_data *data;

	data = container_of(head, struct call_function_data, rcu_head);

	kfree(data);
}
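
/*
 * Note on the IPI side of generic_exec_single(): an IPI is only raised
 * when the destination list was empty, because the receiving CPU drains
 * its whole queue in one interrupt (see
 * generic_smp_call_function_single_interrupt() below). A minimal sketch
 * of how an architecture might wire that up is kept out of the build
 * here; the handler name is hypothetical and real implementations live
 * in the arch code.
 */
#if 0	/* illustrative sketch, not built */
void example_call_function_single_ipi(void)
{
	/*
	 * Entered with interrupts disabled; real handlers also ack the
	 * interrupt and do irq_enter()/irq_exit() bookkeeping.
	 */
	generic_smp_call_function_single_interrupt();
}
#endif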

/*
 * Invoked by arch to handle an IPI for call function. Must be called with
 * interrupts disabled.
 */
void generic_smp_call_function_interrupt(void)
{
	struct call_function_data *data;
	int cpu = get_cpu();

	/*
	 * It's ok to use list_for_each_entry_rcu() here even though we may
	 * delete 'pos', since list_del_rcu() doesn't clear ->next
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
		int refs;

		if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
			continue;

		data->csd.func(data->csd.info);

		spin_lock(&data->lock);
		cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
		WARN_ON(data->refs == 0);
		data->refs--;
		refs = data->refs;
		spin_unlock(&data->lock);

		if (refs)
			continue;

		spin_lock(&call_function_lock);
		list_del_rcu(&data->csd.list);
		spin_unlock(&call_function_lock);

		if (data->csd.flags & CSD_FLAG_WAIT) {
			/*
			 * serialize stores to data with the flag clear
			 * and wakeup
			 */
			smp_wmb();
			data->csd.flags &= ~CSD_FLAG_WAIT;
		}
		if (data->csd.flags & CSD_FLAG_ALLOC)
			call_rcu(&data->rcu_head, rcu_free_call_data);
	}
	rcu_read_unlock();

	put_cpu();
}

/*
 * Invoked by arch to handle an IPI for call function single. Must be called
 * from the arch with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
	LIST_HEAD(list);

	/*
	 * Need to see other CPUs' stores to the list head for checking
	 * whether the list is empty without holding q->lock
	 */
	smp_read_barrier_depends();
	while (!list_empty(&q->list)) {
		unsigned int data_flags;

		spin_lock(&q->lock);
		list_replace_init(&q->list, &list);
		spin_unlock(&q->lock);

		while (!list_empty(&list)) {
			struct call_single_data *data;

			data = list_entry(list.next, struct call_single_data,
					  list);
			list_del(&data->list);

			/*
			 * 'data' can be invalid after this call if
			 * flags == 0 (when called through
			 * generic_exec_single()), so save the flags
			 * before making the call.
			 */
			data_flags = data->flags;

			data->func(data->info);

			if (data_flags & CSD_FLAG_WAIT) {
				smp_wmb();
				data->flags &= ~CSD_FLAG_WAIT;
			} else if (data_flags & CSD_FLAG_LOCK) {
				smp_wmb();
				data->flags &= ~CSD_FLAG_LOCK;
			} else if (data_flags & CSD_FLAG_ALLOC)
				kfree(data);
		}
		/*
		 * See comment on outer loop
		 */
		smp_read_barrier_depends();
	}
}

static DEFINE_PER_CPU(struct call_single_data, csd_data);
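
/*
 * Both interrupt handlers above invoke the callback in hard interrupt
 * context with interrupts disabled, so anything passed as @func must be
 * fast and must not sleep. A sketch of such a callback follows; the
 * names are hypothetical and the block is kept out of the build.
 */
#if 0	/* illustrative sketch, not built */
static DEFINE_PER_CPU(unsigned long, example_hits);

static void example_ipi_func(void *info)
{
	/* Per-cpu work only: no sleeping locks, no blocking allocation. */
	__get_cpu_var(example_hits)++;
}
#endif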

/**
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until the function has completed on the other CPU.
 *
 * Returns 0 on success, else a negative status code. Note that @wait
 * will be implicitly turned on in case of allocation failures, since
 * we fall back to on-stack allocation.
 */
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
			     int wait)
{
	struct call_single_data d;
	unsigned long flags;
	/* prevent preemption and reschedule on another processor,
	   as well as CPU removal */
	int me = get_cpu();
	int err = 0;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	if (cpu == me) {
		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
		struct call_single_data *data;

		if (!wait) {
			/*
			 * We are calling a function on a single CPU
			 * and we are not going to wait for it to finish.
			 * We first try to allocate the data, but if we
			 * fail, we fall back to use a per cpu data to pass
			 * the information to that CPU. Since all callers
			 * of this code will use the same data, we must
			 * synchronize the callers to prevent a new caller
			 * from corrupting the data before the callee
			 * can access it.
			 *
			 * The CSD_FLAG_LOCK is used to let us know when
			 * the IPI handler is done with the data.
			 * The first caller will set it, and the callee
			 * will clear it. The next caller must wait for
			 * it to clear before we set it again. This
			 * will make sure the callee is done with the
			 * data before a new caller will use it.
			 */
			data = kmalloc(sizeof(*data), GFP_ATOMIC);
			if (data)
				data->flags = CSD_FLAG_ALLOC;
			else {
				data = &per_cpu(csd_data, me);
				while (data->flags & CSD_FLAG_LOCK)
					cpu_relax();
				data->flags = CSD_FLAG_LOCK;
			}
		} else {
			data = &d;
			data->flags = CSD_FLAG_WAIT;
		}

		data->func = func;
		data->info = info;
		generic_exec_single(cpu, data);
	} else {
		err = -ENXIO;	/* CPU not online */
	}

	put_cpu();
	return err;
}
EXPORT_SYMBOL(smp_call_function_single);

/**
 * __smp_call_function_single(): Run a function on another CPU
 * @cpu: The CPU to run on.
 * @data: Pre-allocated and setup data structure
 *
 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
 * data structure. Useful for embedding @data inside other structures, for
 * instance.
 */
void __smp_call_function_single(int cpu, struct call_single_data *data)
{
	/* Can deadlock when called with interrupts disabled */
	WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());

	generic_exec_single(cpu, data);
}

/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
#ifndef arch_send_call_function_ipi_mask
#define arch_send_call_function_ipi_mask(maskp) \
	arch_send_call_function_ipi(*(maskp))
#endif
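
/*
 * As the comment on __smp_call_function_single() suggests, callers that
 * want to avoid allocation can embed the call_single_data in their own
 * structure. A sketch of that pattern, with hypothetical names and kept
 * out of the build:
 */
#if 0	/* illustrative sketch, not built */
struct example_work {
	struct call_single_data csd;
	int payload;
};

static void example_remote_func(void *info)
{
	struct example_work *w = info;

	/* Runs on the remote CPU with interrupts disabled. */
	(void)w->payload;
}

static void example_kick_cpu(int cpu, struct example_work *w)
{
	/*
	 * The caller owns @w and must keep it valid (and not requeue it)
	 * until example_remote_func() has run on @cpu.
	 */
	w->csd.func	= example_remote_func;
	w->csd.info	= w;
	w->csd.flags	= 0;
	__smp_call_function_single(cpu, &w->csd);
}
#endif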

/**
 * smp_call_function_many(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
 * If @wait is true, then returns once @func has returned. Note that @wait
 * will be implicitly turned on in case of allocation failures, since
 * we fall back to on-stack allocation.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
void smp_call_function_many(const struct cpumask *mask,
			    void (*func)(void *), void *info,
			    bool wait)
{
	struct call_function_data *data;
	unsigned long flags;
	int cpu, next_cpu;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	/* So, what's a CPU they want? Ignoring this one. */
	cpu = cpumask_first_and(mask, cpu_online_mask);
	if (cpu == smp_processor_id())
		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	/* No online cpus? We're done. */
	if (cpu >= nr_cpu_ids)
		return;

	/* Do we have another CPU which isn't us? */
	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (next_cpu == smp_processor_id())
		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

	/* Fastpath: do that cpu by itself. */
	if (next_cpu >= nr_cpu_ids) {
		smp_call_function_single(cpu, func, info, wait);
		return;
	}

	data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
	if (unlikely(!data)) {
		/* Slow path. */
		for_each_online_cpu(cpu) {
			if (cpu == smp_processor_id())
				continue;
			if (cpumask_test_cpu(cpu, mask))
				smp_call_function_single(cpu, func, info, wait);
		}
		return;
	}

	spin_lock_init(&data->lock);
	data->csd.flags = CSD_FLAG_ALLOC;
	if (wait)
		data->csd.flags |= CSD_FLAG_WAIT;
	data->csd.func = func;
	data->csd.info = info;
	cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
	data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));

	spin_lock_irqsave(&call_function_lock, flags);
	list_add_tail_rcu(&data->csd.list, &call_function_queue);
	spin_unlock_irqrestore(&call_function_lock, flags);

	/*
	 * Make the list addition visible before sending the ipi.
	 */
	smp_mb();

	/* Send a message to all CPUs in the map */
	arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));

	/* Optionally wait for the CPUs to complete */
	if (wait)
		csd_flag_wait(&data->csd);
}
EXPORT_SYMBOL(smp_call_function_many);

/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
 * Returns 0.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func. In case of allocation
 * failure, @wait will be implicitly turned on.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function(void (*func)(void *), void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();

	return 0;
}
EXPORT_SYMBOL(smp_call_function);

void ipi_call_lock(void)
{
	spin_lock(&call_function_lock);
}

void ipi_call_unlock(void)
{
	spin_unlock(&call_function_lock);
}

void ipi_call_lock_irq(void)
{
	spin_lock_irq(&call_function_lock);
}

void ipi_call_unlock_irq(void)
{
	spin_unlock_irq(&call_function_lock);
}
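
/*
 * Typical use of the exported entry points above, sketched with a
 * hypothetical callback and kept out of the build: smp_call_function()
 * handles preemption itself, while smp_call_function_many() requires the
 * caller to disable preemption.
 */
#if 0	/* illustrative sketch, not built */
static void example_sync_func(void *info)
{
	/* Fast, non-blocking work on each target CPU. */
}

static void example_run_on_others(void)
{
	/* Run on all other online CPUs and wait for completion. */
	smp_call_function(example_sync_func, NULL, 1);

	/* Or target an explicit mask; preemption must be disabled. */
	preempt_disable();
	smp_call_function_many(cpu_online_mask, example_sync_func, NULL, true);
	preempt_enable();
}
#endif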