// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic helpers for smp ipi calls
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/irq_work.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/sched/idle.h>
#include <linux/hypervisor.h>
#include <linux/sched/clock.h>
#include <linux/nmi.h>
#include <linux/sched/debug.h>

#include "smpboot.h"
#include "sched/smp.h"

#define CSD_TYPE(_csd)	((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK)

struct call_function_data {
	call_single_data_t	__percpu *csd;
	cpumask_var_t		cpumask;
	cpumask_var_t		cpumask_ipi;
};

static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);

static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);

static void flush_smp_call_function_queue(bool warn_cpu_offline);

int smpcfd_prepare_cpu(unsigned int cpu)
{
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
				     cpu_to_node(cpu)))
		return -ENOMEM;
	if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
				     cpu_to_node(cpu))) {
		free_cpumask_var(cfd->cpumask);
		return -ENOMEM;
	}
	cfd->csd = alloc_percpu(call_single_data_t);
	if (!cfd->csd) {
		free_cpumask_var(cfd->cpumask);
		free_cpumask_var(cfd->cpumask_ipi);
		return -ENOMEM;
	}

	return 0;
}

int smpcfd_dead_cpu(unsigned int cpu)
{
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	free_cpumask_var(cfd->cpumask);
	free_cpumask_var(cfd->cpumask_ipi);
	free_percpu(cfd->csd);
	return 0;
}

int smpcfd_dying_cpu(unsigned int cpu)
{
	/*
	 * The IPIs for the smp-call-function callbacks queued by other
	 * CPUs might arrive late, either due to hardware latencies or
	 * because this CPU disabled interrupts (inside stop-machine)
	 * before the IPIs were sent. So flush out any pending callbacks
	 * explicitly (without waiting for the IPIs to arrive), to
	 * ensure that the outgoing CPU doesn't go offline with work
	 * still pending.
	 */
	flush_smp_call_function_queue(false);
	irq_work_run();
	return 0;
}

void __init call_function_init(void)
{
	int i;

	for_each_possible_cpu(i)
		init_llist_head(&per_cpu(call_single_queue, i));

	smpcfd_prepare_cpu(smp_processor_id());
}
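
/*
 * Note (illustrative, not authoritative): apart from the boot CPU call in
 * call_function_init(), the smpcfd_*() callbacks above are not invoked from
 * this file; they are wired into the CPU hotplug state machine from
 * kernel/cpu.c, roughly:
 *
 *	[CPUHP_SMPCFD_PREPARE] = {
 *		.name		 = "smpcfd:prepare",
 *		.startup.single	 = smpcfd_prepare_cpu,
 *		.teardown.single = smpcfd_dead_cpu,
 *	},
 *	[CPUHP_SMPCFD_DYING] = {
 *		.name		 = "smpcfd:dying",
 *		.startup.single	 = NULL,
 *		.teardown.single = smpcfd_dying_cpu,
 *	},
 *
 * See the cpuhp_hp_states[] table in kernel/cpu.c for the exact entries.
 */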

#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG

static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
static DEFINE_PER_CPU(void *, cur_csd_info);

#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
static atomic_t csd_bug_count = ATOMIC_INIT(0);

/* Record current CSD work for current CPU, NULL to erase. */
static void csd_lock_record(call_single_data_t *csd)
{
	if (!csd) {
		smp_mb(); /* NULL cur_csd after unlock. */
		__this_cpu_write(cur_csd, NULL);
		return;
	}
	__this_cpu_write(cur_csd_func, csd->func);
	__this_cpu_write(cur_csd_info, csd->info);
	smp_wmb(); /* func and info before csd. */
	__this_cpu_write(cur_csd, csd);
	smp_mb(); /* Update cur_csd before function call. */
		  /* Or before unlock, as the case may be. */
}

static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
{
	unsigned int csd_type;

	csd_type = CSD_TYPE(csd);
	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
		return csd->node.dst; /* Other CSD_TYPE_ values might not have ->dst. */
	return -1;
}

/*
 * Complain if too much time is spent waiting.  Note that only
 * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
 * so waiting on other types gets much less information.
 */
static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
{
	int cpu = -1;
	int cpux;
	bool firsttime;
	u64 ts2, ts_delta;
	call_single_data_t *cpu_cur_csd;
	unsigned int flags = READ_ONCE(csd->node.u_flags);

	if (!(flags & CSD_FLAG_LOCK)) {
		if (!unlikely(*bug_id))
			return true;
		cpu = csd_lock_wait_getcpu(csd);
		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
			 *bug_id, raw_smp_processor_id(), cpu);
		return true;
	}

	ts2 = sched_clock();
	ts_delta = ts2 - *ts1;
	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
		return false;

	firsttime = !*bug_id;
	if (firsttime)
		*bug_id = atomic_inc_return(&csd_bug_count);
	cpu = csd_lock_wait_getcpu(csd);
	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
		cpux = 0;
	else
		cpux = cpu;
	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
		 cpu, csd->func, csd->info);
	if (cpu_cur_csd && csd != cpu_cur_csd) {
		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
	} else {
		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
	}
	if (cpu >= 0) {
		if (!trigger_single_cpu_backtrace(cpu))
			dump_cpu_task(cpu);
		if (!cpu_cur_csd) {
			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
			arch_send_call_function_single_ipi(cpu);
		}
	}
	dump_stack();
	*ts1 = ts2;

	return false;
}

/*
 * csd_lock/csd_unlock are used to serialize access to per-cpu csd resources.
 *
 * For non-synchronous ipi calls the csd can still be in use by the
 * previous function call. For multi-cpu calls it's even more interesting
 * as we'll have to ensure no other cpu is observing our csd.
 */
static __always_inline void csd_lock_wait(call_single_data_t *csd)
{
	int bug_id = 0;
	u64 ts0, ts1;

	ts1 = ts0 = sched_clock();
	for (;;) {
		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
			break;
		cpu_relax();
	}
	smp_acquire__after_ctrl_dep();
}

#else
static void csd_lock_record(call_single_data_t *csd)
{
}

static __always_inline void csd_lock_wait(call_single_data_t *csd)
{
	smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK));
}
#endif

static __always_inline void csd_lock(call_single_data_t *csd)
{
	csd_lock_wait(csd);
	csd->node.u_flags |= CSD_FLAG_LOCK;

	/*
	 * prevent CPU from reordering the above assignment
	 * to ->flags with any subsequent assignments to other
	 * fields of the specified call_single_data_t structure:
	 */
	smp_wmb();
}

static __always_inline void csd_unlock(call_single_data_t *csd)
{
	WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK));

	/*
	 * ensure we're all done before releasing data:
	 */
	smp_store_release(&csd->node.u_flags, 0);
}
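
/*
 * Illustrative sketch (not authoritative) of the lifecycle the lock above
 * protects, as implemented by generic_exec_single() and
 * flush_smp_call_function_queue() below:
 *
 *	sender:	csd_lock(csd);
 *		csd->func = func; csd->info = info;
 *		llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu));
 *		arch IPI to the target CPU
 *	target:	flush_smp_call_function_queue()
 *			SYNC  csd:	func(info); csd_unlock(csd);
 *			ASYNC csd:	csd_unlock(csd); func(info);
 *	sender:	csd_lock_wait(csd);	// SYNC only: spins until the target
 *					// clears CSD_FLAG_LOCK in csd_unlock()
 */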

static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);

void __smp_call_single_queue(int cpu, struct llist_node *node)
{
	/*
	 * The list addition should be visible to the IPI handler, which
	 * locks the list to pull entries off it, before the IPI is sent;
	 * the normal cache coherency rules implied by spinlocks guarantee
	 * this ordering.
	 *
	 * If IPIs can go out of order with respect to the cache coherency
	 * protocol on an architecture, sufficient synchronisation should be
	 * added to the arch code to make it appear to obey cache coherency
	 * WRT locking and barrier primitives. Generic code isn't really
	 * equipped to do the right thing...
	 */
	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
		send_call_function_single_ipi(cpu);
}

/*
 * Insert a previously allocated call_single_data_t element
 * for execution on the given CPU. data must already have
 * ->func, ->info, and ->flags set.
 */
static int generic_exec_single(int cpu, call_single_data_t *csd)
{
	if (cpu == smp_processor_id()) {
		smp_call_func_t func = csd->func;
		void *info = csd->info;
		unsigned long flags;

		/*
		 * We can unlock early even for the synchronous on-stack case,
		 * since we're doing this from the same CPU..
		 */
		csd_lock_record(csd);
		csd_unlock(csd);
		local_irq_save(flags);
		func(info);
		csd_lock_record(NULL);
		local_irq_restore(flags);
		return 0;
	}

	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
		csd_unlock(csd);
		return -ENXIO;
	}

	__smp_call_single_queue(cpu, &csd->node.llist);

	return 0;
}

/**
 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
 *
 * Invoked by arch to handle an IPI for call function single.
 * Must be called with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
	flush_smp_call_function_queue(true);
}
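
/*
 * Illustrative sketch (hypothetical arch code, not part of this file): the
 * architecture's IPI entry point runs in hardirq context with interrupts
 * disabled and simply forwards to the generic handler, e.g.:
 *
 *	static void ipi_call_function_single(void)	// hypothetical name
 *	{
 *		irq_enter();
 *		generic_smp_call_function_single_interrupt();
 *		irq_exit();
 *	}
 */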

/**
 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *
 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
 *		      offline CPU. Skip this check if set to 'false'.
 *
 * Flush any pending smp-call-function callbacks queued on this CPU. This is
 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
 * to ensure that all pending IPI callbacks are run before it goes completely
 * offline.
 *
 * Loop through the call_single_queue and run all the queued callbacks.
 * Must be called with interrupts disabled.
 */
static void flush_smp_call_function_queue(bool warn_cpu_offline)
{
	call_single_data_t *csd, *csd_next;
	struct llist_node *entry, *prev;
	struct llist_head *head;
	static bool warned;

	lockdep_assert_irqs_disabled();

	head = this_cpu_ptr(&call_single_queue);
	entry = llist_del_all(head);
	entry = llist_reverse_order(entry);

	/* There shouldn't be any pending callbacks on an offline CPU. */
	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
		     !warned && !llist_empty(head))) {
		warned = true;
		WARN(1, "IPI on offline CPU %d\n", smp_processor_id());

		/*
		 * We don't have to use the _safe() variant here
		 * because we are not invoking the IPI handlers yet.
		 */
		llist_for_each_entry(csd, entry, node.llist) {
			switch (CSD_TYPE(csd)) {
			case CSD_TYPE_ASYNC:
			case CSD_TYPE_SYNC:
			case CSD_TYPE_IRQ_WORK:
				pr_warn("IPI callback %pS sent to offline CPU\n",
					csd->func);
				break;

			case CSD_TYPE_TTWU:
				pr_warn("IPI task-wakeup sent to offline CPU\n");
				break;

			default:
				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
					CSD_TYPE(csd));
				break;
			}
		}
	}

	/*
	 * First; run all SYNC callbacks, people are waiting for us.
	 */
	prev = NULL;
	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
		/* Do we wait until *after* callback? */
		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
			smp_call_func_t func = csd->func;
			void *info = csd->info;

			if (prev) {
				prev->next = &csd_next->node.llist;
			} else {
				entry = &csd_next->node.llist;
			}

			csd_lock_record(csd);
			func(info);
			csd_unlock(csd);
			csd_lock_record(NULL);
		} else {
			prev = &csd->node.llist;
		}
	}

	if (!entry)
		return;

	/*
	 * Second; run all !SYNC callbacks.
	 */
	prev = NULL;
	llist_for_each_entry_safe(csd, csd_next, entry, node.llist) {
		int type = CSD_TYPE(csd);

		if (type != CSD_TYPE_TTWU) {
			if (prev) {
				prev->next = &csd_next->node.llist;
			} else {
				entry = &csd_next->node.llist;
			}

			if (type == CSD_TYPE_ASYNC) {
				smp_call_func_t func = csd->func;
				void *info = csd->info;

				csd_lock_record(csd);
				csd_unlock(csd);
				func(info);
				csd_lock_record(NULL);
			} else if (type == CSD_TYPE_IRQ_WORK) {
				irq_work_single(csd);
			}

		} else {
			prev = &csd->node.llist;
		}
	}

	/*
	 * Third; only CSD_TYPE_TTWU is left, issue those.
	 */
	if (entry)
		sched_ttwu_pending(entry);
}

void flush_smp_call_function_from_idle(void)
{
	unsigned long flags;

	if (llist_empty(this_cpu_ptr(&call_single_queue)))
		return;

	local_irq_save(flags);
	flush_smp_call_function_queue(true);
	local_irq_restore(flags);
}

/*
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 */
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
			     int wait)
{
	call_single_data_t *csd;
	call_single_data_t csd_stack = {
		.node = { .u_flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC, },
	};
	int this_cpu;
	int err;

	/*
	 * prevent preemption and reschedule on another processor,
	 * as well as CPU removal
	 */
	this_cpu = get_cpu();

	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow CPUs that are not yet online though, as no one else can
	 * send an smp call function interrupt to this CPU and as such
	 * deadlocks can't happen.
	 */
	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
		     && !oops_in_progress);

	/*
	 * When @wait we can deadlock when we interrupt between llist_add() and
	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
	 * csd_lock(), because the interrupt context uses the same csd
	 * storage.
	 */
	WARN_ON_ONCE(!in_task());

	csd = &csd_stack;
	if (!wait) {
		csd = this_cpu_ptr(&csd_data);
		csd_lock(csd);
	}

	csd->func = func;
	csd->info = info;
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
	csd->node.src = smp_processor_id();
	csd->node.dst = cpu;
#endif

	err = generic_exec_single(cpu, csd);

	if (wait)
		csd_lock_wait(csd);

	put_cpu();

	return err;
}
EXPORT_SYMBOL(smp_call_function_single);
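
/*
 * Illustrative usage sketch (hypothetical caller, not part of this file):
 * read a per-CPU value on another CPU and wait for the result.  The per-CPU
 * variable and both function names below are made up for the example.
 *
 *	static DEFINE_PER_CPU(unsigned long, demo_counter);
 *
 *	static void demo_read_counter(void *info)
 *	{
 *		*(unsigned long *)info = this_cpu_read(demo_counter);
 *	}
 *
 *	static unsigned long demo_fetch_remote(int cpu)
 *	{
 *		unsigned long val = 0;
 *
 *		// wait=1: val is stable once the call returns (or on error).
 *		if (smp_call_function_single(cpu, demo_read_counter, &val, 1))
 *			pr_warn("demo: CPU%d not online\n", cpu);
 *		return val;
 *	}
 */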

/**
 * smp_call_function_single_async(): Run an asynchronous function on a
 *				     specific CPU.
 * @cpu: The CPU to run on.
 * @csd: Pre-allocated and setup data structure
 *
 * Like smp_call_function_single(), but the call is asynchronous and
 * can thus be done from contexts with disabled interrupts.
 *
 * The caller passes its own pre-allocated data structure
 * (ie: embedded in an object) and is responsible for synchronizing it
 * such that the IPIs performed on the @csd are strictly serialized.
 *
 * If the function is called with one csd which has not yet been
 * processed by a previous call to smp_call_function_single_async(), the
 * function will return immediately with -EBUSY showing that the csd
 * object is still in progress.
 *
 * NOTE: Be careful, there is unfortunately no current debugging facility to
 * validate the correctness of this serialization.
 */
int smp_call_function_single_async(int cpu, call_single_data_t *csd)
{
	int err = 0;

	preempt_disable();

	if (csd->node.u_flags & CSD_FLAG_LOCK) {
		err = -EBUSY;
		goto out;
	}

	csd->node.u_flags = CSD_FLAG_LOCK;
	smp_wmb();

	err = generic_exec_single(cpu, csd);

out:
	preempt_enable();

	return err;
}
EXPORT_SYMBOL_GPL(smp_call_function_single_async);
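
/*
 * Illustrative usage sketch (hypothetical driver code, not part of this
 * file): embed the csd in an object so each object has exactly one in-flight
 * request, and let -EBUSY tell the caller the previous kick has not been
 * processed yet.  All names below are made up for the example.
 *
 *	struct demo_dev {
 *		call_single_data_t	csd;
 *		unsigned long		pending;
 *	};
 *
 *	static void demo_remote_kick(void *info)
 *	{
 *		struct demo_dev *dev = info;
 *
 *		clear_bit(0, &dev->pending);	// runs in IRQ context on @cpu
 *	}
 *
 *	static void demo_init(struct demo_dev *dev)
 *	{
 *		dev->csd.func = demo_remote_kick;
 *		dev->csd.info = dev;
 *	}
 *
 *	static int demo_kick(struct demo_dev *dev, int cpu)
 *	{
 *		// May be called with interrupts disabled; -EBUSY means the
 *		// previous request is still queued or running.
 *		return smp_call_function_single_async(cpu, &dev->csd);
 *	}
 */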

/*
 * smp_call_function_any - Run a function on any of the given cpus
 * @mask: The mask of cpus it can run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed.
 *
 * Returns 0 on success, else a negative status code (if no cpus were online).
 *
 * Selection preference:
 *	1) current cpu if in @mask
 *	2) any cpu of current node if in @mask
 *	3) any other online cpu in @mask
 */
int smp_call_function_any(const struct cpumask *mask,
			  smp_call_func_t func, void *info, int wait)
{
	unsigned int cpu;
	const struct cpumask *nodemask;
	int ret;

	/* Try for same CPU (cheapest) */
	cpu = get_cpu();
	if (cpumask_test_cpu(cpu, mask))
		goto call;

	/* Try for same node. */
	nodemask = cpumask_of_node(cpu_to_node(cpu));
	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
		if (cpu_online(cpu))
			goto call;
	}

	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
	cpu = cpumask_any_and(mask, cpu_online_mask);
call:
	ret = smp_call_function_single(cpu, func, info, wait);
	put_cpu();
	return ret;
}
EXPORT_SYMBOL_GPL(smp_call_function_any);
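
/*
 * Illustrative usage sketch (hypothetical, not part of this file): run a
 * callback on whichever CPU of a given set is cheapest to reach, e.g. to
 * query a resource shared by that set of CPUs.  Names below are made up.
 *
 *	static void demo_read_shared(void *info)
 *	{
 *		*(u64 *)info = 42;	// stand-in for a per-domain read
 *	}
 *
 *	static int demo_query_domain(const struct cpumask *domain_cpus, u64 *out)
 *	{
 *		// Prefers the current CPU, then its node, then any online
 *		// CPU in @domain_cpus; waits for the result.
 *		return smp_call_function_any(domain_cpus, demo_read_shared, out, 1);
 *	}
 */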

static void smp_call_function_many_cond(const struct cpumask *mask,
					smp_call_func_t func, void *info,
					bool wait, smp_cond_func_t cond_func)
{
	struct call_function_data *cfd;
	int cpu, next_cpu, this_cpu = smp_processor_id();

	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow CPUs that are not yet online though, as no one else can
	 * send an smp call function interrupt to this CPU and as such
	 * deadlocks can't happen.
	 */
	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
		     && !oops_in_progress && !early_boot_irqs_disabled);

	/*
	 * When @wait we can deadlock when we interrupt between llist_add() and
	 * arch_send_call_function_ipi*(); when !@wait we can deadlock due to
	 * csd_lock(), because the interrupt context uses the same csd
	 * storage.
	 */
	WARN_ON_ONCE(!in_task());

	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
	cpu = cpumask_first_and(mask, cpu_online_mask);
	if (cpu == this_cpu)
		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);

	/* No online cpus?  We're done. */
	if (cpu >= nr_cpu_ids)
		return;

	/* Do we have another CPU which isn't us? */
	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (next_cpu == this_cpu)
		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

	/* Fastpath: do that cpu by itself. */
	if (next_cpu >= nr_cpu_ids) {
		if (!cond_func || cond_func(cpu, info))
			smp_call_function_single(cpu, func, info, wait);
		return;
	}

	cfd = this_cpu_ptr(&cfd_data);

	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
	__cpumask_clear_cpu(this_cpu, cfd->cpumask);

	/* Some callers race with other cpus changing the passed mask */
	if (unlikely(!cpumask_weight(cfd->cpumask)))
		return;

	cpumask_clear(cfd->cpumask_ipi);
	for_each_cpu(cpu, cfd->cpumask) {
		call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);

		if (cond_func && !cond_func(cpu, info))
			continue;

		csd_lock(csd);
		if (wait)
			csd->node.u_flags |= CSD_TYPE_SYNC;
		csd->func = func;
		csd->info = info;
#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
		csd->node.src = smp_processor_id();
		csd->node.dst = cpu;
#endif
		if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu)))
			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
	}

	/* Send a message to all CPUs in the map */
	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);

	if (wait) {
		for_each_cpu(cpu, cfd->cpumask) {
			call_single_data_t *csd;

			csd = per_cpu_ptr(cfd->csd, cpu);
			csd_lock_wait(csd);
		}
	}
}

/**
 * smp_call_function_many(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
void smp_call_function_many(const struct cpumask *mask,
			    smp_call_func_t func, void *info, bool wait)
{
	smp_call_function_many_cond(mask, func, info, wait, NULL);
}
EXPORT_SYMBOL(smp_call_function_many);
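
/*
 * Illustrative usage sketch (hypothetical, not part of this file): flush a
 * per-CPU cache on a set of CPUs.  Note the preempt_disable()/preempt_enable()
 * pair required by the comment above, and that the calling CPU is never IPI'd
 * by smp_call_function_many(), so it is handled locally.  Names are made up.
 *
 *	static void demo_flush_local(void *info)
 *	{
 *		// fast, non-blocking, runs in IRQ context on each target CPU
 *	}
 *
 *	static void demo_flush_cpus(const struct cpumask *cpus)
 *	{
 *		preempt_disable();
 *		smp_call_function_many(cpus, demo_flush_local, NULL, 1);
 *		if (cpumask_test_cpu(smp_processor_id(), cpus))
 *			demo_flush_local(NULL);
 *		preempt_enable();
 *	}
 */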

/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
void smp_call_function(smp_call_func_t func, void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();
}
EXPORT_SYMBOL(smp_call_function);

/* Setup configured maximum number of CPUs to activate */
unsigned int setup_max_cpus = NR_CPUS;
EXPORT_SYMBOL(setup_max_cpus);

/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */

void __weak arch_disable_smp_support(void) { }

static int __init nosmp(char *str)
{
	setup_max_cpus = 0;
	arch_disable_smp_support();

	return 0;
}

early_param("nosmp", nosmp);

/* this is hard limit */
static int __init nrcpus(char *str)
{
	int nr_cpus;

	if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids)
		nr_cpu_ids = nr_cpus;

	return 0;
}

early_param("nr_cpus", nrcpus);

static int __init maxcpus(char *str)
{
	get_option(&str, &setup_max_cpus);
	if (setup_max_cpus == 0)
		arch_disable_smp_support();

	return 0;
}

early_param("maxcpus", maxcpus);

/* Setup number of possible processor ids */
unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);

/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
void __init setup_nr_cpu_ids(void)
{
	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1;
}

/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
	int num_nodes, num_cpus;

	idle_threads_init();
	cpuhp_threads_init();

	pr_info("Bringing up secondary CPUs ...\n");

	bringup_nonboot_cpus(setup_max_cpus);

	num_nodes = num_online_nodes();
	num_cpus  = num_online_cpus();
	pr_info("Brought up %d node%s, %d CPU%s\n",
		num_nodes, (num_nodes > 1 ? "s" : ""),
		num_cpus,  (num_cpus  > 1 ? "s" : ""));

	/* Any cleanup work */
	smp_cpus_done(setup_max_cpus);
}

/*
 * Call a function on all processors.  May be used during early boot while
 * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
 * of local_irq_disable/enable().
 */
void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
	unsigned long flags;

	preempt_disable();
	smp_call_function(func, info, wait);
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);
	preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu);
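
/*
 * Illustrative usage sketch (hypothetical, not part of this file): run the
 * same callback on every online CPU, including the current one, and wait for
 * completion.  The callback name is made up for the example.
 *
 *	static void demo_sync_state(void *info)
 *	{
 *		// runs with IRQs disabled on every CPU; keep it short
 *	}
 *
 *	// From task context, with interrupts enabled:
 *	//	on_each_cpu(demo_sync_state, NULL, 1);
 */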

/**
 * on_each_cpu_mask(): Run a function on processors specified by
 * cpumask, which may include the local processor.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.  The
 * exception is that it may be used during early boot while
 * early_boot_irqs_disabled is set.
 */
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
		      void *info, bool wait)
{
	int cpu = get_cpu();

	smp_call_function_many(mask, func, info, wait);
	if (cpumask_test_cpu(cpu, mask)) {
		unsigned long flags;

		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
	}
	put_cpu();
}
EXPORT_SYMBOL(on_each_cpu_mask);

/*
 * on_each_cpu_cond(): Call a function on each processor for which
 * the supplied function cond_func returns true, optionally waiting
 * for all the required CPUs to finish. This may include the local
 * processor.
 * @cond_func:	A callback function that is passed a cpu id and
 *		the info parameter. The function is called
 *		with preemption disabled. The function should
 *		return a boolean value indicating whether to IPI
 *		the specified CPU.
 * @func:	The function to run on all applicable CPUs.
 *		This must be fast and non-blocking.
 * @info:	An arbitrary pointer to pass to both functions.
 * @wait:	If true, wait (atomically) until function has
 *		completed on other CPUs.
 *
 * Preemption is disabled to protect against CPUs going offline but not online.
 * CPUs going online during the call will not be seen or sent an IPI.
 *
 * You must not call this function with disabled interrupts or
 * from a hardware interrupt handler or from a bottom half handler.
 */
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
			   void *info, bool wait, const struct cpumask *mask)
{
	int cpu = get_cpu();

	smp_call_function_many_cond(mask, func, info, wait, cond_func);
	if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
		unsigned long flags;

		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
	}
	put_cpu();
}
EXPORT_SYMBOL(on_each_cpu_cond_mask);
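
/*
 * Illustrative usage sketch (hypothetical, not part of this file): only
 * interrupt the CPUs that actually have work to do, e.g. a dirty per-CPU
 * flag, and skip the rest.  All names are made up for the example.
 *
 *	static DEFINE_PER_CPU(bool, demo_dirty);
 *
 *	static bool demo_cpu_is_dirty(int cpu, void *info)
 *	{
 *		return per_cpu(demo_dirty, cpu);
 *	}
 *
 *	static void demo_clean(void *info)
 *	{
 *		this_cpu_write(demo_dirty, false);
 *	}
 *
 *	// From task context, with interrupts enabled:
 *	//	on_each_cpu_cond_mask(demo_cpu_is_dirty, demo_clean, NULL, 1,
 *	//			      cpu_online_mask);
 */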

void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
		      void *info, bool wait)
{
	on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
}
EXPORT_SYMBOL(on_each_cpu_cond);

static void do_nothing(void *unused)
{
}

/**
 * kick_all_cpus_sync - Force all cpus out of idle
 *
 * Used to synchronize the update of pm_idle function pointer. It's
 * called after the pointer is updated and returns after the dummy
 * callback function has been executed on all cpus. The execution of
 * the function can only happen on the remote cpus after they have
 * left the idle function which had been called via pm_idle function
 * pointer. So it's guaranteed that nothing uses the previous pointer
 * anymore.
 */
void kick_all_cpus_sync(void)
{
	/* Make sure the change is visible before we kick the cpus */
	smp_mb();
	smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(kick_all_cpus_sync);

/**
 * wake_up_all_idle_cpus - break all cpus out of idle
 *
 * wake_up_all_idle_cpus tries to break all CPUs out of idle, including
 * CPUs that are idle-polling; CPUs that are not idle are left alone.
 */
void wake_up_all_idle_cpus(void)
{
	int cpu;

	preempt_disable();
	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;

		wake_up_if_idle(cpu);
	}
	preempt_enable();
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);

/**
 * smp_call_on_cpu - Call a function on a specific cpu
 *
 * Used to call a function on a specific cpu and wait for it to return.
 * Optionally make sure the call is done on a specified physical cpu via vcpu
 * pinning in order to support virtualized environments.
 */
struct smp_call_on_cpu_struct {
	struct work_struct	work;
	struct completion	done;
	int			(*func)(void *);
	void			*data;
	int			ret;
	int			cpu;
};

static void smp_call_on_cpu_callback(struct work_struct *work)
{
	struct smp_call_on_cpu_struct *sscs;

	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(sscs->cpu);
	sscs->ret = sscs->func(sscs->data);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(-1);

	complete(&sscs->done);
}

int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
	struct smp_call_on_cpu_struct sscs = {
		.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
		.func = func,
		.data = par,
		.cpu  = phys ? cpu : -1,
	};

	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);

	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
		return -ENXIO;

	queue_work_on(cpu, system_wq, &sscs.work);
	wait_for_completion(&sscs.done);

	return sscs.ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);
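
/*
 * Illustrative usage sketch (hypothetical, not part of this file): run a
 * function that may sleep on a particular CPU and collect its return value.
 * Unlike the smp_call_function*() family, the callback runs from a workqueue
 * on @cpu (process context), so it is allowed to block.  Names are made up.
 *
 *	static int demo_read_slow_resource(void *data)
 *	{
 *		// may sleep; runs on the requested CPU
 *		return 0;
 *	}
 *
 *	// phys=true additionally pins the vCPU to the physical CPU on
 *	// hypervisors that implement hypervisor_pin_vcpu():
 *	//	int ret = smp_call_on_cpu(2, demo_read_slow_resource, NULL, false);
 */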