1 /*- 2 * Copyright (c) 2001 3 * John Baldwin <jhb@FreeBSD.org>. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the author nor the names of any co-contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY JOHN BALDWIN AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL JOHN BALDWIN OR THE VOICES IN HIS HEAD 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * This module holds the global variables and machine independent functions 32 * used for the kernel SMP support. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/ktr.h> 42 #include <sys/proc.h> 43 #include <sys/bus.h> 44 #include <sys/lock.h> 45 #include <sys/mutex.h> 46 #include <sys/pcpu.h> 47 #include <sys/smp.h> 48 #include <sys/sysctl.h> 49 50 #include <machine/cpu.h> 51 #include <machine/smp.h> 52 53 #include "opt_sched.h" 54 55 #ifdef SMP 56 volatile cpumask_t stopped_cpus; 57 volatile cpumask_t started_cpus; 58 cpumask_t idle_cpus_mask; 59 cpumask_t hlt_cpus_mask; 60 cpumask_t logical_cpus_mask; 61 62 void (*cpustop_restartfunc)(void); 63 #endif 64 /* This is used in modules that need to work in both SMP and UP. */ 65 cpumask_t all_cpus; 66 67 int mp_ncpus; 68 /* export this for libkvm consumers. */ 69 int mp_maxcpus = MAXCPU; 70 71 volatile int smp_started; 72 u_int mp_maxid; 73 74 SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP"); 75 76 SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD, &mp_maxcpus, 0, 77 "Max number of CPUs that the system was compiled for."); 78 79 int smp_active = 0; /* are the APs allowed to run? */ 80 SYSCTL_INT(_kern_smp, OID_AUTO, active, CTLFLAG_RW, &smp_active, 0, 81 "Number of Auxillary Processors (APs) that were successfully started"); 82 83 int smp_disabled = 0; /* has smp been disabled? */ 84 SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN, &smp_disabled, 0, 85 "SMP has been disabled from the loader"); 86 TUNABLE_INT("kern.smp.disabled", &smp_disabled); 87 88 int smp_cpus = 1; /* how many cpu's running */ 89 SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD, &smp_cpus, 0, 90 "Number of CPUs online"); 91 92 int smp_topology = 0; /* Which topology we're using. */ 93 SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0, 94 "Topology override setting; 0 is default provided by hardware."); 95 TUNABLE_INT("kern.smp.topology", &smp_topology); 96 97 #ifdef SMP 98 /* Enable forwarding of a signal to a process running on a different CPU */ 99 static int forward_signal_enabled = 1; 100 SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW, 101 &forward_signal_enabled, 0, 102 "Forwarding of a signal to a process on a different CPU"); 103 104 /* Enable forwarding of roundrobin to all other cpus */ 105 static int forward_roundrobin_enabled = 1; 106 SYSCTL_INT(_kern_smp, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, 107 &forward_roundrobin_enabled, 0, 108 "Forwarding of roundrobin to all other CPUs"); 109 110 /* Variables needed for SMP rendezvous. */ 111 static void (*volatile smp_rv_setup_func)(void *arg); 112 static void (*volatile smp_rv_action_func)(void *arg); 113 static void (*volatile smp_rv_teardown_func)(void *arg); 114 static void * volatile smp_rv_func_arg; 115 static volatile int smp_rv_waiters[3]; 116 117 /* 118 * Shared mutex to restrict busywaits between smp_rendezvous() and 119 * smp(_targeted)_tlb_shootdown(). A deadlock occurs if both of these 120 * functions trigger at once and cause multiple CPUs to busywait with 121 * interrupts disabled. 122 */ 123 struct mtx smp_ipi_mtx; 124 125 /* 126 * Let the MD SMP code initialize mp_maxid very early if it can. 127 */ 128 static void 129 mp_setmaxid(void *dummy) 130 { 131 cpu_mp_setmaxid(); 132 } 133 SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL); 134 135 /* 136 * Call the MD SMP initialization code. 137 */ 138 static void 139 mp_start(void *dummy) 140 { 141 142 /* Probe for MP hardware. */ 143 if (smp_disabled != 0 || cpu_mp_probe() == 0) { 144 mp_ncpus = 1; 145 all_cpus = PCPU_GET(cpumask); 146 return; 147 } 148 149 mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN); 150 cpu_mp_start(); 151 printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n", 152 mp_ncpus); 153 cpu_mp_announce(); 154 } 155 SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL); 156 157 void 158 forward_signal(struct thread *td) 159 { 160 int id; 161 162 /* 163 * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on 164 * this thread, so all we need to do is poke it if it is currently 165 * executing so that it executes ast(). 166 */ 167 THREAD_LOCK_ASSERT(td, MA_OWNED); 168 KASSERT(TD_IS_RUNNING(td), 169 ("forward_signal: thread is not TDS_RUNNING")); 170 171 CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc); 172 173 if (!smp_started || cold || panicstr) 174 return; 175 if (!forward_signal_enabled) 176 return; 177 178 /* No need to IPI ourself. */ 179 if (td == curthread) 180 return; 181 182 id = td->td_oncpu; 183 if (id == NOCPU) 184 return; 185 ipi_selected(1 << id, IPI_AST); 186 } 187 188 void 189 forward_roundrobin(void) 190 { 191 struct pcpu *pc; 192 struct thread *td; 193 cpumask_t id, map, me; 194 195 CTR0(KTR_SMP, "forward_roundrobin()"); 196 197 if (!smp_started || cold || panicstr) 198 return; 199 if (!forward_roundrobin_enabled) 200 return; 201 map = 0; 202 me = PCPU_GET(cpumask); 203 SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { 204 td = pc->pc_curthread; 205 id = pc->pc_cpumask; 206 if (id != me && (id & stopped_cpus) == 0 && 207 !TD_IS_IDLETHREAD(td)) { 208 td->td_flags |= TDF_NEEDRESCHED; 209 map |= id; 210 } 211 } 212 ipi_selected(map, IPI_AST); 213 } 214 215 /* 216 * When called the executing CPU will send an IPI to all other CPUs 217 * requesting that they halt execution. 218 * 219 * Usually (but not necessarily) called with 'other_cpus' as its arg. 220 * 221 * - Signals all CPUs in map to stop. 222 * - Waits for each to stop. 223 * 224 * Returns: 225 * -1: error 226 * 0: NA 227 * 1: ok 228 * 229 * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs 230 * from executing at same time. 231 */ 232 int 233 stop_cpus(cpumask_t map) 234 { 235 int i; 236 237 if (!smp_started) 238 return 0; 239 240 CTR1(KTR_SMP, "stop_cpus(%x)", map); 241 242 /* send the stop IPI to all CPUs in map */ 243 ipi_selected(map, IPI_STOP); 244 245 i = 0; 246 while ((stopped_cpus & map) != map) { 247 /* spin */ 248 cpu_spinwait(); 249 i++; 250 #ifdef DIAGNOSTIC 251 if (i == 100000) { 252 printf("timeout stopping cpus\n"); 253 break; 254 } 255 #endif 256 } 257 258 return 1; 259 } 260 261 /* 262 * Called by a CPU to restart stopped CPUs. 263 * 264 * Usually (but not necessarily) called with 'stopped_cpus' as its arg. 265 * 266 * - Signals all CPUs in map to restart. 267 * - Waits for each to restart. 268 * 269 * Returns: 270 * -1: error 271 * 0: NA 272 * 1: ok 273 */ 274 int 275 restart_cpus(cpumask_t map) 276 { 277 278 if (!smp_started) 279 return 0; 280 281 CTR1(KTR_SMP, "restart_cpus(%x)", map); 282 283 /* signal other cpus to restart */ 284 atomic_store_rel_int(&started_cpus, map); 285 286 /* wait for each to clear its bit */ 287 while ((stopped_cpus & map) != 0) 288 cpu_spinwait(); 289 290 return 1; 291 } 292 293 void 294 smp_no_rendevous_barrier(void *dummy) 295 { 296 #ifdef SMP 297 KASSERT((!smp_started),("smp_no_rendevous called and smp is started")); 298 #endif 299 } 300 301 /* 302 * All-CPU rendezvous. CPUs are signalled, all execute the setup function 303 * (if specified), rendezvous, execute the action function (if specified), 304 * rendezvous again, execute the teardown function (if specified), and then 305 * resume. 306 * 307 * Note that the supplied external functions _must_ be reentrant and aware 308 * that they are running in parallel and in an unknown lock context. 309 */ 310 void 311 smp_rendezvous_action(void) 312 { 313 void* local_func_arg = smp_rv_func_arg; 314 void (*local_setup_func)(void*) = smp_rv_setup_func; 315 void (*local_action_func)(void*) = smp_rv_action_func; 316 void (*local_teardown_func)(void*) = smp_rv_teardown_func; 317 318 /* Ensure we have up-to-date values. */ 319 atomic_add_acq_int(&smp_rv_waiters[0], 1); 320 while (smp_rv_waiters[0] < mp_ncpus) 321 cpu_spinwait(); 322 323 /* setup function */ 324 if (local_setup_func != smp_no_rendevous_barrier) { 325 if (smp_rv_setup_func != NULL) 326 smp_rv_setup_func(smp_rv_func_arg); 327 328 /* spin on entry rendezvous */ 329 atomic_add_int(&smp_rv_waiters[1], 1); 330 while (smp_rv_waiters[1] < mp_ncpus) 331 cpu_spinwait(); 332 } 333 334 /* action function */ 335 if (local_action_func != NULL) 336 local_action_func(local_func_arg); 337 338 /* spin on exit rendezvous */ 339 atomic_add_int(&smp_rv_waiters[2], 1); 340 if (local_teardown_func == smp_no_rendevous_barrier) 341 return; 342 while (smp_rv_waiters[2] < mp_ncpus) 343 cpu_spinwait(); 344 345 /* teardown function */ 346 if (local_teardown_func != NULL) 347 local_teardown_func(local_func_arg); 348 } 349 350 void 351 smp_rendezvous(void (* setup_func)(void *), 352 void (* action_func)(void *), 353 void (* teardown_func)(void *), 354 void *arg) 355 { 356 357 if (!smp_started) { 358 if (setup_func != NULL) 359 setup_func(arg); 360 if (action_func != NULL) 361 action_func(arg); 362 if (teardown_func != NULL) 363 teardown_func(arg); 364 return; 365 } 366 367 /* obtain rendezvous lock */ 368 mtx_lock_spin(&smp_ipi_mtx); 369 370 /* set static function pointers */ 371 smp_rv_setup_func = setup_func; 372 smp_rv_action_func = action_func; 373 smp_rv_teardown_func = teardown_func; 374 smp_rv_func_arg = arg; 375 smp_rv_waiters[1] = 0; 376 smp_rv_waiters[2] = 0; 377 atomic_store_rel_int(&smp_rv_waiters[0], 0); 378 379 /* signal other processors, which will enter the IPI with interrupts off */ 380 ipi_all_but_self(IPI_RENDEZVOUS); 381 382 /* call executor function */ 383 smp_rendezvous_action(); 384 385 if (teardown_func == smp_no_rendevous_barrier) 386 while (atomic_load_acq_int(&smp_rv_waiters[2]) < mp_ncpus) 387 cpu_spinwait(); 388 389 /* release lock */ 390 mtx_unlock_spin(&smp_ipi_mtx); 391 } 392 393 static struct cpu_group group[MAXCPU]; 394 395 struct cpu_group * 396 smp_topo(void) 397 { 398 struct cpu_group *top; 399 400 /* 401 * Check for a fake topology request for debugging purposes. 402 */ 403 switch (smp_topology) { 404 case 1: 405 /* Dual core with no sharing. */ 406 top = smp_topo_1level(CG_SHARE_NONE, 2, 0); 407 break; 408 case 2: 409 /* No topology, all cpus are equal. */ 410 top = smp_topo_none(); 411 break; 412 case 3: 413 /* Dual core with shared L2. */ 414 top = smp_topo_1level(CG_SHARE_L2, 2, 0); 415 break; 416 case 4: 417 /* quad core, shared l3 among each package, private l2. */ 418 top = smp_topo_1level(CG_SHARE_L3, 4, 0); 419 break; 420 case 5: 421 /* quad core, 2 dualcore parts on each package share l2. */ 422 top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0); 423 break; 424 case 6: 425 /* Single-core 2xHTT */ 426 top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT); 427 break; 428 case 7: 429 /* quad core with a shared l3, 8 threads sharing L2. */ 430 top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8, 431 CG_FLAG_THREAD); 432 break; 433 default: 434 /* Default, ask the system what it wants. */ 435 top = cpu_topo(); 436 break; 437 } 438 /* 439 * Verify the returned topology. 440 */ 441 if (top->cg_count != mp_ncpus) 442 panic("Built bad topology at %p. CPU count %d != %d", 443 top, top->cg_count, mp_ncpus); 444 if (top->cg_mask != all_cpus) 445 panic("Built bad topology at %p. CPU mask 0x%X != 0x%X", 446 top, top->cg_mask, all_cpus); 447 return (top); 448 } 449 450 struct cpu_group * 451 smp_topo_none(void) 452 { 453 struct cpu_group *top; 454 455 top = &group[0]; 456 top->cg_parent = NULL; 457 top->cg_child = NULL; 458 top->cg_mask = (1 << mp_ncpus) - 1; 459 top->cg_count = mp_ncpus; 460 top->cg_children = 0; 461 top->cg_level = CG_SHARE_NONE; 462 top->cg_flags = 0; 463 464 return (top); 465 } 466 467 static int 468 smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, 469 int count, int flags, int start) 470 { 471 cpumask_t mask; 472 int i; 473 474 for (mask = 0, i = 0; i < count; i++, start++) 475 mask |= (1 << start); 476 child->cg_parent = parent; 477 child->cg_child = NULL; 478 child->cg_children = 0; 479 child->cg_level = share; 480 child->cg_count = count; 481 child->cg_flags = flags; 482 child->cg_mask = mask; 483 parent->cg_children++; 484 for (; parent != NULL; parent = parent->cg_parent) { 485 if ((parent->cg_mask & child->cg_mask) != 0) 486 panic("Duplicate children in %p. mask 0x%X child 0x%X", 487 parent, parent->cg_mask, child->cg_mask); 488 parent->cg_mask |= child->cg_mask; 489 parent->cg_count += child->cg_count; 490 } 491 492 return (start); 493 } 494 495 struct cpu_group * 496 smp_topo_1level(int share, int count, int flags) 497 { 498 struct cpu_group *child; 499 struct cpu_group *top; 500 int packages; 501 int cpu; 502 int i; 503 504 cpu = 0; 505 top = &group[0]; 506 packages = mp_ncpus / count; 507 top->cg_child = child = &group[1]; 508 top->cg_level = CG_SHARE_NONE; 509 for (i = 0; i < packages; i++, child++) 510 cpu = smp_topo_addleaf(top, child, share, count, flags, cpu); 511 return (top); 512 } 513 514 struct cpu_group * 515 smp_topo_2level(int l2share, int l2count, int l1share, int l1count, 516 int l1flags) 517 { 518 struct cpu_group *top; 519 struct cpu_group *l1g; 520 struct cpu_group *l2g; 521 int cpu; 522 int i; 523 int j; 524 525 cpu = 0; 526 top = &group[0]; 527 l2g = &group[1]; 528 top->cg_child = l2g; 529 top->cg_level = CG_SHARE_NONE; 530 top->cg_children = mp_ncpus / (l2count * l1count); 531 l1g = l2g + top->cg_children; 532 for (i = 0; i < top->cg_children; i++, l2g++) { 533 l2g->cg_parent = top; 534 l2g->cg_child = l1g; 535 l2g->cg_level = l2share; 536 for (j = 0; j < l2count; j++, l1g++) 537 cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count, 538 l1flags, cpu); 539 } 540 return (top); 541 } 542 543 544 struct cpu_group * 545 smp_topo_find(struct cpu_group *top, int cpu) 546 { 547 struct cpu_group *cg; 548 cpumask_t mask; 549 int children; 550 int i; 551 552 mask = (1 << cpu); 553 cg = top; 554 for (;;) { 555 if ((cg->cg_mask & mask) == 0) 556 return (NULL); 557 if (cg->cg_children == 0) 558 return (cg); 559 children = cg->cg_children; 560 for (i = 0, cg = cg->cg_child; i < children; cg++, i++) 561 if ((cg->cg_mask & mask) != 0) 562 break; 563 } 564 return (NULL); 565 } 566 #else /* !SMP */ 567 568 void 569 smp_rendezvous(void (*setup_func)(void *), 570 void (*action_func)(void *), 571 void (*teardown_func)(void *), 572 void *arg) 573 { 574 575 if (setup_func != NULL) 576 setup_func(arg); 577 if (action_func != NULL) 578 action_func(arg); 579 if (teardown_func != NULL) 580 teardown_func(arg); 581 } 582 583 /* 584 * Provide dummy SMP support for UP kernels. Modules that need to use SMP 585 * APIs will still work using this dummy support. 586 */ 587 static void 588 mp_setvariables_for_up(void *dummy) 589 { 590 mp_ncpus = 1; 591 mp_maxid = PCPU_GET(cpuid); 592 all_cpus = PCPU_GET(cpumask); 593 KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero")); 594 } 595 SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST, 596 mp_setvariables_for_up, NULL); 597 #endif /* SMP */ 598