/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 */

#include <sys/types.h>

#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/mmu.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>

static struct	xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t	xc_xlat_xcptoipl[X_CALL_LEVELS] = {
	XC_LO_PIL,
	XC_MED_PIL,
	XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int	xc_initialized = 0;
extern ulong_t	cpu_ready_set;

void
xc_init()
{
	/*
	 * By making these mutexes type MUTEX_DRIVER, the ones below
	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
	 * ones above LOCK_LEVEL will be spin mutexes.
	 */
	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_LO_PIL));
	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_MED_PIL));
	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_HI_PIL));

	xc_initialized = 1;
}

/*
 * Used by the debugger to determine whether or not cross calls have been
 * initialized and are safe to use.
 */
int
kdi_xc_initialized(void)
{
	return (xc_initialized);
}

#define	CAPTURE_CPU_ARG	0xffffffff
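
/*
 * Illustrative usage sketch (not part of the original source): running a
 * handler on a set of CPUs with xc_call().  The handler name, its return
 * type and the caller below are hypothetical; see <sys/x_call.h> for the
 * authoritative prototypes.  If the current CPU is in "set", xc_common()
 * simply invokes the handler locally as well.
 */
#if 0
static int
example_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
{
	/* ... per-CPU work, runs at the cross-call PIL ... */
	return (0);
}

static void
example_broadcast(cpuset_t set)
{
	xc_call((xc_arg_t)0, (xc_arg_t)0, (xc_arg_t)0,
	    X_CALL_HIPRI, set, example_handler);
}
#endif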

/*
 * X-call interrupt service routine.
 *
 *	arg1 == X_CALL_MEDPRI	- capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	int	op;
	int	pri = (int)(uintptr_t)arg1;
	struct cpu *cpup = CPU;
	xc_arg_t *argp;
	xc_arg_t arg2val;
	uint_t	tlbflush;

	if (pri == X_CALL_MEDPRI) {

		argp = &xc_mboxes[X_CALL_MEDPRI].arg2;
		arg2val = *argp;
		if (arg2val != CAPTURE_CPU_ARG &&
		    !(arg2val & (1 << cpup->cpu_id)))
			return (DDI_INTR_UNCLAIMED);
		ASSERT(arg2val == CAPTURE_CPU_ARG);
		if (cpup->cpu_m.xc_pend[pri] == 0)
			return (DDI_INTR_UNCLAIMED);

		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

		for (;;) {
			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
			    (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
				break;
			ht_pause();
		}
		return (DDI_INTR_CLAIMED);
	}
	if (cpup->cpu_m.xc_pend[pri] == 0)
		return (DDI_INTR_UNCLAIMED);

	cpup->cpu_m.xc_pend[pri] = 0;
	op = cpup->cpu_m.xc_state[pri];

	/*
	 * When invalidating TLB entries, wait until the initiator changes the
	 * memory PTE before doing any INVLPG.  Otherwise, if the PTE in memory
	 * hasn't been changed, the processor's TLB Flush filter may ignore
	 * the INVLPG instruction.
	 */
	tlbflush = (cpup->cpu_m.xc_wait[pri] == 2);

	/*
	 * Don't invoke a null function.
	 */
	if (xc_mboxes[pri].func != NULL) {
		if (!tlbflush)
			cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
			    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
			    xc_mboxes[pri].arg3);
	} else
		cpup->cpu_m.xc_retval[pri] = 0;

	/*
	 * Acknowledge that we have completed the x-call operation.
	 */
	cpup->cpu_m.xc_ack[pri] = 1;

	if (op == XC_CALL_OP)
		return (DDI_INTR_CLAIMED);

	/*
	 * For the (op == XC_SYNC_OP) case:
	 * Wait for the initiator of the x-call to indicate
	 * that all CPUs involved can proceed.
	 */
	while (cpup->cpu_m.xc_wait[pri])
		ht_pause();

	while (cpup->cpu_m.xc_state[pri] != XC_DONE)
		ht_pause();

	/*
	 * Flush the TLB, if that's what is requested.
	 */
	if (xc_mboxes[pri].func != NULL && tlbflush) {
		cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
		    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
		    xc_mboxes[pri].arg3);
	}

	/*
	 * Acknowledge that we have received the directive to continue.
	 */
	ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
	cpup->cpu_m.xc_ack[pri] = 1;

	return (DDI_INTR_CLAIMED);
}


/*
 * xc_do_call: common worker for xc_call(), xc_sync() and xc_wait_sync().
 */
static void
xc_do_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func,
	int sync)
{
	/*
	 * If the pri indicates a low priority lock (below LOCK_LEVEL),
	 * we must disable preemption to avoid migrating to another CPU
	 * during the call.
	 */
	if (pri == X_CALL_LOPRI) {
		kpreempt_disable();
	} else {
		pri = X_CALL_HIPRI;
	}

	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	xc_common(func, arg1, arg2, arg3, pri, set, sync);
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	if (pri == X_CALL_LOPRI)
		kpreempt_enable();
}


/*
 * xc_call: call specified function on all processors
 * remotes may continue after service
 * we wait here until everybody has completed.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}

/*
 * xc_sync: call specified function on all processors
 * after doing work, each remote waits until we let
 * it continue; send the continue after everyone has
 * informed us that they are done.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}

/*
 * xc_wait_sync: similar to xc_sync(), except that the starting
 * cpu waits for all other cpus to check in before running its
 * service locally.
 */
void
xc_wait_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 2);
}


/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */

/*
 * Capture the CPUs specified in order to start a x-call session,
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
	int cix;
	int lcx;
	struct cpu *cpup;
	int	i;
	cpuset_t *cpus;
	cpuset_t c;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

	/*
	 * Prevent deadlocks where we take an interrupt and are waiting
	 * for a mutex owned by one of the CPUs that is captured for
	 * the x-call, while that CPU is waiting for some x-call signal
	 * to be set by us.
	 *
	 * This mutex also prevents preemption, since it raises SPL above
	 * LOCK_LEVEL (it is a spin-type driver mutex).
	 */
	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	lcx = CPU->cpu_id;	/* now we're safe */

	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Wait for all cpus
	 */
	cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
	if (CPU_IN_SET(*cpus, CPU->cpu_id))
		CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
	for (;;) {
		c = *(volatile cpuset_t *)cpus;
		CPUSET_AND(c, cpu_ready_set);
		if (CPUSET_ISNULL(c))
			break;
		ht_pause();
	}

	/*
	 * Store the set of CPUs involved in the x-call session, so that
	 * xc_release_cpus will know what CPUs to act upon.
	 */
	xc_mboxes[X_CALL_MEDPRI].set = set;
	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

	/*
	 * Now capture each CPU in the set and cause it to go into a
	 * holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case CPU wasn't ready, but becomes ready later,
			 * take the CPU out of the set now.
			 */
			CPUSET_DEL(set, cix);
			continue;
		}
		if (cix != lcx && CPU_IN_SET(set, cix)) {
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
			send_dirint(cix, XC_MED_PIL);
		}
		i++;
		if (i >= ncpus)
			break;
	}

	/*
	 * Wait here until all remote calls complete.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
				ht_pause();
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
		}
		i++;
		if (i >= ncpus)
			break;
	}

}
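
/*
 * Illustrative usage sketch (not part of the original source): bracketing a
 * critical section with xc_capture_cpus()/xc_release_cpus() so that every
 * other ready CPU sits in its holding pattern while globally visible state
 * is updated.  CPUSET_ALL_BUT() is used here the same way kdi_xc_others()
 * uses it below; the update itself is a placeholder.
 */
#if 0
static void
example_critical_section(void)
{
	cpuset_t set;

	CPUSET_ALL_BUT(set, CPU->cpu_id);	/* every CPU but ourselves */
	xc_capture_cpus(set);

	/* ... update state that no other CPU may observe mid-change ... */

	xc_release_cpus();
}
#endif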

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
	struct cpu *cpup;
	int	i;

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

	/*
	 * Allow each CPU to exit its holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL)
			continue;
		if ((cpup->cpu_flags & CPU_READY) &&
		    (cix != lcx) && CPU_IN_SET(set, cix)) {
			/*
			 * Setting XC_DONE lets the CPU break out of its
			 * holding pattern in xc_serv().
			 */
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
		}
		i++;
		if (i >= ncpus)
			break;
	}

	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}

/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done.
 *	-1 - no waiting, don't release remotes
 *	0 - no waiting, release remotes immediately
 *	1 - run service locally w/o waiting for remotes.
 *	2 - wait for remotes before running locally
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	int sync)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	struct cpu *cpup;

	ASSERT(panicstr == NULL);

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Set up the service definition mailbox.
	 */
	xc_mboxes[pri].func = func;
	xc_mboxes[pri].arg1 = arg1;
	xc_mboxes[pri].arg2 = arg2;
	xc_mboxes[pri].arg3 = arg3;

	/*
	 * Request service on all remote processors.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case the non-local CPU is not ready but becomes
			 * ready later, take it out of the set now. The local
			 * CPU needs to remain in the set to complete the
			 * requested function.
			 */
			if (cix != lcx)
				CPUSET_DEL(set, cix);
		} else if (cix != lcx && CPU_IN_SET(set, cix)) {
			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
			cpup->cpu_m.xc_ack[pri] = 0;
			cpup->cpu_m.xc_wait[pri] = sync;
			if (sync > 0)
				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
			else
				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
			cpup->cpu_m.xc_pend[pri] = 1;
			send_dirint(cix, xc_xlat_xcptoipl[pri]);
		}
	}

	/*
	 * Run service locally if not waiting for remotes.
	 */
	if (sync != 2 && CPU_IN_SET(set, lcx) && func != NULL)
		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

	if (sync == -1)
		return;

	/*
	 * Wait here until all remote calls complete.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[pri] == 0)
				ht_pause();
			cpup->cpu_m.xc_ack[pri] = 0;
		}
	}

	/*
	 * Run service locally if waiting for remotes.
	 */
	if (sync == 2 && CPU_IN_SET(set, lcx) && func != NULL)
		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

	if (sync == 0)
		return;

	/*
	 * Release any waiting CPUs
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				cpup->cpu_m.xc_wait[pri] = 0;
				cpup->cpu_m.xc_state[pri] = XC_DONE;
			}
		}
	}

	/*
	 * Wait for all CPUs to acknowledge completion before we continue.
	 * Without this check it's possible (on a VM or hyper-threaded CPUs
	 * or in the presence of System Management Interrupts, which can all
	 * cause delays) for the remote processor to still be waiting by
	 * the time xc_common() is next invoked with the sync flag set,
	 * resulting in a deadlock.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				while (cpup->cpu_m.xc_ack[pri] == 0)
					ht_pause();
				cpup->cpu_m.xc_ack[pri] = 0;
			}
		}
	}
}

/*
 * xc_trycall: attempt to call specified function on all processors
 * remotes may wait for a long time
 * we continue immediately
 */
void
xc_trycall(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	cpuset_t set,
	xc_func_t func)
{
	int		save_kernel_preemption;
	extern int	IGNORE_KERNEL_PREEMPTION;

	/*
	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
	 * someone else is already doing a cross-call -- we won't.
	 */

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger.  We can't hold locks, so we spin on the cross-call
 * lock until we get it.  When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int	IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	mutex_impl_t *lp;
	cpuset_t set;
	int x;

	CPUSET_ALL_BUT(set, this_cpu);

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;

	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
	for (x = 0; x < 0x400000; x++) {
		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
			    set, -1);
			lp->m_spin.m_spinlock = 0; /* XXX */
			break;
		}
		(void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}
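
/*
 * Illustrative sketch (not part of the original source): xc_wait_sync() is
 * the sync == 2 flavor, so the remote CPUs defer running the handler until
 * every CPU has checked in and the initiator has run the handler locally
 * and released them (see the TLB flush filter comment in xc_serv()).  The
 * handler, the caller and its arguments below are hypothetical.
 */
#if 0
static int
example_invlpg_handler(xc_arg_t va, xc_arg_t a2, xc_arg_t a3)
{
	/* ... invalidate the local TLB entry for the page containing va ... */
	return (0);
}

static void
example_shootdown(caddr_t va, cpuset_t cpus)
{
	/*
	 * The PTE in memory would be updated before this call, so that the
	 * deferred remote invalidations are not dropped by the TLB Flush
	 * filter.
	 */
	xc_wait_sync((xc_arg_t)va, 0, 0, X_CALL_HIPRI, cpus,
	    example_invlpg_handler);
}
#endif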