/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/traptrace.h>


static struct xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t xc_xlat_xcptoipl[X_CALL_LEVELS] = {
        XC_LO_PIL,
        XC_MED_PIL,
        XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int xc_initialized = 0;

void
xc_init()
{
        /*
         * By making these mutexes type MUTEX_DRIVER, the ones below
         * LOCK_LEVEL will be implemented as adaptive mutexes, and the
         * ones above LOCK_LEVEL will be spin mutexes.
         */
        mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_LO_PIL));
        mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_MED_PIL));
        mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_HI_PIL));

        xc_initialized = 1;
}

#if defined(TRAPTRACE)

/*
 * When xc_traptrace is on, put x-call records into the trap trace buffer.
 */
int xc_traptrace;

void
xc_make_trap_trace_entry(uint8_t marker, int pri, ulong_t arg)
{
        trap_trace_rec_t *ttr;
        struct _xc_entry *xce;

        if (xc_traptrace == 0)
                return;

        ttr = trap_trace_get_traceptr(TT_XCALL,
            (ulong_t)caller(), (ulong_t)getfp());
        xce = &(ttr->ttr_info.xc_entry);

        xce->xce_marker = marker;
        xce->xce_pri = pri;
        xce->xce_arg = arg;

        if ((uint_t)pri < X_CALL_LEVELS) {
                struct machcpu *mcpu = &CPU->cpu_m;

                xce->xce_pend = mcpu->xc_pend[pri];
                xce->xce_ack = mcpu->xc_ack[pri];
                xce->xce_state = mcpu->xc_state[pri];
                xce->xce_retval = mcpu->xc_retval[pri];
                xce->xce_func = (uintptr_t)xc_mboxes[pri].func;
        }
}
#endif

#define	CAPTURE_CPU_ARG	~0UL
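
/*
 * Overview of the mailbox handshake driven by xc_common() and serviced by
 * xc_serv() below.  This is an informal summary added for readability; the
 * code is authoritative.
 *
 *  1. The initiator fills in xc_mboxes[pri] (func, arg1-3), then for each
 *     target CPU clears xc_ack, records the sync mode in xc_wait, sets
 *     xc_state to XC_CALL_OP or XC_SYNC_OP, sets xc_pend and sends a
 *     directed interrupt at the level given by xc_xlat_xcptoipl[pri].
 *  2. Each target clears xc_pend, invokes the function (deferred until
 *     step 3 for xc_wait_sync()) and sets xc_ack.
 *  3. For sync operations the target then spins until the initiator clears
 *     xc_wait and sets xc_state to XC_DONE, and acknowledges once more.
 *
 * The X_CALL_MEDPRI level is special: it carries no function and is used
 * only by xc_capture_cpus()/xc_release_cpus() to hold CPUs in XC_HOLD.
 */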

/*
 * X-call interrupt service routine.
 *
 *	arg == X_CALL_MEDPRI	- capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
        int op;
        int pri = (int)(uintptr_t)arg1;
        struct cpu *cpup = CPU;
        xc_arg_t arg2val;
        uint_t initiator_first;

        XC_TRACE(TT_XC_SVC_BEGIN, pri, (ulong_t)arg2);

        if (pri == X_CALL_MEDPRI) {

                arg2val = xc_mboxes[X_CALL_MEDPRI].arg2;

                if (arg2val != CAPTURE_CPU_ARG &&
                    !CPU_IN_SET((cpuset_t)arg2val, cpup->cpu_id))
                        goto unclaimed;

                ASSERT(arg2val == CAPTURE_CPU_ARG);

                if (cpup->cpu_m.xc_pend[pri] == 0)
                        goto unclaimed;

                cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
                cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

                for (;;) {
                        if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
                            (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
                                break;
                        SMT_PAUSE();
                }
                XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
                return (DDI_INTR_CLAIMED);
        }

        if (cpup->cpu_m.xc_pend[pri] == 0)
                goto unclaimed;

        cpup->cpu_m.xc_pend[pri] = 0;
        op = cpup->cpu_m.xc_state[pri];

        /*
         * Special handling for xc_wait_sync(). The cross call is used
         * to allow the initiating CPU to wait until all other CPUs are
         * captured in the cross call. The initiator then invokes the
         * service function before any other CPU; only after that do the
         * other CPUs invoke the service function.
         */
        initiator_first = (cpup->cpu_m.xc_wait[pri] == 2);

        /*
         * Don't invoke a null function.
         */
        if (xc_mboxes[pri].func != NULL) {
                if (!initiator_first)
                        cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
                            (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
                            xc_mboxes[pri].arg3);
        } else
                cpup->cpu_m.xc_retval[pri] = 0;

        /*
         * Acknowledge that we have completed the x-call operation.
         */
        cpup->cpu_m.xc_ack[pri] = 1;

        if (op != XC_CALL_OP) {
                /*
                 * For the case (op == XC_SYNC_OP):
                 * Wait for the initiator of the x-call to indicate
                 * that all CPUs involved can proceed.
                 */
                while (cpup->cpu_m.xc_wait[pri])
                        SMT_PAUSE();

                while (cpup->cpu_m.xc_state[pri] != XC_DONE)
                        SMT_PAUSE();

                if (xc_mboxes[pri].func != NULL && initiator_first) {
                        cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
                            (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
                            xc_mboxes[pri].arg3);
                }

                /*
                 * Acknowledge that we have received the directive to continue.
                 */
                ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
                cpup->cpu_m.xc_ack[pri] = 1;
        }

        XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
        return (DDI_INTR_CLAIMED);

unclaimed:
        XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_UNCLAIMED);
        return (DDI_INTR_UNCLAIMED);
}
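
/*
 * The xc_call()/xc_sync()/xc_wait_sync()/xc_trycall() entry points all
 * funnel into xc_common(); the sync value they pass selects the protocol.
 * Summary added for reference (see the comment above xc_common() for the
 * precise definitions):
 *
 *	xc_call()	sync  0	remotes continue on their own; the caller
 *				waits only for their acknowledgements
 *	xc_sync()	sync  1	remotes are held after the service function
 *				until the caller releases them
 *	xc_wait_sync()	sync  2	as xc_sync(), but the caller runs the
 *				service function only after every remote
 *				has checked in
 *	xc_trycall()	sync -1	the caller neither waits for
 *				acknowledgements nor releases the remotes
 */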


/*
 * xc_do_call:
 */
static void
xc_do_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func,
        int sync)
{
        /*
         * If the pri indicates a low priority lock (below LOCK_LEVEL),
         * we must disable preemption to avoid migrating to another CPU
         * during the call.
         */
        if (pri == X_CALL_LOPRI) {
                kpreempt_disable();
        } else {
                pri = X_CALL_HIPRI;
        }

        /* always grab highest mutex to avoid deadlock */
        mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
        xc_common(func, arg1, arg2, arg3, pri, set, sync);
        mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
        if (pri == X_CALL_LOPRI)
                kpreempt_enable();
}


/*
 * xc_call: call specified function on all processors
 * remotes may continue after service
 * we wait here until everybody has completed.
 */
void
xc_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}

/*
 * xc_sync: call specified function on all processors
 * after doing work, each remote waits until we let
 * it continue; send the continue after everyone has
 * informed us that they are done.
 */
void
xc_sync(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}

/*
 * xc_wait_sync: similar to xc_sync(), except that the starting
 * cpu waits for all other cpus to check in before running its
 * service locally.
 */
void
xc_wait_sync(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 2);
}
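
/*
 * Illustrative sketch only (not a caller from this file; the handler name
 * and argument are hypothetical, and the handler's exact signature is
 * whatever xc_func_t specifies in <sys/x_call.h>): running a function on
 * every ready CPU at high priority and waiting for completion.
 *
 *	static int
 *	example_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
 *	{
 *		... per-CPU work, e.g. invalidate a local mapping ...
 *		return (0);
 *	}
 *
 *	cpuset_t set = cpu_ready_set;
 *	xc_call((xc_arg_t)my_arg, 0, 0, X_CALL_HIPRI, set, example_handler);
 *
 * xc_sync() and xc_wait_sync() take the same arguments but additionally
 * hold the remote CPUs until the initiator releases them.
 */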


/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */

/*
 * Capture the CPUs specified in order to start a x-call session,
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
        int cix;
        int lcx;
        struct cpu *cpup;
        int i;
        cpuset_t *cpus;
        cpuset_t c;

        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

        /*
         * Prevent deadlocks where we take an interrupt and are waiting
         * for a mutex owned by one of the CPUs that is captured for
         * the x-call, while that CPU is waiting for some x-call signal
         * to be set by us.
         *
         * This mutex also prevents preemption, since it raises SPL above
         * LOCK_LEVEL (it is a spin-type driver mutex).
         */
        /* always grab highest mutex to avoid deadlock */
        mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
        lcx = CPU->cpu_id;	/* now we're safe */

        ASSERT(CPU->cpu_flags & CPU_READY);

        /*
         * Wait for all cpus
         */
        cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
        if (CPU_IN_SET(*cpus, CPU->cpu_id))
                CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
        for (;;) {
                c = *(volatile cpuset_t *)cpus;
                CPUSET_AND(c, cpu_ready_set);
                if (CPUSET_ISNULL(c))
                        break;
                SMT_PAUSE();
        }

        /*
         * Store the set of CPUs involved in the x-call session, so that
         * xc_release_cpus will know what CPUs to act upon.
         */
        xc_mboxes[X_CALL_MEDPRI].set = set;
        xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

        /*
         * Now capture each CPU in the set and cause it to go into a
         * holding pattern.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL ||
                    (cpup->cpu_flags & CPU_READY) == 0) {
                        /*
                         * In case CPU wasn't ready, but becomes ready later,
                         * take the CPU out of the set now.
                         */
                        CPUSET_DEL(set, cix);
                        continue;
                }
                if (cix != lcx && CPU_IN_SET(set, cix)) {
                        cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
                        cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
                        cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
                        XC_TRACE(TT_XC_CAPTURE, X_CALL_MEDPRI, cix);
                        send_dirint(cix, XC_MED_PIL);
                }
                i++;
                if (i >= ncpus)
                        break;
        }

        /*
         * Wait here until all remote calls acknowledge.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
                                SMT_PAUSE();
                        cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
                }
                i++;
                if (i >= ncpus)
                        break;
        }
}

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
        int cix;
        int lcx = (int)(CPU->cpu_id);
        cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
        struct cpu *cpup;
        int i;

        ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

        /*
         * Allow each CPU to exit its holding pattern.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL)
                        continue;
                if ((cpup->cpu_flags & CPU_READY) &&
                    (cix != lcx) && CPU_IN_SET(set, cix)) {
                        /*
                         * Clear xc_ack since we will be waiting for it
                         * to be set again after we set XC_DONE.
                         */
                        XC_TRACE(TT_XC_RELEASE, X_CALL_MEDPRI, cix);
                        cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
                }
                i++;
                if (i >= ncpus)
                        break;
        }

        xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
        mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}
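
/*
 * Illustrative sketch only (no caller appears in this file; the work done
 * inside the section is hypothetical): bracketing a critical section with
 * the capture/release pair described above.
 *
 *	xc_capture_cpus(cpu_ready_set);
 *	(every other ready CPU is now spinning in XC_HOLD)
 *	... update state that no CPU may observe mid-change ...
 *	xc_release_cpus();
 *
 * Note that xc_release_cpus() drops the x-call mailbox lock taken by
 * xc_capture_cpus(), so the two calls must always be paired.
 */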

/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done.
 *	-1 - no waiting, don't release remotes
 *	0 - no waiting, release remotes immediately
 *	1 - run service locally w/o waiting for remotes.
 *	2 - wait for remotes before running locally
 */
static void
xc_common(
        xc_func_t func,
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        int sync)
{
        int cix;
        int lcx = (int)(CPU->cpu_id);
        struct cpu *cpup;

        ASSERT(panicstr == NULL);

        ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
        ASSERT(CPU->cpu_flags & CPU_READY);

        /*
         * Set up the service definition mailbox.
         */
        xc_mboxes[pri].func = func;
        xc_mboxes[pri].arg1 = arg1;
        xc_mboxes[pri].arg2 = arg2;
        xc_mboxes[pri].arg3 = arg3;

        /*
         * Request service on all remote processors.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL ||
                    (cpup->cpu_flags & CPU_READY) == 0) {
                        /*
                         * In case the non-local CPU is not ready but becomes
                         * ready later, take it out of the set now. The local
                         * CPU needs to remain in the set to complete the
                         * requested function.
                         */
                        if (cix != lcx)
                                CPUSET_DEL(set, cix);
                } else if (cix != lcx && CPU_IN_SET(set, cix)) {
                        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
                        cpup->cpu_m.xc_ack[pri] = 0;
                        cpup->cpu_m.xc_wait[pri] = sync;
                        if (sync > 0)
                                cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
                        else
                                cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
                        cpup->cpu_m.xc_pend[pri] = 1;
                        XC_TRACE(TT_XC_START, pri, cix);
                        send_dirint(cix, xc_xlat_xcptoipl[pri]);
                }
        }

        /*
         * Run service locally if not waiting for remotes.
         */
        if (sync != 2 && CPU_IN_SET(set, lcx) && func != NULL) {
                XC_TRACE(TT_XC_START, pri, CPU->cpu_id);
                CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
        }

        if (sync == -1)
                return;

        /*
         * Wait here until all remote calls acknowledge.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        while (cpup->cpu_m.xc_ack[pri] == 0)
                                SMT_PAUSE();
                        XC_TRACE(TT_XC_WAIT, pri, cix);
                        cpup->cpu_m.xc_ack[pri] = 0;
                }
        }

        /*
         * Run service locally if waiting for remotes.
         */
        if (sync == 2 && CPU_IN_SET(set, lcx) && func != NULL)
                CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

        if (sync == 0)
                return;

        /*
         * Release any waiting CPUs
         */
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
                                cpup->cpu_m.xc_wait[pri] = 0;
                                cpup->cpu_m.xc_state[pri] = XC_DONE;
                        }
                }
        }

        /*
         * Wait for all CPUs to acknowledge completion before we continue.
         * Without this check it's possible (on a VM or hyper-threaded CPUs
         * or in the presence of System Management Interrupts which can all
         * cause delays) for the remote processor to still be waiting by
         * the time xc_common() is next invoked with the sync flag set,
         * resulting in a deadlock.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
                                while (cpup->cpu_m.xc_ack[pri] == 0)
                                        SMT_PAUSE();
                                XC_TRACE(TT_XC_ACK, pri, cix);
                                cpup->cpu_m.xc_ack[pri] = 0;
                        }
                }
        }
}

/*
 * xc_trycall: attempt to call specified function on all processors
 * remotes may wait for a long time
 * we continue immediately
 */
void
xc_trycall(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        cpuset_t set,
        xc_func_t func)
{
        int save_kernel_preemption;
        extern int IGNORE_KERNEL_PREEMPTION;

        /*
         * If we can grab the mutex, we'll do the cross-call. If not -- if
         * someone else is already doing a cross-call -- we won't.
         */

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;
        if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
                xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
                mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
        }
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger. We can't hold locks, so we spin on the cross-call
 * lock until we get it. When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
        extern int IGNORE_KERNEL_PREEMPTION;
        int save_kernel_preemption;
        mutex_impl_t *lp;
        cpuset_t set;
        int x;

        if (!xc_initialized)
                return;

        CPUSET_ALL_BUT(set, this_cpu);

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;

        lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
        for (x = 0; x < 0x400000; x++) {
                if (lock_spin_try(&lp->m_spin.m_spinlock)) {
                        xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
                            set, -1);
                        lp->m_spin.m_spinlock = 0; /* XXX */
                        break;
                }
                (void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
        }
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}