/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 */

#include <sys/types.h>

#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/mmu.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>

static struct xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t xc_xlat_xcptoipl[X_CALL_LEVELS] = {
        XC_LO_PIL,
        XC_MED_PIL,
        XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int xc_initialized = 0;
extern cpuset_t cpu_ready_set;

void
xc_init()
{
        /*
         * By making these mutexes type MUTEX_DRIVER, the ones below
         * LOCK_LEVEL will be implemented as adaptive mutexes, and the
         * ones above LOCK_LEVEL will be spin mutexes.
         */
        mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_LO_PIL));
        mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_MED_PIL));
        mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_HI_PIL));

        xc_initialized = 1;
}

/*
 * Used by the debugger to determine whether or not cross calls have been
 * initialized and are safe to use.
 */
int
kdi_xc_initialized(void)
{
        return (xc_initialized);
}

#define CAPTURE_CPU_ARG ~0UL

/*
 * X-call interrupt service routine.
 *
 *      arg == X_CALL_MEDPRI - capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
        int op;
        int pri = (int)(uintptr_t)arg1;
        struct cpu *cpup = CPU;
        xc_arg_t *argp;
        xc_arg_t arg2val;
        uint_t tlbflush;

        if (pri == X_CALL_MEDPRI) {

                argp = &xc_mboxes[X_CALL_MEDPRI].arg2;
                arg2val = *argp;
                if (arg2val != CAPTURE_CPU_ARG &&
                    !CPU_IN_SET((cpuset_t)arg2val, cpup->cpu_id))
                        return (DDI_INTR_UNCLAIMED);
                ASSERT(arg2val == CAPTURE_CPU_ARG);
                if (cpup->cpu_m.xc_pend[pri] == 0)
                        return (DDI_INTR_UNCLAIMED);

                cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
                cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

                for (;;) {
                        if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
                            (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
                                break;
                        ht_pause();
                }
                return (DDI_INTR_CLAIMED);
        }
        if (cpup->cpu_m.xc_pend[pri] == 0)
                return (DDI_INTR_UNCLAIMED);

        cpup->cpu_m.xc_pend[pri] = 0;
        op = cpup->cpu_m.xc_state[pri];

        /*
         * When invalidating TLB entries, wait until the initiator changes the
         * memory PTE before doing any INVLPG. Otherwise, if the PTE in memory
         * hasn't been changed, the processor's TLB Flush filter may ignore
         * the INVLPG instruction.
         */
        tlbflush = (cpup->cpu_m.xc_wait[pri] == 2);

        /*
         * Don't invoke a null function.
         */
        if (xc_mboxes[pri].func != NULL) {
                if (!tlbflush)
                        cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
                            (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
                            xc_mboxes[pri].arg3);
        } else
                cpup->cpu_m.xc_retval[pri] = 0;

        /*
         * Acknowledge that we have completed the x-call operation.
         */
        cpup->cpu_m.xc_ack[pri] = 1;

        if (op == XC_CALL_OP)
                return (DDI_INTR_CLAIMED);

        /*
         * for (op == XC_SYNC_OP)
         * Wait for the initiator of the x-call to indicate
         * that all CPUs involved can proceed.
         */
        while (cpup->cpu_m.xc_wait[pri])
                ht_pause();

        while (cpup->cpu_m.xc_state[pri] != XC_DONE)
                ht_pause();

        /*
         * Flush the TLB, if that's what is requested.
         */
        if (xc_mboxes[pri].func != NULL && tlbflush) {
                cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
                    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
                    xc_mboxes[pri].arg3);
        }

        /*
         * Acknowledge that we have received the directive to continue.
         */
        ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
        cpup->cpu_m.xc_ack[pri] = 1;

        return (DDI_INTR_CLAIMED);
}

/*
 * xc_do_call: common worker for the xc_call(), xc_sync() and xc_wait_sync()
 * entry points below.
 */
static void
xc_do_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func,
        int sync)
{
        /*
         * If the pri indicates a low priority lock (below LOCK_LEVEL),
         * we must disable preemption to avoid migrating to another CPU
         * during the call.
         */
        if (pri == X_CALL_LOPRI) {
                kpreempt_disable();
        } else {
                pri = X_CALL_HIPRI;
        }

        /* always grab highest mutex to avoid deadlock */
        mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
        xc_common(func, arg1, arg2, arg3, pri, set, sync);
        mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
        if (pri == X_CALL_LOPRI)
                kpreempt_enable();
}

/*
 * xc_call: call specified function on all processors
 * remotes may continue after service
 * we wait here until everybody has completed.
 */
void
xc_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}

/*
 * xc_sync: call specified function on all processors
 * after doing work, each remote waits until we let
 * it continue; send the continue after everyone has
 * informed us that they are done.
 */
void
xc_sync(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}

/*
 * xc_wait_sync: similar to xc_sync(), except that the starting
 * CPU waits for all other CPUs to check in before running its
 * service locally.
 */
void
xc_wait_sync(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 2);
}

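/*
 * Illustrative sketch (not part of the original file): one way a caller
 * might use xc_sync() to run a handler on every ready CPU and not return
 * until all of them have finished and been released.  The names
 * xc_example_func, xc_example_sync_all and the XC_EXAMPLES guard are
 * hypothetical, and the handler signature assumes the xc_func_t prototype
 * from <sys/x_call.h>; the block is compiled out by default.
 */
#ifdef XC_EXAMPLES
static int
xc_example_func(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        /*
         * Runs on each CPU in the set at the interrupt level that
         * corresponds to the requested x-call priority, so it must not block.
         */
        return (0);
}

static void
xc_example_sync_all(void)
{
        cpuset_t set;

        set = cpu_ready_set;    /* every CPU that has completed startup */

        /*
         * Remotes run xc_example_func() and then spin until the initiator
         * releases them; xc_sync() returns only after every CPU involved
         * has acknowledged the release.
         */
        xc_sync((xc_arg_t)0, (xc_arg_t)0, (xc_arg_t)0,
            X_CALL_HIPRI, set, xc_example_func);
}
#endif  /* XC_EXAMPLES */
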
/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */

/*
 * Capture the CPUs specified in order to start an x-call session,
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
        int cix;
        int lcx;
        struct cpu *cpup;
        int i;
        cpuset_t *cpus;
        cpuset_t c;

        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

        /*
         * Prevent deadlocks where we take an interrupt and are waiting
         * for a mutex owned by one of the CPUs that is captured for
         * the x-call, while that CPU is waiting for some x-call signal
         * to be set by us.
         *
         * This mutex also prevents preemption, since it raises SPL above
         * LOCK_LEVEL (it is a spin-type driver mutex).
         */
        /* always grab highest mutex to avoid deadlock */
        mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
        lcx = CPU->cpu_id;      /* now we're safe */

        ASSERT(CPU->cpu_flags & CPU_READY);

        /*
         * Wait for all cpus
         */
        cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
        if (CPU_IN_SET(*cpus, CPU->cpu_id))
                CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
        for (;;) {
                c = *(volatile cpuset_t *)cpus;
                CPUSET_AND(c, cpu_ready_set);
                if (CPUSET_ISNULL(c))
                        break;
                ht_pause();
        }

        /*
         * Store the set of CPUs involved in the x-call session, so that
         * xc_release_cpus will know what CPUs to act upon.
         */
        xc_mboxes[X_CALL_MEDPRI].set = set;
        xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

        /*
         * Now capture each CPU in the set and cause it to go into a
         * holding pattern.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL ||
                    (cpup->cpu_flags & CPU_READY) == 0) {
                        /*
                         * In case CPU wasn't ready, but becomes ready later,
                         * take the CPU out of the set now.
                         */
                        CPUSET_DEL(set, cix);
                        continue;
                }
                if (cix != lcx && CPU_IN_SET(set, cix)) {
                        cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
                        cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
                        cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
                        send_dirint(cix, XC_MED_PIL);
                }
                i++;
                if (i >= ncpus)
                        break;
        }

        /*
         * Wait here until all remote calls complete.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
                                ht_pause();
                        cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
                }
                i++;
                if (i >= ncpus)
                        break;
        }
}

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
        int cix;
        int lcx = (int)(CPU->cpu_id);
        cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
        struct cpu *cpup;
        int i;

        ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

        /*
         * Allow each CPU to exit its holding pattern.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL)
                        continue;
                if ((cpup->cpu_flags & CPU_READY) &&
                    (cix != lcx) && CPU_IN_SET(set, cix)) {
                        /*
                         * Setting XC_DONE lets the captured CPU break out
                         * of its holding-pattern loop in xc_serv().
                         */
                        cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
                }
                i++;
                if (i >= ncpus)
                        break;
        }

        xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
        mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}

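/*
 * Illustrative sketch (not part of the original file): the capture/release
 * pattern described above.  The function name xc_example_quiesce and the
 * XC_EXAMPLES guard are hypothetical; the block is compiled out by default.
 */
#ifdef XC_EXAMPLES
static void
xc_example_quiesce(void)
{
        cpuset_t set;

        set = cpu_ready_set;

        /*
         * Park every other ready CPU in its XC_HOLD loop.  Note that
         * xc_capture_cpus() returns with xc_mbox_lock[X_CALL_HIPRI] still
         * held as a spin mutex at high SPL, so the code below must not block.
         */
        xc_capture_cpus(set);

        /*
         * Critical section: the captured CPUs are spinning in xc_serv()
         * and cannot interfere with whatever is done here.
         */

        /* Let the captured CPUs resume and drop the mailbox lock. */
        xc_release_cpus();
}
#endif  /* XC_EXAMPLES */
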
/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done:
 *      -1 - no waiting; remotes are not held and we don't wait for their acks
 *       0 - remotes are not held; we wait for each remote to acknowledge
 *           completion before returning
 *       1 - run the service locally, then wait for the remotes and release
 *           them (sync operation)
 *       2 - wait for the remotes to complete before running the service
 *           locally, then release them (sync operation)
 */
static void
xc_common(
        xc_func_t func,
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        int sync)
{
        int cix;
        int lcx = (int)(CPU->cpu_id);
        struct cpu *cpup;

        ASSERT(panicstr == NULL);

        ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
        ASSERT(CPU->cpu_flags & CPU_READY);

        /*
         * Set up the service definition mailbox.
         */
        xc_mboxes[pri].func = func;
        xc_mboxes[pri].arg1 = arg1;
        xc_mboxes[pri].arg2 = arg2;
        xc_mboxes[pri].arg3 = arg3;

        /*
         * Request service on all remote processors.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL ||
                    (cpup->cpu_flags & CPU_READY) == 0) {
                        /*
                         * In case the non-local CPU is not ready but becomes
                         * ready later, take it out of the set now. The local
                         * CPU needs to remain in the set to complete the
                         * requested function.
                         */
                        if (cix != lcx)
                                CPUSET_DEL(set, cix);
                } else if (cix != lcx && CPU_IN_SET(set, cix)) {
                        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
                        cpup->cpu_m.xc_ack[pri] = 0;
                        cpup->cpu_m.xc_wait[pri] = sync;
                        if (sync > 0)
                                cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
                        else
                                cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
                        cpup->cpu_m.xc_pend[pri] = 1;
                        send_dirint(cix, xc_xlat_xcptoipl[pri]);
                }
        }

        /*
         * Run service locally if not waiting for remotes.
         */
        if (sync != 2 && CPU_IN_SET(set, lcx) && func != NULL)
                CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

        if (sync == -1)
                return;

        /*
         * Wait here until all remote calls complete.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        while (cpup->cpu_m.xc_ack[pri] == 0)
                                ht_pause();
                        cpup->cpu_m.xc_ack[pri] = 0;
                }
        }

        /*
         * Run service locally if waiting for remotes.
         */
        if (sync == 2 && CPU_IN_SET(set, lcx) && func != NULL)
                CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

        if (sync == 0)
                return;

        /*
         * Release any waiting CPUs.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
                                cpup->cpu_m.xc_wait[pri] = 0;
                                cpup->cpu_m.xc_state[pri] = XC_DONE;
                        }
                }
        }

        /*
         * Wait for all CPUs to acknowledge completion before we continue.
         * Without this check it's possible (on a VM or hyper-threaded CPUs
         * or in the presence of System Management Interrupts, which can all
         * cause delays) for the remote processor to still be waiting by
         * the time xc_common() is next invoked with the sync flag set,
         * resulting in a deadlock.
         */
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
                                while (cpup->cpu_m.xc_ack[pri] == 0)
                                        ht_pause();
                                cpup->cpu_m.xc_ack[pri] = 0;
                        }
                }
        }
}

/*
 * xc_trycall: attempt to call specified function on all processors
 * remotes may wait for a long time
 * we continue immediately
 */
void
xc_trycall(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        cpuset_t set,
        xc_func_t func)
{
        int save_kernel_preemption;
        extern int IGNORE_KERNEL_PREEMPTION;

        /*
         * If we can grab the mutex, we'll do the cross-call.  If not -- if
         * someone else is already doing a cross-call -- we won't.
         */

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;
        if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
                xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
                mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
        }
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger.  We can't hold locks, so we spin on the cross-call
 * lock until we get it.  When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
        extern int IGNORE_KERNEL_PREEMPTION;
        int save_kernel_preemption;
        mutex_impl_t *lp;
        cpuset_t set;
        int x;

        CPUSET_ALL_BUT(set, this_cpu);

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;

        lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
        for (x = 0; x < 0x400000; x++) {
                if (lock_spin_try(&lp->m_spin.m_spinlock)) {
                        xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
                            set, -1);
                        lp->m_spin.m_spinlock = 0; /* XXX */
                        break;
                }
                (void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
        }
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}
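
/*
 * Illustrative sketch (not part of the original file): a best-effort use of
 * xc_trycall(), which sends the cross call only if no other cross call is
 * in progress and never waits for the remotes to finish.  The names
 * xc_example_nudge and xc_example_nudge_others and the XC_EXAMPLES guard are
 * hypothetical, and the handler signature assumes the xc_func_t prototype
 * from <sys/x_call.h>; the block is compiled out by default.
 */
#ifdef XC_EXAMPLES
static int
xc_example_nudge(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
        /* Runs on each remote CPU that receives the IPI; must not block. */
        return (0);
}

static void
xc_example_nudge_others(void)
{
        cpuset_t set;

        /*
         * Build the set of ready CPUs other than the current one.  Disable
         * preemption so CPU->cpu_id stays valid while the set is built and
         * the call is issued.
         */
        kpreempt_disable();
        CPUSET_ALL_BUT(set, CPU->cpu_id);
        CPUSET_AND(set, cpu_ready_set);

        /*
         * If the cross-call mailbox is busy, xc_trycall() silently does
         * nothing, so the caller must tolerate the call being dropped.
         */
        xc_trycall((xc_arg_t)0, (xc_arg_t)0, (xc_arg_t)0, set,
            xc_example_nudge);
        kpreempt_enable();
}
#endif  /* XC_EXAMPLES */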