/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 */

#include <sys/types.h>

#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/mmu.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>

static struct xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t xc_xlat_xcptoipl[X_CALL_LEVELS] = {
	XC_LO_PIL,
	XC_MED_PIL,
	XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int xc_initialized = 0;
extern ulong_t cpu_ready_set;

void
xc_init()
{
	/*
	 * By making these mutexes type MUTEX_DRIVER, the ones below
	 * LOCK_LEVEL will be implemented as adaptive mutexes, and the
	 * ones above LOCK_LEVEL will be spin mutexes.
	 */
	mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_LO_PIL));
	mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_MED_PIL));
	mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
	    (void *)ipltospl(XC_HI_PIL));

	xc_initialized = 1;
}

/*
 * Used by the debugger to determine whether or not cross calls have been
 * initialized and are safe to use.
 */
int
kdi_xc_initialized(void)
{
	return (xc_initialized);
}
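
/*
 * Hypothetical usage sketch (kdi_stop_func is a placeholder, not a real
 * symbol): a debugger-side caller would typically test this predicate
 * before asking the other CPUs to stop via kdi_xc_others(), defined at
 * the bottom of this file:
 *
 *	if (kdi_xc_initialized())
 *		kdi_xc_others(CPU->cpu_id, kdi_stop_func);
 */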

#define	CAPTURE_CPU_ARG	0xffffffff

/*
 * X-call interrupt service routine.
 *
 * arg1 == X_CALL_MEDPRI - capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	int op;
	int pri = (int)(uintptr_t)arg1;
	struct cpu *cpup = CPU;
	xc_arg_t *argp;
	xc_arg_t arg2val;
	uint_t tlbflush;

	if (pri == X_CALL_MEDPRI) {

		argp = &xc_mboxes[X_CALL_MEDPRI].arg2;
		arg2val = *argp;
		if (arg2val != CAPTURE_CPU_ARG &&
		    !(arg2val & (1 << cpup->cpu_id)))
			return (DDI_INTR_UNCLAIMED);
		ASSERT(arg2val == CAPTURE_CPU_ARG);
		if (cpup->cpu_m.xc_pend[pri] == 0)
			return (DDI_INTR_UNCLAIMED);

		cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
		cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

		for (;;) {
			if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
			    (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
				break;
			ht_pause();
			return_instr();
		}
		return (DDI_INTR_CLAIMED);
	}
	if (cpup->cpu_m.xc_pend[pri] == 0)
		return (DDI_INTR_UNCLAIMED);

	cpup->cpu_m.xc_pend[pri] = 0;
	op = cpup->cpu_m.xc_state[pri];

	/*
	 * When invalidating TLB entries, wait until the initiator changes the
	 * memory PTE before doing any INVLPG. Otherwise, if the PTE in memory
	 * hasn't been changed, the processor's TLB Flush filter may ignore
	 * the INVLPG instruction.
	 */
	tlbflush = (cpup->cpu_m.xc_wait[pri] == 2);

	/*
	 * Don't invoke a null function.
	 */
	if (xc_mboxes[pri].func != NULL) {
		if (!tlbflush)
			cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
			    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
			    xc_mboxes[pri].arg3);
	} else
		cpup->cpu_m.xc_retval[pri] = 0;

	/*
	 * Acknowledge that we have completed the x-call operation.
	 */
	cpup->cpu_m.xc_ack[pri] = 1;

	if (op == XC_CALL_OP)
		return (DDI_INTR_CLAIMED);

	/*
	 * At this point op == XC_SYNC_OP:  wait for the initiator of the
	 * x-call to indicate that all CPUs involved can proceed.
	 */
	while (cpup->cpu_m.xc_wait[pri]) {
		ht_pause();
		return_instr();
	}

	while (cpup->cpu_m.xc_state[pri] != XC_DONE) {
		ht_pause();
		return_instr();
	}

	/*
	 * Flush the TLB, if that's what is requested.
	 */
	if (xc_mboxes[pri].func != NULL && tlbflush) {
		cpup->cpu_m.xc_retval[pri] = (*xc_mboxes[pri].func)
		    (xc_mboxes[pri].arg1, xc_mboxes[pri].arg2,
		    xc_mboxes[pri].arg3);
	}

	/*
	 * Acknowledge that we have received the directive to continue.
	 */
	ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
	cpup->cpu_m.xc_ack[pri] = 1;

	return (DDI_INTR_CLAIMED);
}


/*
 * xc_do_call: common worker for the xc_call(), xc_sync() and xc_wait_sync()
 * entry points below.
 */
static void
xc_do_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func,
	int sync)
{
	/*
	 * If the pri indicates a low priority cross call (below LOCK_LEVEL),
	 * we must disable preemption to avoid migrating to another CPU
	 * during the call.
	 */
	if (pri == X_CALL_LOPRI) {
		kpreempt_disable();
	} else {
		pri = X_CALL_HIPRI;
	}

	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	xc_common(func, arg1, arg2, arg3, pri, set, sync);
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	if (pri == X_CALL_LOPRI)
		kpreempt_enable();
}
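
/*
 * Hypothetical usage sketch for the wrappers below (my_service, my_arg and
 * target_cpu are placeholders; CPUSET_ZERO/CPUSET_ADD are assumed to be the
 * usual cpuset macros from <sys/cpuvar.h>):
 *
 *	cpuset_t set;
 *
 *	CPUSET_ZERO(set);
 *	CPUSET_ADD(set, target_cpu);
 *	xc_call((xc_arg_t)my_arg, 0, 0, X_CALL_HIPRI, set, my_service);
 *
 * where my_service is an xc_func_t taking the three xc_arg_t arguments.
 */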

/*
 * xc_call: call specified function on all processors
 *	remotes may continue after service
 *	we wait here until everybody has completed.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}

/*
 * xc_sync: call specified function on all processors
 *	after doing work, each remote waits until we let
 *	it continue; send the continue after everyone has
 *	informed us that they are done.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}

/*
 * xc_wait_sync: similar to xc_sync(), except that the starting
 * cpu waits for all other cpus to check in before running its
 * service locally.
 */
void
xc_wait_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	xc_func_t func)
{
	xc_do_call(arg1, arg2, arg3, pri, set, func, 2);
}


/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */
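
/*
 * Hypothetical usage sketch of such a critical section (the cpuset macros
 * are assumed to come from <sys/cpuvar.h>):
 *
 *	cpuset_t set;
 *
 *	CPUSET_ALL_BUT(set, CPU->cpu_id);
 *	xc_capture_cpus(set);
 *	... code that must run while the other CPUs are held ...
 *	xc_release_cpus();
 */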

/*
 * Capture the CPUs specified in order to start a x-call session,
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
	int cix;
	int lcx;
	struct cpu *cpup;
	int i;
	cpuset_t *cpus;
	cpuset_t c;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

	/*
	 * Prevent deadlocks where we take an interrupt and are waiting
	 * for a mutex owned by one of the CPUs that is captured for
	 * the x-call, while that CPU is waiting for some x-call signal
	 * to be set by us.
	 *
	 * This mutex also prevents preemption, since it raises SPL above
	 * LOCK_LEVEL (it is a spin-type driver mutex).
	 */
	/* always grab highest mutex to avoid deadlock */
	mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
	lcx = CPU->cpu_id;	/* now we're safe */

	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Wait for all cpus
	 */
	cpus = (cpuset_t *)&xc_mboxes[X_CALL_MEDPRI].arg2;
	if (CPU_IN_SET(*cpus, CPU->cpu_id))
		CPUSET_ATOMIC_DEL(*cpus, CPU->cpu_id);
	for (;;) {
		c = *(volatile cpuset_t *)cpus;
		CPUSET_AND(c, cpu_ready_set);
		if (CPUSET_ISNULL(c))
			break;
		ht_pause();
	}

	/*
	 * Store the set of CPUs involved in the x-call session, so that
	 * xc_release_cpus will know what CPUs to act upon.
	 */
	xc_mboxes[X_CALL_MEDPRI].set = set;
	xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

	/*
	 * Now capture each CPU in the set and cause it to go into a
	 * holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case CPU wasn't ready, but becomes ready later,
			 * take the CPU out of the set now.
			 */
			CPUSET_DEL(set, cix);
			continue;
		}
		if (cix != lcx && CPU_IN_SET(set, cix)) {
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
			cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
			send_dirint(cix, XC_MED_PIL);
		}
		i++;
		if (i >= ncpus)
			break;
	}

	/*
	 * Wait here until all remote calls complete.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0) {
				ht_pause();
				return_instr();
			}
			cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
		}
		i++;
		if (i >= ncpus)
			break;
	}
}

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
	struct cpu *cpup;
	int i;

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

	/*
	 * Allow each CPU to exit its holding pattern.
	 */
	i = 0;
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL)
			continue;
		if ((cpup->cpu_flags & CPU_READY) &&
		    (cix != lcx) && CPU_IN_SET(set, cix)) {
			/*
			 * Clear xc_ack since we will be waiting for it
			 * to be set again after we set XC_DONE.
			 */
			cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
		}
		i++;
		if (i >= ncpus)
			break;
	}

	xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
	mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}
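
/*
 * For reference: the entry points in this file map onto xc_common()'s sync
 * argument as follows: xc_call() passes 0, xc_sync() passes 1 and
 * xc_wait_sync() passes 2 (all via xc_do_call()), while xc_trycall() and
 * kdi_xc_others() below pass -1 directly.
 */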

/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done.
 *	-1 - no waiting, don't release remotes
 *	0 - no waiting, release remotes immediately
 *	1 - run service locally w/o waiting for remotes.
 *	2 - wait for remotes before running locally
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	int pri,
	cpuset_t set,
	int sync)
{
	int cix;
	int lcx = (int)(CPU->cpu_id);
	struct cpu *cpup;

	ASSERT(panicstr == NULL);

	ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
	ASSERT(CPU->cpu_flags & CPU_READY);

	/*
	 * Set up the service definition mailbox.
	 */
	xc_mboxes[pri].func = func;
	xc_mboxes[pri].arg1 = arg1;
	xc_mboxes[pri].arg2 = arg2;
	xc_mboxes[pri].arg3 = arg3;

	/*
	 * Request service on all remote processors.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if ((cpup = cpu[cix]) == NULL ||
		    (cpup->cpu_flags & CPU_READY) == 0) {
			/*
			 * In case CPU wasn't ready, but becomes ready later,
			 * take the CPU out of the set now.
			 */
			CPUSET_DEL(set, cix);
		} else if (cix != lcx && CPU_IN_SET(set, cix)) {
			CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
			cpup->cpu_m.xc_ack[pri] = 0;
			cpup->cpu_m.xc_wait[pri] = sync;
			if (sync > 0)
				cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
			else
				cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
			cpup->cpu_m.xc_pend[pri] = 1;
			send_dirint(cix, xc_xlat_xcptoipl[pri]);
		}
	}

	/*
	 * Run service locally if not waiting for remotes.
	 */
	if (sync != 2 && CPU_IN_SET(set, lcx) && func != NULL)
		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

	if (sync == -1)
		return;

	/*
	 * Wait here until all remote calls complete.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			while (cpup->cpu_m.xc_ack[pri] == 0) {
				ht_pause();
				return_instr();
			}
			cpup->cpu_m.xc_ack[pri] = 0;
		}
	}

	/*
	 * Run service locally if waiting for remotes.
	 */
	if (sync == 2 && CPU_IN_SET(set, lcx) && func != NULL)
		CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);

	if (sync == 0)
		return;

	/*
	 * Release any waiting CPUs
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				cpup->cpu_m.xc_wait[pri] = 0;
				cpup->cpu_m.xc_state[pri] = XC_DONE;
			}
		}
	}

	/*
	 * Wait for all CPUs to acknowledge completion before we continue.
	 * Without this check it's possible (on a VM, on hyper-threaded CPUs,
	 * or in the presence of System Management Interrupts, all of which
	 * can cause delays) for the remote processor to still be waiting by
	 * the time xc_common() is next invoked with the sync flag set,
	 * resulting in a deadlock.
	 */
	for (cix = 0; cix < NCPU; cix++) {
		if (lcx != cix && CPU_IN_SET(set, cix)) {
			cpup = cpu[cix];
			if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
				while (cpup->cpu_m.xc_ack[pri] == 0) {
					ht_pause();
					return_instr();
				}
				cpup->cpu_m.xc_ack[pri] = 0;
			}
		}
	}
}

/*
 * xc_trycall: attempt to call specified function on all processors
 *	remotes may wait for a long time
 *	we continue immediately
 */
void
xc_trycall(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	cpuset_t set,
	xc_func_t func)
{
	int save_kernel_preemption;
	extern int IGNORE_KERNEL_PREEMPTION;

	/*
	 * If we can grab the mutex, we'll do the cross-call.  If not -- if
	 * someone else is already doing a cross-call -- we won't.
	 */

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
		xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
		mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger.  We can't hold locks, so we spin on the cross-call
 * lock until we get it.  When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	mutex_impl_t *lp;
	cpuset_t set;
	int x;

	CPUSET_ALL_BUT(set, this_cpu);

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;

	lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
	for (x = 0; x < 0x400000; x++) {
		if (lock_spin_try(&lp->m_spin.m_spinlock)) {
			xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
			    set, -1);
			lp->m_spin.m_spinlock = 0; /* XXX */
			break;
		}
		(void) xc_serv((caddr_t)X_CALL_MEDPRI, NULL);
	}
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}
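
/*
 * Hypothetical usage sketch for xc_trycall() above (my_service is a
 * placeholder xc_func_t): a best-effort cross call to every other CPU from
 * a context that cannot wait for the cross-call mutex:
 *
 *	cpuset_t set;
 *
 *	CPUSET_ALL_BUT(set, CPU->cpu_id);
 *	xc_trycall((xc_arg_t)0, 0, 0, set, my_service);
 */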