/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Facilities for cross-processor subroutine calls using "mailbox" interrupts.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/traptrace.h>

static struct xc_mbox xc_mboxes[X_CALL_LEVELS];
static kmutex_t xc_mbox_lock[X_CALL_LEVELS];
static uint_t xc_xlat_xcptoipl[X_CALL_LEVELS] = {
        XC_LO_PIL,
        XC_MED_PIL,
        XC_HI_PIL
};

static void xc_common(xc_func_t, xc_arg_t, xc_arg_t, xc_arg_t,
    int, cpuset_t, int);

static int xc_initialized = 0;

void
xc_init()
{
        /*
         * By making these mutexes type MUTEX_DRIVER, the ones below
         * LOCK_LEVEL will be implemented as adaptive mutexes, and the
         * ones above LOCK_LEVEL will be spin mutexes.
         */
        mutex_init(&xc_mbox_lock[0], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_LO_PIL));
        mutex_init(&xc_mbox_lock[1], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_MED_PIL));
        mutex_init(&xc_mbox_lock[2], NULL, MUTEX_DRIVER,
            (void *)ipltospl(XC_HI_PIL));

        xc_initialized = 1;
}

#if defined(TRAPTRACE)

/*
 * When xc_traptrace is on, put x-call records into the trap trace buffer.
 */
int xc_traptrace;

void
xc_make_trap_trace_entry(uint8_t marker, int pri, ulong_t arg)
{
        trap_trace_rec_t *ttr;
        struct _xc_entry *xce;

        if (xc_traptrace == 0)
                return;

        ttr = trap_trace_get_traceptr(TT_XCALL,
            (ulong_t)caller(), (ulong_t)getfp());
        xce = &(ttr->ttr_info.xc_entry);

        xce->xce_marker = marker;
        xce->xce_pri = pri;
        xce->xce_arg = arg;

        if ((uint_t)pri < X_CALL_LEVELS) {
                struct machcpu *mcpu = &CPU->cpu_m;

                xce->xce_pend = mcpu->xc_pend[pri];
                xce->xce_ack = mcpu->xc_ack[pri];
                xce->xce_state = mcpu->xc_state[pri];
                xce->xce_retval = mcpu->xc_retval[pri];
                xce->xce_func = (uintptr_t)xc_mboxes[pri].func;
        }
}
#endif

#define CAPTURE_CPU_ARG ~0UL
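
/*
 * An informal sketch of the mailbox handshake between xc_common() (the
 * initiator) and xc_serv() (a remote CPU) at a single priority level.
 * This is only a summary of the code below, not an additional interface;
 * the CPU capture path (X_CALL_MEDPRI with CAPTURE_CPU_ARG) follows a
 * different, simpler hold/release protocol.
 *
 *      initiator (xc_common)                   remote (xc_serv)
 *      ---------------------                   ----------------
 *      fill in xc_mboxes[pri]
 *      xc_ack = 0, xc_wait = sync
 *      xc_state = XC_CALL_OP/XC_SYNC_OP
 *      xc_pend = 1, send_dirint()      -->     xc_pend = 0
 *                                              retval = (*func)(...)
 *      wait for xc_ack == 1            <--     xc_ack = 1
 *      xc_ack = 0
 *      (sync case only:)
 *      xc_wait = 0, xc_state = XC_DONE -->     wait for xc_wait == 0 and
 *                                                  xc_state == XC_DONE
 *      wait for xc_ack == 1            <--     xc_ack = 1
 */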

/*
 * X-call interrupt service routine.
 *
 *      arg1 == X_CALL_MEDPRI - capture cpus.
 *
 * We're protected against changing CPUs by being a high-priority interrupt.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
        int op;
        int pri = (int)(uintptr_t)arg1;
        struct cpu *cpup = CPU;
        xc_arg_t arg2val;

        XC_TRACE(TT_XC_SVC_BEGIN, pri, (ulong_t)arg2);

        if (pri == X_CALL_MEDPRI) {

                arg2val = xc_mboxes[X_CALL_MEDPRI].arg2;

                if (arg2val != CAPTURE_CPU_ARG ||
                    !CPU_IN_SET(xc_mboxes[X_CALL_MEDPRI].set, cpup->cpu_id))
                        goto unclaimed;

                ASSERT(arg2val == CAPTURE_CPU_ARG);

                if (cpup->cpu_m.xc_pend[pri] == 0)
                        goto unclaimed;

                cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 0;
                cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 1;

                for (;;) {
                        if ((cpup->cpu_m.xc_state[X_CALL_MEDPRI] == XC_DONE) ||
                            (cpup->cpu_m.xc_pend[X_CALL_MEDPRI]))
                                break;
                        SMT_PAUSE();
                }
                CPUSET_DEL(xc_mboxes[X_CALL_MEDPRI].set, cpup->cpu_id);
                XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
                return (DDI_INTR_CLAIMED);
        }

        if (cpup->cpu_m.xc_pend[pri] == 0)
                goto unclaimed;

        cpup->cpu_m.xc_pend[pri] = 0;
        op = cpup->cpu_m.xc_state[pri];

        /*
         * Don't invoke a null function.
         */
        if (xc_mboxes[pri].func != NULL) {
                cpup->cpu_m.xc_retval[pri] =
                    (*xc_mboxes[pri].func)(xc_mboxes[pri].arg1,
                    xc_mboxes[pri].arg2, xc_mboxes[pri].arg3);
        } else
                cpup->cpu_m.xc_retval[pri] = 0;

        /*
         * Acknowledge that we have completed the x-call operation.
         */
        cpup->cpu_m.xc_ack[pri] = 1;

        if (op != XC_CALL_OP) {
                /*
                 * op == XC_SYNC_OP:
                 * Wait for the initiator of the x-call to indicate
                 * that all CPUs involved can proceed.
                 */
                while (cpup->cpu_m.xc_wait[pri])
                        SMT_PAUSE();

                while (cpup->cpu_m.xc_state[pri] != XC_DONE)
                        SMT_PAUSE();

                /*
                 * Acknowledge that we have received the directive to continue.
                 */
                ASSERT(cpup->cpu_m.xc_ack[pri] == 0);
                cpup->cpu_m.xc_ack[pri] = 1;
        }

        XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_CLAIMED);
        return (DDI_INTR_CLAIMED);

unclaimed:
        XC_TRACE(TT_XC_SVC_END, pri, DDI_INTR_UNCLAIMED);
        return (DDI_INTR_UNCLAIMED);
}

/*
 * xc_do_call: common worker for xc_call() and xc_sync().
 */
static void
xc_do_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func,
        int sync)
{
        /*
         * If the pri indicates a low priority level (below LOCK_LEVEL),
         * we must disable preemption to avoid migrating to another CPU
         * during the call.
         */
        if (pri == X_CALL_LOPRI) {
                kpreempt_disable();
        } else {
                pri = X_CALL_HIPRI;
        }

        /* always grab highest mutex to avoid deadlock */
        mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
        xc_common(func, arg1, arg2, arg3, pri, set, sync);
        mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
        if (pri == X_CALL_LOPRI)
                kpreempt_enable();
}

/*
 * xc_call: call the specified function on the given set of processors;
 * remotes may continue after performing the service.
 * We wait here until everybody has completed.
 */
void
xc_call(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 0);
}
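
/*
 * Example (a hypothetical caller sketch, not part of this interface): run
 * a handler on every ready CPU at high priority and wait for each of them
 * to finish it.  The handler name and argument are purely illustrative.
 *
 *      static int
 *      my_handler(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
 *      {
 *              ... per-CPU work; runs at XC_HI_PIL on each target ...
 *              return (0);
 *      }
 *
 *      cpuset_t set = cpu_ready_set;
 *
 *      xc_call((xc_arg_t)arg, 0, 0, X_CALL_HIPRI, set, my_handler);
 *
 * xc_sync() below takes the same arguments, but additionally holds the
 * remote CPUs until every one of them has completed the handler.
 */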

/*
 * xc_sync: call the specified function on the given set of processors.
 * After doing the work, each remote waits until we let it continue;
 * we send the continue signal after everyone has informed us that they
 * are done.
 */
void
xc_sync(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        xc_func_t func)
{
        xc_do_call(arg1, arg2, arg3, pri, set, func, 1);
}

/*
 * The routines xc_capture_cpus and xc_release_cpus
 * can be used in place of xc_sync in order to implement a critical
 * code section where all CPUs in the system can be controlled.
 * xc_capture_cpus is used to start the critical code section, and
 * xc_release_cpus is used to end the critical code section.
 */

/*
 * Capture the CPUs specified in order to start a x-call session,
 * and/or to begin a critical section.
 */
void
xc_capture_cpus(cpuset_t set)
{
        int cix;
        int lcx;
        struct cpu *cpup;
        int i;

        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);

        /*
         * Prevent deadlocks where we take an interrupt and are waiting
         * for a mutex owned by one of the CPUs that is captured for
         * the x-call, while that CPU is waiting for some x-call signal
         * to be set by us.
         *
         * This mutex also prevents preemption, since it raises SPL above
         * LOCK_LEVEL (it is a spin-type driver mutex).
         */
        /* always grab highest mutex to avoid deadlock */
        mutex_enter(&xc_mbox_lock[X_CALL_HIPRI]);
        lcx = CPU->cpu_id;      /* now we're safe */

        ASSERT(CPU->cpu_flags & CPU_READY);

        /*
         * Wait for all cpus.
         */

        /*
         * First remove ourself.
         */
        if (CPU_IN_SET(xc_mboxes[X_CALL_MEDPRI].set, CPU->cpu_id))
                CPUSET_ATOMIC_DEL(xc_mboxes[X_CALL_MEDPRI].set, CPU->cpu_id);
        /*
         * We must wait for all cpus to clear their bit from
         * xc_mboxes[X_CALL_MEDPRI].set before we write to this set.
         */
        for (;;) {
                CPUSET_AND(xc_mboxes[X_CALL_MEDPRI].set, cpu_ready_set);
                if (CPUSET_ISNULL(xc_mboxes[X_CALL_MEDPRI].set))
                        break;
                SMT_PAUSE();
        }

        /*
         * Store the set of CPUs involved in the x-call session, so that
         * xc_release_cpus will know what CPUs to act upon.
         */
        xc_mboxes[X_CALL_MEDPRI].set = set;
        xc_mboxes[X_CALL_MEDPRI].arg2 = CAPTURE_CPU_ARG;

        /*
         * Now capture each CPU in the set and cause it to go into a
         * holding pattern.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL ||
                    (cpup->cpu_flags & CPU_READY) == 0) {
                        /*
                         * In case CPU wasn't ready, but becomes ready later,
                         * take the CPU out of the set now.
                         */
                        CPUSET_DEL(set, cix);
                        continue;
                }
                if (cix != lcx && CPU_IN_SET(set, cix)) {
                        cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
                        cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_HOLD;
                        cpup->cpu_m.xc_pend[X_CALL_MEDPRI] = 1;
                        XC_TRACE(TT_XC_CAPTURE, X_CALL_MEDPRI, cix);
                        send_dirint(cix, XC_MED_PIL);
                }
                i++;
                if (i >= ncpus)
                        break;
        }

        /*
         * Wait here until all remote calls acknowledge.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if (lcx != cix && CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        while (cpup->cpu_m.xc_ack[X_CALL_MEDPRI] == 0)
                                SMT_PAUSE();
                        cpup->cpu_m.xc_ack[X_CALL_MEDPRI] = 0;
                }
                i++;
                if (i >= ncpus)
                        break;
        }
}

/*
 * Release the CPUs captured by xc_capture_cpus, thus terminating the
 * x-call session and exiting the critical section.
 */
void
xc_release_cpus(void)
{
        int cix;
        int lcx = (int)(CPU->cpu_id);
        cpuset_t set = xc_mboxes[X_CALL_MEDPRI].set;
        struct cpu *cpup;
        int i;

        ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));

        /*
         * Allow each CPU to exit its holding pattern.
         */
        i = 0;
        for (cix = 0; cix < NCPU; cix++) {
                if ((cpup = cpu[cix]) == NULL)
                        continue;
                if ((cpup->cpu_flags & CPU_READY) &&
                    (cix != lcx) && CPU_IN_SET(set, cix)) {
                        /*
                         * Setting XC_DONE lets the captured CPU drop out
                         * of its holding pattern in xc_serv().
                         */
                        XC_TRACE(TT_XC_RELEASE, X_CALL_MEDPRI, cix);
                        cpup->cpu_m.xc_state[X_CALL_MEDPRI] = XC_DONE;
                }
                i++;
                if (i >= ncpus)
                        break;
        }

        xc_mboxes[X_CALL_MEDPRI].arg2 = 0;
        mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
}
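
/*
 * Example (a hypothetical caller sketch, not part of this interface):
 * quiesce every other CPU around an update that must not be observed in
 * progress.  While captured, the remote CPUs spin in xc_serv() at
 * XC_MED_PIL.
 *
 *      cpuset_t set;
 *
 *      CPUSET_ALL(set);
 *      xc_capture_cpus(set);
 *      ... perform the critical update ...
 *      xc_release_cpus();
 *
 * Note that xc_capture_cpus() returns with xc_mbox_lock[X_CALL_HIPRI]
 * held and xc_release_cpus() drops it, so the two must always be used
 * as a pair.
 */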

/*
 * Common code to call a specified function on a set of processors.
 * sync specifies what kind of waiting is done:
 *      -1 - don't wait for the remote CPUs at all
 *       0 - wait until every remote CPU has acknowledged completing
 *           the service function (XC_CALL_OP)
 *       1 - wait for completion, then release the remote CPUs and wait
 *           for them to acknowledge the release (XC_SYNC_OP)
 */
static void
xc_common(
        xc_func_t func,
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        int pri,
        cpuset_t set,
        int sync)
{
        int cix;
        int do_local = 0;
        struct cpu *cpup;
        cpuset_t tset;
        int last_cpu = 0;

        ASSERT(panicstr == NULL);

        ASSERT(MUTEX_HELD(&xc_mbox_lock[X_CALL_HIPRI]));
        ASSERT(CPU->cpu_flags & CPU_READY);

        /*
         * Set up the service definition mailbox.
         */
        xc_mboxes[pri].func = func;
        xc_mboxes[pri].arg1 = arg1;
        xc_mboxes[pri].arg2 = arg2;
        xc_mboxes[pri].arg3 = arg3;

        if (CPU_IN_SET(set, CPU->cpu_id)) {
                do_local = 1;
                CPUSET_DEL(set, CPU->cpu_id);
        }

        /*
         * Request service on all remote processors.
         */
        tset = set;
        for (cix = 0; cix < max_ncpus; cix++) {
                if (!CPU_IN_SET(tset, cix))
                        continue;

                if ((cpup = cpu[cix]) == NULL ||
                    (cpup->cpu_flags & CPU_READY) == 0) {
                        /*
                         * In case the CPU is not ready but becomes
                         * ready later, take it out of the set now.
                         */
                        CPUSET_DEL(set, cix);
                } else {
                        CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
                        cpup->cpu_m.xc_ack[pri] = 0;
                        cpup->cpu_m.xc_wait[pri] = sync;
                        if (sync > 0)
                                cpup->cpu_m.xc_state[pri] = XC_SYNC_OP;
                        else
                                cpup->cpu_m.xc_state[pri] = XC_CALL_OP;
                        cpup->cpu_m.xc_pend[pri] = 1;
                        XC_TRACE(TT_XC_START, pri, cix);
                        send_dirint(cix, xc_xlat_xcptoipl[pri]);
                        last_cpu = cix;
                }

                CPUSET_DEL(tset, cix);
                if (CPUSET_ISNULL(tset))
                        break;
        }

        /*
         * Run the service locally.
         */
        if (do_local && func != NULL) {
                XC_TRACE(TT_XC_START, pri, CPU->cpu_id);
                CPU->cpu_m.xc_retval[pri] = (*func)(arg1, arg2, arg3);
        }

        if (sync == -1)
                return;

        /*
         * Wait here until all remote calls acknowledge.
         */
        for (cix = 0; cix <= last_cpu; cix++) {
                if (CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        while (cpup->cpu_m.xc_ack[pri] == 0)
                                SMT_PAUSE();
                        XC_TRACE(TT_XC_WAIT, pri, cix);
                        cpup->cpu_m.xc_ack[pri] = 0;
                }
        }

        if (sync == 0)
                return;

        /*
         * Release any waiting CPUs.
         */
        for (cix = 0; cix <= last_cpu; cix++) {
                if (CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
                                cpup->cpu_m.xc_wait[pri] = 0;
                                cpup->cpu_m.xc_state[pri] = XC_DONE;
                        }
                }
        }

        /*
         * Wait for all CPUs to acknowledge completion before we continue.
         * Without this check it's possible (on a VM, on hyper-threaded CPUs,
         * or in the presence of System Management Interrupts, which can all
         * cause delays) for the remote processor to still be waiting by
         * the time xc_common() is next invoked with the sync flag set,
         * resulting in a deadlock.
         */
        for (cix = 0; cix <= last_cpu; cix++) {
                if (CPU_IN_SET(set, cix)) {
                        cpup = cpu[cix];
                        if (cpup != NULL && (cpup->cpu_flags & CPU_READY)) {
                                while (cpup->cpu_m.xc_ack[pri] == 0)
                                        SMT_PAUSE();
                                XC_TRACE(TT_XC_ACK, pri, cix);
                                cpup->cpu_m.xc_ack[pri] = 0;
                        }
                }
        }
}

/*
 * xc_trycall: attempt to call the specified function on the given set of
 * processors; remotes may wait for a long time.
 * We continue immediately.
 */
void
xc_trycall(
        xc_arg_t arg1,
        xc_arg_t arg2,
        xc_arg_t arg3,
        cpuset_t set,
        xc_func_t func)
{
        int save_kernel_preemption;
        extern int IGNORE_KERNEL_PREEMPTION;

        /*
         * If we can grab the mutex, we'll do the cross-call.  If not -- if
         * someone else is already doing a cross-call -- we won't.
         */

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;
        if (mutex_tryenter(&xc_mbox_lock[X_CALL_HIPRI])) {
                xc_common(func, arg1, arg2, arg3, X_CALL_HIPRI, set, -1);
                mutex_exit(&xc_mbox_lock[X_CALL_HIPRI]);
        }
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Used by the debugger to cross-call the other CPUs, thus causing them to
 * enter the debugger.  We can't hold locks, so we spin on the cross-call
 * lock until we get it.  When we get it, we send the cross-call, and assume
 * that we successfully stopped the other CPUs.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
        extern int IGNORE_KERNEL_PREEMPTION;
        int save_kernel_preemption;
        mutex_impl_t *lp;
        cpuset_t set;
        int x;

        if (!xc_initialized)
                return;

        CPUSET_ALL_BUT(set, this_cpu);

        save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
        IGNORE_KERNEL_PREEMPTION = 1;

        lp = (mutex_impl_t *)&xc_mbox_lock[X_CALL_HIPRI];
        for (x = 0; x < 0x400000; x++) {
                if (lock_spin_try(&lp->m_spin.m_spinlock)) {
                        xc_common((xc_func_t)func, 0, 0, 0, X_CALL_HIPRI,
                            set, -1);
                        lp->m_spin.m_spinlock = 0; /* XXX */
                        break;
                }
                SMT_PAUSE();
        }
        IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}
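
/*
 * Example of the fire-and-forget form, xc_trycall() above (a hypothetical
 * caller sketch, not part of this interface): post a request to every
 * other CPU and return immediately; if another cross-call is already in
 * progress, nothing is sent.  The handler name is purely illustrative.
 *
 *      cpuset_t set;
 *
 *      CPUSET_ALL_BUT(set, CPU->cpu_id);
 *      xc_trycall(0, 0, 0, set, my_handler);
 */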