/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <sys/xc_levels.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/mutex_impl.h>
#include <sys/stack.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>

/*
 * Implementation for cross-processor calls via interprocessor interrupts
 *
 * This implementation uses a message passing architecture to allow multiple
 * concurrent cross calls to be in flight at any given time. We use the cmpxchg
 * instruction, aka casptr(), to implement simple efficient work queues for
 * message passing between CPUs with almost no need for regular locking.
 * See xc_extract() and xc_insert() below.
 *
 * The general idea is that initiating a cross call means putting a message
 * on the target CPU's (or CPUs') work queue. Any synchronization is handled
 * by passing the message back and forth between initiator and target(s).
 *
 * Every CPU has xc_work_cnt, which is non-zero whenever it has messages to
 * process. This value is incremented as message traffic is initiated and
 * decremented with every message that finishes all processing.
 *
 * The code needs no mfence or other membar_*() calls. The uses of
 * casptr(), cas32() and atomic_dec_32() for the message passing are
 * implemented with LOCK prefix instructions which are equivalent to mfence.
 *
 * One interesting aspect of this implementation is that it allows 2 or more
 * CPUs to initiate cross calls to intersecting sets of CPUs at the same time.
 * The cross call processing by the CPUs will happen in any order with only
 * a guarantee, for xc_call() and xc_sync(), that an initiator won't return
 * from cross calls before all slaves have invoked the function.
 *
 * The reason for this asynchronous approach is to allow for fast global
 * TLB shootdowns. If all N CPUs try to do a global TLB invalidation on a
 * different virtual address at the same time, the old code required
 * N squared IPIs. With this method, depending on timing, it can happen
 * with just N IPIs.
 */
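
/*
 * A worked example of the IPI arithmetic above (illustrative): with 8
 * CPUs each initiating a shootdown against the other 7, the old scheme
 * sent 8 * 7 = 56 IPIs, one per initiator/target pair. With message
 * passing, a target that is already draining its msgbox needs no new
 * IPI (see xc_multi_cnt below), so as few as 8 IPIs, one to wake each
 * CPU, can cover all 56 messages.
 */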

/*
 * The default is to not enable collecting counts of IPI information, since
 * the updating of shared cachelines could cause excess bus traffic.
 */
uint_t xc_collect_enable = 0;
uint64_t xc_total_cnt = 0;	/* total #IPIs sent for cross calls */
uint64_t xc_multi_cnt = 0;	/* # times we piggybacked on another IPI */

/*
 * Values for message states. Here are the normal transitions. A transition
 * of "->" happens in the slave cpu and "=>" happens in the master cpu as
 * the messages are passed back and forth.
 *
 * FREE => ASYNC ->                       DONE => FREE
 * FREE => CALL ->                        DONE => FREE
 * FREE => SYNC -> WAITING => RELEASED -> DONE => FREE
 *
 * The interesting one above is ASYNC. You might ask, why not go directly
 * to FREE, instead of DONE? If it did that, it might be possible to exhaust
 * the master's xc_free list if a master can generate ASYNC messages faster
 * than the slave can process them. That could be handled with more
 * complicated logic. However, since nothing important uses ASYNC, I've not
 * bothered.
 */
#define	XC_MSG_FREE	(0)	/* msg in xc_free queue */
#define	XC_MSG_ASYNC	(1)	/* msg in slave xc_msgbox */
#define	XC_MSG_CALL	(2)	/* msg in slave xc_msgbox */
#define	XC_MSG_SYNC	(3)	/* msg in slave xc_msgbox */
#define	XC_MSG_WAITING	(4)	/* msg in master xc_msgbox or xc_waiters */
#define	XC_MSG_RELEASED	(5)	/* msg in slave xc_msgbox */
#define	XC_MSG_DONE	(6)	/* msg in master xc_msgbox */

/*
 * We allow for one high priority message at a time to happen in the system.
 * This is used for panic, kmdb, etc., so no locking is done.
 */
static cpuset_t xc_priority_set;
static xc_data_t xc_priority_data;

/*
 * Decrement a CPU's work count
 */
static void
xc_decrement(struct machcpu *mcpu)
{
	atomic_dec_32(&mcpu->xc_work_cnt);
}

/*
 * Increment a CPU's work count and return the old value
 */
static int
xc_increment(struct machcpu *mcpu)
{
	int old;

	do {
		old = mcpu->xc_work_cnt;
	} while (cas32((uint32_t *)&mcpu->xc_work_cnt, old, old + 1) != old);
	return (old);
}

/*
 * Put a message into a queue. The insertion is atomic no matter
 * how many different inserts/extracts to the same queue happen.
 */
static void
xc_insert(void *queue, xc_msg_t *msg)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		msg->xc_next = old_head;
	} while (casptr(queue, old_head, msg) != old_head);
}

/*
 * Extract a message from a queue. The extraction is atomic only
 * when just one thread does extractions from the queue.
 * If the queue is empty, NULL is returned.
 */
static xc_msg_t *
xc_extract(xc_msg_t **queue)
{
	xc_msg_t *old_head;

	do {
		old_head = (xc_msg_t *)*(volatile xc_msg_t **)queue;
		if (old_head == NULL)
			return (old_head);
	} while (casptr(queue, old_head, old_head->xc_next) != old_head);
	old_head->xc_next = NULL;
	return (old_head);
}
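
/*
 * Together, xc_insert() and xc_extract() form a simple lock-free LIFO
 * (a Treiber stack). An illustrative sketch of how the pair is used by
 * the code below:
 *
 *	xc_msg_t *msg = xc_extract(&CPU->cpu_m.xc_free);   take a free msg
 *	msg->xc_command = XC_MSG_CALL;
 *	xc_insert(&target->cpu_m.xc_msgbox, msg);          post to target
 *
 * The single-extractor rule above is what keeps the classic ABA problem
 * at bay: each queue is only ever popped by its owning CPU, so a popped
 * head cannot be recycled underneath a competing pop.
 */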

/*
 * Initialize the machcpu fields used for cross calls
 */
static uint_t xc_initialized = 0;

void
xc_init_cpu(struct cpu *cpup)
{
	xc_msg_t *msg;
	int c;

	/*
	 * Add a new msg to each existing CPU's free list, as well as one
	 * to this CPU's free list for each of them.
	 */
	for (c = 0; c < ncpus; ++c) {
		if (cpu[c] == NULL)
			continue;
		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		xc_insert(&cpu[c]->cpu_m.xc_free, msg);

		msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
		msg->xc_command = XC_MSG_FREE;
		xc_insert(&cpup->cpu_m.xc_free, msg);
	}

	/*
	 * Add one for self messages.
	 */
	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
	msg->xc_command = XC_MSG_FREE;
	xc_insert(&cpup->cpu_m.xc_free, msg);

	if (!xc_initialized)
		xc_initialized = 1;
}

/*
 * X-call message processing routine. Note that this is used by both
 * senders and recipients of messages.
 *
 * We're protected against changing CPUs by either being in a high-priority
 * interrupt, having preemption disabled, or having a raised SPL.
 */
/*ARGSUSED*/
uint_t
xc_serv(caddr_t arg1, caddr_t arg2)
{
	struct machcpu *mcpup = &(CPU->cpu_m);
	xc_msg_t *msg;
	xc_data_t *data;
	xc_msg_t *xc_waiters = NULL;
	uint32_t num_waiting = 0;
	xc_func_t func;
	xc_arg_t a1;
	xc_arg_t a2;
	xc_arg_t a3;
	uint_t rc = DDI_INTR_UNCLAIMED;

	while (mcpup->xc_work_cnt != 0) {
		rc = DDI_INTR_CLAIMED;

		/*
		 * We may have to wait for a message to arrive.
		 */
		for (;;) {
			/*
			 * Always check for and handle a priority message.
			 */
			if (BT_TEST(CPUSET2BV(xc_priority_set), CPU->cpu_id)) {
				func = xc_priority_data.xc_func;
				a1 = xc_priority_data.xc_a1;
				a2 = xc_priority_data.xc_a2;
				a3 = xc_priority_data.xc_a3;
				BT_CLEAR(CPUSET2BV(xc_priority_set),
				    CPU->cpu_id);
				xc_decrement(mcpup);
				func(a1, a2, a3);
				if (mcpup->xc_work_cnt == 0)
					return (rc);
			}

			/*
			 * Extract and handle a regular message.
			 */
			msg = xc_extract(&mcpup->xc_msgbox);
			if (msg != NULL)
				break;

			/*
			 * Wait for a message to arrive.
			 */
			if (x86_feature & X86_MWAIT) {
				i86_monitor(
				    (volatile uint32_t *)&mcpup->xc_msgbox,
				    0, 0);
				if (mcpup->xc_msgbox == NULL)
					i86_mwait(0, 0);
			} else {
				SMT_PAUSE();
			}
		}
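
		/*
		 * Note: the MONITOR/MWAIT idle above is why
		 * xc_priority_common() below does dummy casptr() stores
		 * to xc_msgbox after sending its IPI -- that store is
		 * what wakes a CPU parked in i86_mwait().
		 */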

		/*
		 * Process the message.
		 */
		switch (msg->xc_command) {

		/*
		 * ASYNC gives back the message immediately, then we do the
		 * function and return with no more waiting.
		 */
		case XC_MSG_ASYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			func = data->xc_func;
			a1 = data->xc_a1;
			a2 = data->xc_a2;
			a3 = data->xc_a3;
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			if (func != NULL)
				(void) (*func)(a1, a2, a3);
			xc_decrement(mcpup);
			break;

		/*
		 * SYNC messages do the call, then send it back to the master
		 * in WAITING mode.
		 */
		case XC_MSG_SYNC:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			msg->xc_command = XC_MSG_WAITING;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			break;

		/*
		 * WAITING messages are collected by the master until all
		 * have arrived. Once all arrive, we release them back to
		 * the slaves.
		 */
		case XC_MSG_WAITING:
			xc_insert(&xc_waiters, msg);
			if (++num_waiting < mcpup->xc_wait_cnt)
				break;
			while ((msg = xc_extract(&xc_waiters)) != NULL) {
				msg->xc_command = XC_MSG_RELEASED;
				xc_insert(&cpu[msg->xc_slave]->cpu_m.xc_msgbox,
				    msg);
				--num_waiting;
			}
			if (num_waiting != 0)
				panic("wrong number waiting");
			mcpup->xc_wait_cnt = 0;
			break;

		/*
		 * CALL messages do the function and then, like RELEASED,
		 * send the message back to the master as DONE.
		 */
		case XC_MSG_CALL:
			data = &cpu[msg->xc_master]->cpu_m.xc_data;
			if (data->xc_func != NULL)
				(void) (*data->xc_func)(data->xc_a1,
				    data->xc_a2, data->xc_a3);
			/*FALLTHROUGH*/
		case XC_MSG_RELEASED:
			msg->xc_command = XC_MSG_DONE;
			xc_insert(&cpu[msg->xc_master]->cpu_m.xc_msgbox, msg);
			xc_decrement(mcpup);
			break;

		/*
		 * DONE means a slave has completely finished up.
		 * Once we collect all the DONE messages, we'll exit
		 * processing too.
		 */
		case XC_MSG_DONE:
			msg->xc_command = XC_MSG_FREE;
			xc_insert(&mcpup->xc_free, msg);
			xc_decrement(mcpup);
			break;

		case XC_MSG_FREE:
			panic("free message in msgbox");
			break;

		default:
			panic("bad message in msgbox");
			break;
		}
	}
	return (rc);
}
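
/*
 * A worked SYNC round trip (illustrative), matching the state diagram
 * above: the master posts one SYNC message per target and bumps its
 * xc_wait_cnt for each. Every target runs the function, flips its
 * message to WAITING and sends it back. The master collects WAITING
 * messages until xc_wait_cnt of them have arrived, re-posts them all
 * as RELEASED, and each target answers RELEASED with DONE; the master
 * then retires every DONE message onto its xc_free list.
 */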

/*
 * Initiate cross call processing.
 */
static void
xc_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	uint_t command)
{
	int c;
	struct cpu *cpup;
	xc_msg_t *msg;
	xc_data_t *data;
	int cnt;
	int save_spl;

	if (!xc_initialized) {
		if (BT_TEST(set, CPU->cpu_id) && (CPU->cpu_flags & CPU_READY) &&
		    func != NULL)
			(void) (*func)(arg1, arg2, arg3);
		return;
	}

	save_spl = splr(ipltospl(XC_HI_PIL));

	/*
	 * Fill in cross call data.
	 */
	data = &CPU->cpu_m.xc_data;
	data->xc_func = func;
	data->xc_a1 = arg1;
	data->xc_a2 = arg2;
	data->xc_a3 = arg3;

	/*
	 * Post messages to all CPUs involved that are CPU_READY.
	 */
	CPU->cpu_m.xc_wait_cnt = 0;
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
			continue;

		/*
		 * Fill out a new message.
		 */
		msg = xc_extract(&CPU->cpu_m.xc_free);
		if (msg == NULL)
			panic("Ran out of free xc_msg_t's");
		msg->xc_command = command;
		msg->xc_master = CPU->cpu_id;
		msg->xc_slave = c;

		/*
		 * Increment my work count for all messages that I'll
		 * transition from DONE to FREE.
		 * Also remember how many XC_MSG_WAITINGs to look for.
		 */
		(void) xc_increment(&CPU->cpu_m);
		if (command == XC_MSG_SYNC)
			++CPU->cpu_m.xc_wait_cnt;

		/*
		 * Increment the target CPU work count then insert the message
		 * in the target msgbox. If I post the first bit of work
		 * for the target to do, send an IPI to the target CPU.
		 */
		cnt = xc_increment(&cpup->cpu_m);
		xc_insert(&cpup->cpu_m.xc_msgbox, msg);
		if (cpup != CPU) {
			if (cnt == 0) {
				CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
				send_dirint(c, XC_HI_PIL);
				if (xc_collect_enable)
					++xc_total_cnt;
			} else if (xc_collect_enable) {
				++xc_multi_cnt;
			}
		}
	}

	/*
	 * Now drop into the message handler until all work is done.
	 */
	(void) xc_serv(NULL, NULL);
	splx(save_spl);
}

/*
 * Push out a priority cross call.
 */
static void
xc_priority_common(
	xc_func_t func,
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set)
{
	int i;
	int c;
	struct cpu *cpup;

	/*
	 * Wait briefly for a previous xc_priority to have finished, but
	 * continue no matter what.
	 */
	for (i = 0; i < 40000; ++i) {
		if (CPUSET_ISNULL(xc_priority_set))
			break;
		SMT_PAUSE();
	}

	/*
	 * Fill in cross call data.
	 */
	xc_priority_data.xc_func = func;
	xc_priority_data.xc_a1 = arg1;
	xc_priority_data.xc_a2 = arg2;
	xc_priority_data.xc_a3 = arg3;
	xc_priority_set = *(cpuset_t *)set;

	/*
	 * Post messages to all CPUs involved that are CPU_READY.
	 * We'll always IPI, plus bang on the xc_msgbox for i86_mwait().
	 */
	for (c = 0; c < ncpus; ++c) {
		if (!BT_TEST(set, c))
			continue;
		cpup = cpu[c];
		if (cpup == NULL || !(cpup->cpu_flags & CPU_READY) ||
		    cpup == CPU)
			continue;
		(void) xc_increment(&cpup->cpu_m);
		send_dirint(c, XC_HI_PIL);
		for (i = 0; i < 10; ++i) {
			(void) casptr(&cpup->cpu_m.xc_msgbox,
			    cpup->cpu_m.xc_msgbox, cpup->cpu_m.xc_msgbox);
		}
	}
}

/*
 * Do a cross call to all other CPUs with absolutely no waiting or handshaking.
 * This should only be used for extraordinary operations, like panic(), which
 * need to work, in some fashion, in a not completely functional system.
 * All other uses that want minimal waiting should use xc_call_nowait().
 */
void
xc_priority(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_spl = splr(ipltospl(XC_HI_PIL));
	int save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;

	IGNORE_KERNEL_PREEMPTION = 1;
	xc_priority_common((xc_func_t)func, arg1, arg2, arg3, set);
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
	splx(save_spl);
}
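
/*
 * Illustrative use of xc_priority() (a sketch; stop_other_cpu is a
 * hypothetical stand-in for whatever routine the caller supplies):
 *
 *	cpuset_t set;
 *
 *	CPUSET_ALL_BUT(set, CPU->cpu_id);
 *	xc_priority(0, 0, 0, CPUSET2BV(set), (xc_func_t)stop_other_cpu);
 *
 * This is roughly how kdi_xc_others() below drives the other CPUs into
 * the debugger.
 */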

/*
 * Wrapper for kmdb to capture other CPUs, causing them to enter the debugger.
 */
void
kdi_xc_others(int this_cpu, void (*func)(void))
{
	extern int IGNORE_KERNEL_PREEMPTION;
	int save_kernel_preemption;
	cpuset_t set;

	if (!xc_initialized)
		return;

	save_kernel_preemption = IGNORE_KERNEL_PREEMPTION;
	IGNORE_KERNEL_PREEMPTION = 1;
	CPUSET_ALL_BUT(set, this_cpu);
	xc_priority_common((xc_func_t)func, 0, 0, 0, CPUSET2BV(set));
	IGNORE_KERNEL_PREEMPTION = save_kernel_preemption;
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call_nowait() may return immediately too.
 */
void
xc_call_nowait(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_ASYNC);
}

/*
 * Invoke function on specified processors. Remotes may continue after
 * service with no waiting. xc_call() returns only after remotes have finished.
 */
void
xc_call(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_CALL);
}

/*
 * Invoke function on specified processors. Remotes wait until all have
 * finished. xc_sync() also waits until all remotes have finished.
 */
void
xc_sync(
	xc_arg_t arg1,
	xc_arg_t arg2,
	xc_arg_t arg3,
	ulong_t *set,
	xc_func_t func)
{
	xc_common(func, arg1, arg2, arg3, set, XC_MSG_SYNC);
}
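
/*
 * Usage sketch for the wrappers above (illustrative only; my_tlbflush
 * is a hypothetical handler name -- the hat layer supplies the real
 * TLB flush routines):
 *
 *	cpuset_t set = ...;		(CPUs that may cache the mapping)
 *	xc_call((xc_arg_t)va, 0, 0, CPUSET2BV(set),
 *	    (xc_func_t)my_tlbflush);
 *
 * xc_call() returns once every CPU_READY CPU in the set has invoked
 * my_tlbflush(); use xc_sync() instead if the remotes must also wait
 * for one another, or xc_call_nowait() for fire-and-forget.
 */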