1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * evtchn.c 31 * 32 * Communication via hypervisor event channels. 33 * 34 * Copyright (c) 2002-2005, K A Fraser 35 * 36 * This file may be distributed separately from the Linux kernel, or 37 * incorporated into other software packages, subject to the following license: 38 * 39 * Permission is hereby granted, free of charge, to any person obtaining a copy 40 * of this source file (the "Software"), to deal in the Software without 41 * restriction, including without limitation the rights to use, copy, modify, 42 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 43 * and to permit persons to whom the Software is furnished to do so, subject to 44 * the following conditions: 45 * 46 * The above copyright notice and this permission notice shall be included in 47 * all copies or substantial portions of the Software. 48 * 49 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 50 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 51 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 52 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 53 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 54 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 55 * IN THE SOFTWARE. 56 */ 57 58 /* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */ 59 60 /* 61 * 62 * Copyright (c) 2004 Christian Limpach. 63 * All rights reserved. 64 * 65 * Redistribution and use in source and binary forms, with or without 66 * modification, are permitted provided that the following conditions 67 * are met: 68 * 1. Redistributions of source code must retain the above copyright 69 * notice, this list of conditions and the following disclaimer. 70 * 2. Redistributions in binary form must reproduce the above copyright 71 * notice, this list of conditions and the following disclaimer in the 72 * documentation and/or other materials provided with the distribution. 73 * 3. This section intentionally left blank. 74 * 4. The name of the author may not be used to endorse or promote products 75 * derived from this software without specific prior written permission. 76 * 77 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 78 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 79 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

#include <sys/types.h>
#include <sys/hypervisor.h>
#include <sys/machsystm.h>
#include <sys/mutex.h>
#include <sys/evtchn_impl.h>
#include <sys/ddi_impldefs.h>
#include <sys/avintr.h>
#include <sys/cpuvar.h>
#include <sys/smp_impldefs.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/debug.h>
#include <sys/psm.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/traptrace.h>
#include <sys/stack.h>
#include <sys/x_call.h>
#include <xen/public/physdev.h>

/*
 * This file manages our association between hypervisor event channels and
 * Solaris's IRQs.  This is a one-to-one mapping, with three exceptions:
 * IPI IRQs, which have one event channel per CPU participating in the IPI;
 * the clock VIRQ, which also has an event channel per cpu; and the IRQ for
 * /dev/xen/evtchn.  The IRQ types are:
 *
 * IRQT_VIRQ:
 * The hypervisor's standard virtual IRQ, used for the clock timer, for
 * example.  This code allows any cpu to bind to one of these, although
 * some are treated specially (i.e. VIRQ_DEBUG).
 * Event channel binding is done via EVTCHNOP_bind_virq.
 *
 * IRQT_PIRQ:
 * These associate a physical IRQ with an event channel via
 * EVTCHNOP_bind_pirq.
 *
 * IRQT_IPI:
 * A cross-call IRQ.  Maps to "ncpus" event channels, each of which is
 * bound to exactly one of the vcpus.  We do not currently support
 * unbinding of IPIs (since Solaris doesn't need it).  Uses
 * EVTCHNOP_bind_ipi.
 *
 * IRQT_EVTCHN:
 * A "normal" binding to an event channel, typically used by the frontend
 * drivers to bind to their backend event channel.
 *
 * IRQT_DEV_EVTCHN:
 * This is a one-time IRQ used by /dev/xen/evtchn.  Unlike other IRQs, we
 * have a one-IRQ to many-evtchn mapping.  We only track evtchn->irq for
 * these event channels, which are managed via ec_irq_add/rm_evtchn().
 * We enforce that IRQT_DEV_EVTCHN's representative evtchn (->ii_evtchn)
 * is zero, and make any calls to irq_evtchn() an error, to prevent
 * accidentally attempting to use the illegal evtchn 0.
 *
 * Suspend/resume
 *
 * During a suspend/resume cycle, we need to tear down the event channels.
 * All other mapping data is kept.  The drivers will remove their own event
 * channels via xendev on receiving a DDI_SUSPEND.  This leaves us with
 * the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
 * below.
 *
 * CPU binding
 *
 * When an event channel is bound to a CPU, we set a bit in a mask present
 * in the machcpu (evt_affinity) to indicate that this CPU can accept this
 * event channel.
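 * The evt_affinity mask is what xen_callback_handler() consults (through
 * the UNBLOCKED_EVENTS() macro) when deciding which pending events this
 * CPU may service.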
For both IPIs and VIRQs, this binding is fixed at 165 * allocation time and we never modify it. All other event channels are 166 * bound via the PSM either as part of add_avintr(), or interrupt 167 * redistribution (xen_psm_dis/enable_intr()) as a result of CPU 168 * offline/online. 169 * 170 * Locking 171 * 172 * Updates are done holding the ec_lock. The xen_callback_handler() 173 * routine reads the mapping data in a lockless fashion. Additionally 174 * suspend takes ec_lock to prevent update races during a suspend/resume 175 * cycle. The IPI info is also examined without the lock; this is OK 176 * since we only ever change IPI info during initial setup and resume. 177 */ 178 179 #define IRQ_IS_CPUPOKE(irq) (ipi_info[XC_CPUPOKE_PIL].mi_irq == (irq)) 180 181 #define EVTCHN_MASKED(ev) \ 182 (HYPERVISOR_shared_info->evtchn_mask[(ev) >> EVTCHN_SHIFT] & \ 183 (1ul << ((ev) & ((1ul << EVTCHN_SHIFT) - 1)))) 184 185 static short evtchn_to_irq[NR_EVENT_CHANNELS]; 186 static cpuset_t evtchn_cpus[NR_EVENT_CHANNELS]; 187 static int evtchn_owner[NR_EVENT_CHANNELS]; 188 #ifdef DEBUG 189 static kthread_t *evtchn_owner_thread[NR_EVENT_CHANNELS]; 190 #endif 191 192 static irq_info_t irq_info[NR_IRQS]; 193 static mec_info_t ipi_info[MAXIPL]; 194 static mec_info_t virq_info[NR_VIRQS]; 195 /* 196 * Mailbox for communication with the evtchn device driver. 197 * We rely on only cpu 0 servicing the event channels associated 198 * with the driver. i.e. all evtchn driver evtchns are bound to cpu 0. 199 */ 200 volatile int ec_dev_mbox; /* mailbox for evtchn device driver */ 201 202 /* 203 * See the locking description above. 204 */ 205 kmutex_t ec_lock; 206 207 /* 208 * Bitmap indicating which PIRQs require the hypervisor to be notified 209 * on unmask. 210 */ 211 static unsigned long pirq_needs_eoi[NR_PIRQS / (sizeof (unsigned long) * NBBY)]; 212 213 static int ec_debug_irq = INVALID_IRQ; 214 int ec_dev_irq = INVALID_IRQ; 215 216 int 217 xen_bind_virq(unsigned int virq, processorid_t cpu, int *port) 218 { 219 evtchn_bind_virq_t bind; 220 int err; 221 222 bind.virq = virq; 223 bind.vcpu = cpu; 224 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind)) == 0) 225 *port = bind.port; 226 else 227 err = xen_xlate_errcode(err); 228 return (err); 229 } 230 231 int 232 xen_bind_interdomain(int domid, int remote_port, int *port) 233 { 234 evtchn_bind_interdomain_t bind; 235 int err; 236 237 bind.remote_dom = domid; 238 bind.remote_port = remote_port; 239 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, 240 &bind)) == 0) 241 *port = bind.local_port; 242 else 243 err = xen_xlate_errcode(err); 244 return (err); 245 } 246 247 int 248 xen_alloc_unbound_evtchn(int domid, int *evtchnp) 249 { 250 evtchn_alloc_unbound_t alloc; 251 int err; 252 253 alloc.dom = DOMID_SELF; 254 alloc.remote_dom = domid; 255 256 if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, 257 &alloc)) == 0) { 258 *evtchnp = alloc.port; 259 /* ensure evtchn is masked till we're ready to use it */ 260 (void) ec_mask_evtchn(*evtchnp); 261 } else { 262 err = xen_xlate_errcode(err); 263 } 264 265 return (err); 266 } 267 268 static int 269 xen_close_evtchn(int evtchn) 270 { 271 evtchn_close_t close; 272 int err; 273 274 close.port = evtchn; 275 err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); 276 if (err) 277 err = xen_xlate_errcode(err); 278 return (err); 279 } 280 281 static int 282 xen_bind_ipi(processorid_t cpu) 283 { 284 evtchn_bind_ipi_t bind; 285 286 ASSERT(MUTEX_HELD(&ec_lock)); 287 288 bind.vcpu = cpu; 289 
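	/*
	 * On success the hypervisor fills in bind.port; failure is treated
	 * as fatal below, since cross-calls depend on the IPI binding.
	 */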
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind) != 0) 290 panic("xen_bind_ipi() failed"); 291 return (bind.port); 292 } 293 294 /* Send future instances of this interrupt to other vcpu. */ 295 static void 296 xen_bind_vcpu(int evtchn, int cpu) 297 { 298 evtchn_bind_vcpu_t bind; 299 300 ASSERT(MUTEX_HELD(&ec_lock)); 301 302 bind.port = evtchn; 303 bind.vcpu = cpu; 304 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind) != 0) 305 panic("xen_bind_vcpu() failed"); 306 } 307 308 static int 309 xen_bind_pirq(int pirq) 310 { 311 evtchn_bind_pirq_t bind; 312 int ret; 313 314 bind.pirq = pirq; 315 bind.flags = BIND_PIRQ__WILL_SHARE; 316 if ((ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind)) != 0) 317 panic("xen_bind_pirq() failed (err %d)", ret); 318 return (bind.port); 319 } 320 321 /* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */ 322 static void 323 xen_evtchn_unmask(int evtchn) 324 { 325 evtchn_unmask_t unmask; 326 327 unmask.port = evtchn; 328 if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0) 329 panic("xen_evtchn_unmask() failed"); 330 } 331 332 static void 333 update_evtchn_affinity(int evtchn) 334 { 335 cpu_t *cp; 336 struct xen_evt_data *cpe; 337 338 ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ); 339 ASSERT(MUTEX_HELD(&ec_lock)); 340 341 /* 342 * Use lockless search of cpu_list, similar to mutex_vector_enter(). 343 */ 344 kpreempt_disable(); 345 cp = cpu_list; 346 do { 347 cpe = cp->cpu_m.mcpu_evt_pend; 348 if (CPU_IN_SET(evtchn_cpus[evtchn], cp->cpu_id)) 349 SET_EVTCHN_BIT(evtchn, cpe->evt_affinity); 350 else 351 CLEAR_EVTCHN_BIT(evtchn, cpe->evt_affinity); 352 } while ((cp = cp->cpu_next) != cpu_list); 353 kpreempt_enable(); 354 } 355 356 static void 357 bind_evtchn_to_cpuset(int evtchn, cpuset_t cpus) 358 { 359 ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ); 360 361 CPUSET_ZERO(evtchn_cpus[evtchn]); 362 CPUSET_OR(evtchn_cpus[evtchn], cpus); 363 update_evtchn_affinity(evtchn); 364 } 365 366 static void 367 clear_evtchn_affinity(int evtchn) 368 { 369 CPUSET_ZERO(evtchn_cpus[evtchn]); 370 update_evtchn_affinity(evtchn); 371 } 372 373 static void 374 alloc_irq_evtchn(int irq, int index, int evtchn, int cpu) 375 { 376 irq_info_t *irqp = &irq_info[irq]; 377 378 switch (irqp->ii_type) { 379 case IRQT_IPI: 380 ipi_info[index].mi_evtchns[cpu] = evtchn; 381 irqp->ii_u.index = index; 382 break; 383 case IRQT_VIRQ: 384 virq_info[index].mi_evtchns[cpu] = evtchn; 385 irqp->ii_u.index = index; 386 break; 387 default: 388 irqp->ii_u.evtchn = evtchn; 389 break; 390 } 391 392 evtchn_to_irq[evtchn] = irq; 393 394 /* 395 * If a CPU is not specified, we expect to bind it to a CPU later via 396 * the PSM. 
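	 * The PSM normally supplies that binding through
	 * ec_set_irq_affinity() once a handler has been added for the IRQ.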
397 */ 398 if (cpu != -1) { 399 cpuset_t tcpus; 400 CPUSET_ONLY(tcpus, cpu); 401 bind_evtchn_to_cpuset(evtchn, tcpus); 402 } 403 } 404 405 static int 406 alloc_irq(int type, int index, int evtchn, int cpu) 407 { 408 int irq; 409 irq_info_t *irqp; 410 411 ASSERT(MUTEX_HELD(&ec_lock)); 412 ASSERT(type != IRQT_IPI || cpu != -1); 413 414 for (irq = 0; irq < NR_IRQS; irq++) { 415 if (irq_info[irq].ii_type == IRQT_UNBOUND) 416 break; 417 } 418 419 if (irq == NR_IRQS) 420 panic("No available IRQ to bind to: increase NR_IRQS!\n"); 421 422 irqp = &irq_info[irq]; 423 424 irqp->ii_type = type; 425 /* 426 * Set irq/has_handler field to zero which means handler not installed 427 */ 428 irqp->ii_u2.has_handler = 0; 429 430 alloc_irq_evtchn(irq, index, evtchn, cpu); 431 return (irq); 432 } 433 434 static int 435 irq_evtchn(irq_info_t *irqp) 436 { 437 int evtchn; 438 439 ASSERT(irqp->ii_type != IRQT_DEV_EVTCHN); 440 441 switch (irqp->ii_type) { 442 case IRQT_IPI: 443 ASSERT(irqp->ii_u.index != 0); 444 evtchn = ipi_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id]; 445 break; 446 case IRQT_VIRQ: 447 evtchn = virq_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id]; 448 break; 449 default: 450 evtchn = irqp->ii_u.evtchn; 451 break; 452 } 453 454 return (evtchn); 455 } 456 457 static void 458 unbind_evtchn(ushort_t *evtchnp) 459 { 460 int err; 461 462 ASSERT(MUTEX_HELD(&ec_lock)); 463 464 ASSERT(*evtchnp != 0); 465 466 err = xen_close_evtchn(*evtchnp); 467 ASSERT(err == 0); 468 clear_evtchn_affinity(*evtchnp); 469 evtchn_to_irq[*evtchnp] = INVALID_IRQ; 470 *evtchnp = 0; 471 } 472 473 static void 474 pirq_unmask_notify(int pirq) 475 { 476 struct physdev_eoi eoi; 477 478 if (TEST_EVTCHN_BIT(pirq, &pirq_needs_eoi[0])) { 479 eoi.irq = pirq; 480 (void) HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); 481 } 482 } 483 484 static void 485 pirq_query_unmask(int pirq) 486 { 487 struct physdev_irq_status_query irq_status; 488 489 irq_status.irq = pirq; 490 (void) HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status); 491 CLEAR_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]); 492 if (irq_status.flags & XENIRQSTAT_needs_eoi) 493 SET_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]); 494 } 495 496 static void 497 end_pirq(int irq) 498 { 499 int evtchn = irq_evtchn(&irq_info[irq]); 500 501 ec_unmask_evtchn(evtchn); 502 pirq_unmask_notify(IRQ_TO_PIRQ(irq)); 503 } 504 505 /* 506 * probe if a pirq is available to bind to, return 1 if available 507 * else return 0. 508 * Note that for debug versions of xen this probe may cause an in use IRQ 509 * warning message from xen. 510 */ 511 int 512 ec_probe_pirq(int pirq) 513 { 514 evtchn_bind_pirq_t bind; 515 516 bind.pirq = pirq; 517 bind.flags = 0; 518 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind) != 0) { 519 return (0); 520 } else { 521 (void) xen_close_evtchn(bind.port); 522 return (1); 523 } 524 } 525 526 /* 527 * Bind an event channel to a vcpu 528 */ 529 void 530 ec_bind_vcpu(int evtchn, int cpu) 531 { 532 mutex_enter(&ec_lock); 533 xen_bind_vcpu(evtchn, cpu); 534 mutex_exit(&ec_lock); 535 } 536 537 /* 538 * Set up a physical device irq to be associated with an event channel. 539 */ 540 void 541 ec_setup_pirq(int irq, int ipl, cpuset_t *cpusp) 542 { 543 int evtchn; 544 irq_info_t *irqp = &irq_info[irq]; 545 546 /* 547 * Test if this PIRQ is already bound to an evtchn, 548 * which means it is a shared IRQ and we don't want to 549 * bind and do some initial setup that has already been 550 * done for this irq on a previous trip through this code. 
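	 * For the shared case we only raise the recorded IPL if the new one
	 * is higher, and hand the existing CPU binding back to the caller.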
551 */ 552 if (irqp->ii_u.evtchn == INVALID_EVTCHN) { 553 evtchn = xen_bind_pirq(irq); 554 555 pirq_query_unmask(IRQ_TO_PIRQ(irq)); 556 557 irqp->ii_type = IRQT_PIRQ; 558 irqp->ii_u.evtchn = evtchn; 559 560 evtchn_to_irq[evtchn] = irq; 561 irqp->ii_u2.ipl = ipl; 562 ec_set_irq_affinity(irq, *cpusp); 563 ec_enable_irq(irq); 564 pirq_unmask_notify(IRQ_TO_PIRQ(irq)); 565 } else { 566 ASSERT(irqp->ii_u2.ipl != 0); 567 cmn_err(CE_NOTE, "IRQ%d is shared", irq); 568 if (ipl > irqp->ii_u2.ipl) 569 irqp->ii_u2.ipl = ipl; 570 *cpusp = evtchn_cpus[irqp->ii_u.evtchn]; 571 } 572 } 573 574 void 575 ec_unbind_irq(int irq) 576 { 577 irq_info_t *irqp = &irq_info[irq]; 578 mec_info_t *virqp; 579 int drop_lock = 0; 580 int type, i; 581 582 /* 583 * Nasty, but we need this during suspend. 584 */ 585 if (mutex_owner(&ec_lock) != curthread) { 586 mutex_enter(&ec_lock); 587 drop_lock = 1; 588 } 589 590 type = irqp->ii_type; 591 592 ASSERT((type == IRQT_EVTCHN) || (type == IRQT_PIRQ) || 593 (type == IRQT_VIRQ)); 594 595 if ((type == IRQT_EVTCHN) || (type == IRQT_PIRQ)) { 596 /* There's only one event channel associated with this irq */ 597 unbind_evtchn(&irqp->ii_u.evtchn); 598 } else if (type == IRQT_VIRQ) { 599 /* 600 * Each cpu on the system can have it's own event channel 601 * associated with a virq. Unbind them all. 602 */ 603 virqp = &virq_info[irqp->ii_u.index]; 604 for (i = 0; i < NCPU; i++) { 605 if (virqp->mi_evtchns[i] != 0) 606 unbind_evtchn(&virqp->mi_evtchns[i]); 607 } 608 /* Mark the virq structure as invalid. */ 609 virqp->mi_irq = INVALID_IRQ; 610 } 611 612 bzero(irqp, sizeof (*irqp)); 613 /* Re-reserve PIRQ. */ 614 if (type == IRQT_PIRQ) 615 irqp->ii_type = IRQT_PIRQ; 616 617 if (drop_lock) 618 mutex_exit(&ec_lock); 619 } 620 621 /* 622 * Rebind an event channel for delivery to a CPU. 623 */ 624 void 625 ec_set_irq_affinity(int irq, cpuset_t dest) 626 { 627 int evtchn, tcpu; 628 irq_info_t *irqp = &irq_info[irq]; 629 630 mutex_enter(&ec_lock); 631 632 ASSERT(irq < NR_IRQS); 633 ASSERT(irqp->ii_type != IRQT_UNBOUND); 634 635 /* 636 * Binding is done at allocation time for these types, so we should 637 * never modify them. 638 */ 639 if (irqp->ii_type == IRQT_IPI || irqp->ii_type == IRQT_VIRQ || 640 irqp->ii_type == IRQT_DEV_EVTCHN) { 641 mutex_exit(&ec_lock); 642 return; 643 } 644 645 CPUSET_FIND(dest, tcpu); 646 ASSERT(tcpu != CPUSET_NOTINSET); 647 648 evtchn = irq_evtchn(irqp); 649 650 xen_bind_vcpu(evtchn, tcpu); 651 652 bind_evtchn_to_cpuset(evtchn, dest); 653 654 mutex_exit(&ec_lock); 655 656 /* 657 * Now send the new target processor a NOP IPI. 658 * It will check for any pending interrupts, and so service any that 659 * got delivered to the wrong processor by mistake. 
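	 * The poke itself carries no handler, so it is simply accepted and
	 * acknowledged by xen_callback_handler().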
660 */ 661 poke_cpu(tcpu); 662 } 663 664 int 665 ec_set_irq_priority(int irq, int pri) 666 { 667 irq_info_t *irqp; 668 669 if (irq >= NR_IRQS) 670 return (-1); 671 672 irqp = &irq_info[irq]; 673 674 if (irqp->ii_type == IRQT_UNBOUND) 675 return (-1); 676 677 irqp->ii_u2.ipl = pri; 678 679 return (0); 680 } 681 682 void 683 ec_clear_irq_priority(int irq) 684 { 685 irq_info_t *irqp = &irq_info[irq]; 686 687 ASSERT(irq < NR_IRQS); 688 ASSERT(irqp->ii_type != IRQT_UNBOUND); 689 690 irqp->ii_u2.ipl = 0; 691 } 692 693 int 694 ec_bind_evtchn_to_irq(int evtchn) 695 { 696 mutex_enter(&ec_lock); 697 698 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ); 699 700 (void) alloc_irq(IRQT_EVTCHN, 0, evtchn, -1); 701 702 mutex_exit(&ec_lock); 703 return (evtchn_to_irq[evtchn]); 704 } 705 706 int 707 ec_bind_virq_to_irq(int virq, int cpu) 708 { 709 int err; 710 int evtchn; 711 mec_info_t *virqp; 712 713 virqp = &virq_info[virq]; 714 mutex_enter(&ec_lock); 715 716 err = xen_bind_virq(virq, cpu, &evtchn); 717 ASSERT(err == 0); 718 719 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ); 720 721 if (virqp->mi_irq == INVALID_IRQ) { 722 virqp->mi_irq = alloc_irq(IRQT_VIRQ, virq, evtchn, cpu); 723 } else { 724 alloc_irq_evtchn(virqp->mi_irq, virq, evtchn, cpu); 725 } 726 727 mutex_exit(&ec_lock); 728 729 return (virqp->mi_irq); 730 } 731 732 int 733 ec_bind_ipi_to_irq(int ipl, int cpu) 734 { 735 int evtchn; 736 ulong_t flags; 737 mec_info_t *ipip; 738 739 mutex_enter(&ec_lock); 740 741 ipip = &ipi_info[ipl]; 742 743 evtchn = xen_bind_ipi(cpu); 744 745 ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ); 746 747 if (ipip->mi_irq == INVALID_IRQ) { 748 ipip->mi_irq = alloc_irq(IRQT_IPI, ipl, evtchn, cpu); 749 } else { 750 alloc_irq_evtchn(ipip->mi_irq, ipl, evtchn, cpu); 751 } 752 753 /* 754 * Unmask the new evtchn so that it can be seen by the target cpu 755 */ 756 flags = intr_clear(); 757 ec_unmask_evtchn(evtchn); 758 intr_restore(flags); 759 760 mutex_exit(&ec_lock); 761 return (ipip->mi_irq); 762 } 763 764 /* 765 * When bringing up a CPU, bind to all the IPIs that CPU0 bound. 766 */ 767 void 768 ec_bind_cpu_ipis(int cpu) 769 { 770 int i; 771 772 for (i = 0; i < MAXIPL; i++) { 773 mec_info_t *ipip = &ipi_info[i]; 774 if (ipip->mi_irq == INVALID_IRQ) 775 continue; 776 777 (void) ec_bind_ipi_to_irq(i, cpu); 778 } 779 } 780 781 /* 782 * Can this IRQ be rebound to another CPU? 783 */ 784 int 785 ec_irq_rebindable(int irq) 786 { 787 irq_info_t *irqp = &irq_info[irq]; 788 789 if (irqp->ii_u.evtchn == 0) 790 return (0); 791 792 return (irqp->ii_type == IRQT_EVTCHN || irqp->ii_type == IRQT_PIRQ); 793 } 794 795 /* 796 * Should this IRQ be unbound from this CPU (which is being offlined) to 797 * another? 798 */ 799 int 800 ec_irq_needs_rebind(int irq, int cpu) 801 { 802 irq_info_t *irqp = &irq_info[irq]; 803 804 return (ec_irq_rebindable(irq) && 805 CPU_IN_SET(evtchn_cpus[irqp->ii_u.evtchn], cpu)); 806 } 807 808 void 809 ec_send_ipi(int ipl, int cpu) 810 { 811 mec_info_t *ipip = &ipi_info[ipl]; 812 813 ASSERT(ipip->mi_irq != INVALID_IRQ); 814 815 ec_notify_via_evtchn(ipip->mi_evtchns[cpu]); 816 } 817 818 void 819 ec_try_ipi(int ipl, int cpu) 820 { 821 mec_info_t *ipip = &ipi_info[ipl]; 822 823 if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0) 824 return; 825 826 ec_notify_via_evtchn(ipip->mi_evtchns[cpu]); 827 } 828 829 void 830 ec_irq_add_evtchn(int irq, int evtchn) 831 { 832 mutex_enter(&ec_lock); 833 834 /* 835 * See description of IRQT_DEV_EVTCHN above. 
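	 * Only ec_dev_irq may gain event channels this way, and each one is
	 * bound to CPU 0 to match the ec_dev_mbox assumption above.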
836 */ 837 ASSERT(irq == ec_dev_irq); 838 839 alloc_irq_evtchn(irq, 0, evtchn, 0); 840 /* 841 * We enforce that the representative event channel for IRQT_DEV_EVTCHN 842 * is zero, so PSM operations on it have no effect. 843 */ 844 irq_info[irq].ii_u.evtchn = 0; 845 mutex_exit(&ec_lock); 846 } 847 848 void 849 ec_irq_rm_evtchn(int irq, int evtchn) 850 { 851 ushort_t ec = evtchn; 852 853 mutex_enter(&ec_lock); 854 ASSERT(irq == ec_dev_irq); 855 unbind_evtchn(&ec); 856 mutex_exit(&ec_lock); 857 } 858 859 /* 860 * Allocate an /dev/xen/evtchn IRQ. See the big comment at the top 861 * for an explanation. 862 */ 863 int 864 ec_dev_alloc_irq(void) 865 { 866 int i; 867 irq_info_t *irqp; 868 869 for (i = 0; i < NR_IRQS; i++) { 870 if (irq_info[i].ii_type == IRQT_UNBOUND) 871 break; 872 } 873 874 ASSERT(i != NR_IRQS); 875 876 irqp = &irq_info[i]; 877 irqp->ii_type = IRQT_DEV_EVTCHN; 878 irqp->ii_u2.ipl = IPL_EVTCHN; 879 /* 880 * Force the evtchn to zero for the special evtchn device irq 881 */ 882 irqp->ii_u.evtchn = 0; 883 return (i); 884 } 885 886 void 887 ec_enable_irq(unsigned int irq) 888 { 889 ulong_t flag; 890 irq_info_t *irqp = &irq_info[irq]; 891 892 if (irqp->ii_type == IRQT_DEV_EVTCHN) 893 return; 894 895 flag = intr_clear(); 896 ec_unmask_evtchn(irq_evtchn(irqp)); 897 intr_restore(flag); 898 } 899 900 void 901 ec_disable_irq(unsigned int irq) 902 { 903 irq_info_t *irqp = &irq_info[irq]; 904 905 if (irqp->ii_type == IRQT_DEV_EVTCHN) 906 return; 907 908 /* 909 * Spin till we are the one to mask the evtchn 910 * Ensures no one else can be servicing this evtchn. 911 */ 912 while (!ec_mask_evtchn(irq_evtchn(irqp))) 913 SMT_PAUSE(); 914 } 915 916 static int 917 ec_evtchn_pending(uint_t ev) 918 { 919 uint_t evi; 920 shared_info_t *si = HYPERVISOR_shared_info; 921 922 evi = ev >> EVTCHN_SHIFT; 923 ev &= (1ul << EVTCHN_SHIFT) - 1; 924 return ((si->evtchn_pending[evi] & (1ul << ev)) != 0); 925 } 926 927 int 928 ec_pending_irq(unsigned int irq) 929 { 930 int evtchn = irq_evtchn(&irq_info[irq]); 931 932 return (ec_evtchn_pending(evtchn)); 933 } 934 935 void 936 ec_clear_irq(int irq) 937 { 938 irq_info_t *irqp = &irq_info[irq]; 939 int evtchn; 940 941 if (irqp->ii_type == IRQT_DEV_EVTCHN) 942 return; 943 944 ASSERT(irqp->ii_type != IRQT_UNBOUND); 945 946 evtchn = irq_evtchn(irqp); 947 948 ASSERT(EVTCHN_MASKED(evtchn)); 949 ec_clear_evtchn(evtchn); 950 } 951 952 void 953 ec_unmask_irq(int irq) 954 { 955 ulong_t flags; 956 irq_info_t *irqp = &irq_info[irq]; 957 958 flags = intr_clear(); 959 switch (irqp->ii_type) { 960 case IRQT_PIRQ: 961 end_pirq(irq); 962 break; 963 case IRQT_DEV_EVTCHN: 964 break; 965 default: 966 ec_unmask_evtchn(irq_evtchn(irqp)); 967 break; 968 } 969 intr_restore(flags); 970 } 971 972 void 973 ec_try_unmask_irq(int irq) 974 { 975 ulong_t flags; 976 irq_info_t *irqp = &irq_info[irq]; 977 int evtchn; 978 979 flags = intr_clear(); 980 switch (irqp->ii_type) { 981 case IRQT_PIRQ: 982 end_pirq(irq); 983 break; 984 case IRQT_DEV_EVTCHN: 985 break; 986 default: 987 if ((evtchn = irq_evtchn(irqp)) != 0) 988 ec_unmask_evtchn(evtchn); 989 break; 990 } 991 intr_restore(flags); 992 } 993 994 /* 995 * Poll until an event channel is ready or 'check_func' returns true. This can 996 * only be used in a situation where interrupts are masked, otherwise we have a 997 * classic time-of-check vs. time-of-use race. 
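 * In the initial domain we just yield in a loop; otherwise we clear the
 * event and block in HYPERVISOR_poll() until it fires again.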
998 */ 999 void 1000 ec_wait_on_evtchn(int evtchn, int (*check_func)(void *), void *arg) 1001 { 1002 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 1003 while (!check_func(arg)) 1004 (void) HYPERVISOR_yield(); 1005 return; 1006 } 1007 1008 ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0); 1009 1010 for (;;) { 1011 evtchn_port_t ports[1]; 1012 1013 ports[0] = evtchn; 1014 1015 ec_clear_evtchn(evtchn); 1016 1017 if (check_func(arg)) 1018 return; 1019 1020 (void) HYPERVISOR_poll(ports, 1, 0); 1021 } 1022 } 1023 1024 void 1025 ec_wait_on_ipi(int ipl, int (*check_func)(void *), void *arg) 1026 { 1027 mec_info_t *ipip = &ipi_info[ipl]; 1028 1029 if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0) 1030 return; 1031 1032 ec_wait_on_evtchn(ipip->mi_evtchns[CPU->cpu_id], check_func, arg); 1033 } 1034 1035 void 1036 ec_suspend(void) 1037 { 1038 irq_info_t *irqp; 1039 ushort_t *evtchnp; 1040 int i; 1041 int c; 1042 1043 ASSERT(MUTEX_HELD(&ec_lock)); 1044 1045 for (i = 0; i < MAXIPL; i++) { 1046 if (ipi_info[i].mi_irq == INVALID_IRQ) 1047 continue; 1048 1049 for (c = 0; c < NCPU; c++) { 1050 if (cpu[c] == NULL) 1051 continue; 1052 1053 if (CPU_IN_SET(cpu_suspend_lost_set, c)) 1054 continue; 1055 1056 evtchnp = &ipi_info[i].mi_evtchns[c]; 1057 ASSERT(*evtchnp != 0); 1058 unbind_evtchn(evtchnp); 1059 } 1060 } 1061 1062 for (i = 0; i < NR_VIRQS; i++) { 1063 if (virq_info[i].mi_irq == INVALID_IRQ) 1064 continue; 1065 1066 /* 1067 * If we're sharing a single event channel across all CPUs, we 1068 * should only unbind once. 1069 */ 1070 if (virq_info[i].mi_shared) { 1071 evtchnp = &virq_info[i].mi_evtchns[0]; 1072 unbind_evtchn(evtchnp); 1073 for (c = 1; c < NCPU; c++) 1074 virq_info[i].mi_evtchns[c] = 0; 1075 } else { 1076 for (c = 0; c < NCPU; c++) { 1077 if (cpu[c] == NULL) 1078 continue; 1079 1080 evtchnp = &virq_info[i].mi_evtchns[c]; 1081 if (*evtchnp != 0) 1082 unbind_evtchn(evtchnp); 1083 } 1084 } 1085 } 1086 1087 for (i = 0; i < NR_IRQS; i++) { 1088 irqp = &irq_info[i]; 1089 1090 switch (irqp->ii_type) { 1091 case IRQT_EVTCHN: 1092 case IRQT_DEV_EVTCHN: 1093 (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 1094 break; 1095 case IRQT_PIRQ: 1096 if (irqp->ii_u.evtchn != 0) 1097 (void) HYPERVISOR_shutdown(SHUTDOWN_crash); 1098 break; 1099 default: 1100 break; 1101 } 1102 } 1103 } 1104 1105 /* 1106 * The debug irq is special, we only have one evtchn and irq but we allow all 1107 * cpus to service it. It's marked as shared and we propogate the event 1108 * channel into all CPUs by hand. 
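 * share_virq() copies CPU 0's event channel into every other CPU's slot and
 * widens the affinity to the full CPU set.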
1109 */ 1110 static void 1111 share_virq(mec_info_t *virqp) 1112 { 1113 int evtchn = virqp->mi_evtchns[0]; 1114 cpuset_t tset; 1115 int i; 1116 1117 ASSERT(evtchn != 0); 1118 1119 virqp->mi_shared = 1; 1120 1121 for (i = 1; i < NCPU; i++) 1122 virqp->mi_evtchns[i] = evtchn; 1123 CPUSET_ALL(tset); 1124 bind_evtchn_to_cpuset(evtchn, tset); 1125 } 1126 1127 static void 1128 virq_resume(int virq) 1129 { 1130 mec_info_t *virqp = &virq_info[virq]; 1131 int evtchn; 1132 int i, err; 1133 1134 for (i = 0; i < NCPU; i++) { 1135 cpuset_t tcpus; 1136 1137 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i)) 1138 continue; 1139 1140 err = xen_bind_virq(virq, i, &evtchn); 1141 ASSERT(err == 0); 1142 1143 virqp->mi_evtchns[i] = evtchn; 1144 evtchn_to_irq[evtchn] = virqp->mi_irq; 1145 CPUSET_ONLY(tcpus, i); 1146 bind_evtchn_to_cpuset(evtchn, tcpus); 1147 ec_unmask_evtchn(evtchn); 1148 /* 1149 * only timer VIRQ is bound to all cpus 1150 */ 1151 if (virq != VIRQ_TIMER) 1152 break; 1153 } 1154 1155 if (virqp->mi_shared) 1156 share_virq(virqp); 1157 } 1158 1159 static void 1160 ipi_resume(int ipl) 1161 { 1162 mec_info_t *ipip = &ipi_info[ipl]; 1163 int i; 1164 1165 for (i = 0; i < NCPU; i++) { 1166 cpuset_t tcpus; 1167 int evtchn; 1168 1169 if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i)) 1170 continue; 1171 1172 evtchn = xen_bind_ipi(i); 1173 ipip->mi_evtchns[i] = evtchn; 1174 evtchn_to_irq[evtchn] = ipip->mi_irq; 1175 CPUSET_ONLY(tcpus, i); 1176 bind_evtchn_to_cpuset(evtchn, tcpus); 1177 ec_unmask_evtchn(evtchn); 1178 } 1179 } 1180 1181 void 1182 ec_resume(void) 1183 { 1184 int i; 1185 1186 /* New event-channel space is not 'live' yet. */ 1187 for (i = 0; i < NR_EVENT_CHANNELS; i++) 1188 (void) ec_mask_evtchn(i); 1189 1190 for (i = 0; i < MAXIPL; i++) { 1191 if (ipi_info[i].mi_irq == INVALID_IRQ) 1192 continue; 1193 ipi_resume(i); 1194 } 1195 1196 for (i = 0; i < NR_VIRQS; i++) { 1197 if (virq_info[i].mi_irq == INVALID_IRQ) 1198 continue; 1199 virq_resume(i); 1200 } 1201 } 1202 1203 void 1204 ec_init(void) 1205 { 1206 int i; 1207 mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7)); 1208 1209 for (i = 0; i < NR_EVENT_CHANNELS; i++) { 1210 CPUSET_ZERO(evtchn_cpus[i]); 1211 evtchn_to_irq[i] = INVALID_IRQ; 1212 (void) ec_mask_evtchn(i); 1213 } 1214 1215 for (i = 0; i < MAXIPL; i++) 1216 ipi_info[i].mi_irq = INVALID_IRQ; 1217 1218 for (i = 0; i < NR_VIRQS; i++) 1219 virq_info[i].mi_irq = INVALID_IRQ; 1220 1221 /* 1222 * Phys IRQ space is statically bound (1:1 mapping), grab the IRQs 1223 * now. 
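	 * Each slot is only marked IRQT_PIRQ here; the event channel binding
	 * itself is deferred until ec_setup_pirq().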
1224 */ 1225 for (i = PIRQ_BASE; i < NR_PIRQS; i++) { 1226 irq_info[PIRQ_TO_IRQ(i)].ii_type = IRQT_PIRQ; 1227 } 1228 } 1229 1230 void 1231 ec_init_debug_irq() 1232 { 1233 int irq; 1234 1235 irq = ec_bind_virq_to_irq(VIRQ_DEBUG, 0); 1236 (void) add_avintr(NULL, IPL_DEBUG, (avfunc)xen_debug_handler, 1237 "debug", irq, NULL, NULL, NULL, NULL); 1238 1239 mutex_enter(&ec_lock); 1240 share_virq(&virq_info[irq_info[irq].ii_u.index]); 1241 mutex_exit(&ec_lock); 1242 ec_debug_irq = irq; 1243 } 1244 1245 #define UNBLOCKED_EVENTS(si, ix, cpe, cpu_id) \ 1246 ((si)->evtchn_pending[ix] & ~(si)->evtchn_mask[ix] & \ 1247 (cpe)->evt_affinity[ix]) 1248 1249 /* 1250 * This is the entry point for processing events from xen 1251 * 1252 * (See the commentary associated with the shared_info_st structure 1253 * in hypervisor-if.h) 1254 * 1255 * Since the event channel mechanism doesn't really implement the 1256 * concept of priority like hardware interrupt controllers, we simulate 1257 * that in software here using the cpu priority field and the pending 1258 * interrupts field. Events/interrupts that are not able to be serviced 1259 * now because they are at a lower priority than the current cpu priority 1260 * cause a level bit to be recorded in the pending interrupts word. When 1261 * the priority is lowered (either by spl or interrupt exit code) the pending 1262 * levels are checked and an upcall is scheduled if there are events/interrupts 1263 * that have become deliverable. 1264 */ 1265 void 1266 xen_callback_handler(struct regs *rp, trap_trace_rec_t *ttp) 1267 { 1268 ulong_t pending_sels, pe, selbit; 1269 int i, j, port, pri, curpri, irq; 1270 uint16_t pending_ints; 1271 struct cpu *cpu = CPU; 1272 volatile shared_info_t *si = HYPERVISOR_shared_info; 1273 volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info; 1274 volatile struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend; 1275 volatile uint16_t *cpu_ipp = &cpu->cpu_m.mcpu_intr_pending; 1276 1277 ASSERT(rp->r_trapno == T_AST && rp->r_err == 0); 1278 ASSERT(&si->vcpu_info[cpu->cpu_id] == vci); 1279 ASSERT_STACK_ALIGNED(); 1280 1281 vci->evtchn_upcall_pending = 0; 1282 1283 /* 1284 * To expedite scanning of pending notifications, any 0->1 1285 * pending transition on an unmasked channel causes a 1286 * corresponding bit in evtchn_pending_sel to be set. 1287 * Each bit in the selector covers a 32-bit word in 1288 * the evtchn_pending[] array. 1289 */ 1290 membar_enter(); 1291 do { 1292 pending_sels = vci->evtchn_pending_sel; 1293 } while (atomic_cas_ulong((volatile ulong_t *)&vci->evtchn_pending_sel, 1294 pending_sels, 0) != pending_sels); 1295 1296 pending_ints = *cpu_ipp; 1297 while ((i = ffs(pending_sels)) != 0) { 1298 i--; 1299 selbit = 1ul << i; 1300 pending_sels &= ~selbit; 1301 1302 membar_enter(); 1303 while ((pe = UNBLOCKED_EVENTS(si, i, cpe, cpu->cpu_id)) != 0) { 1304 j = ffs(pe) - 1; 1305 pe &= ~(1ul << j); 1306 1307 port = (i << EVTCHN_SHIFT) + j; 1308 1309 irq = evtchn_to_irq[port]; 1310 1311 /* 1312 * If no irq set, just ignore the event. 1313 * On e.g. netbsd they call evtchn_device_upcall(port) 1314 * We require the evtchn driver to install a handler 1315 * so there will be an irq associated with user mode 1316 * evtchns. 1317 */ 1318 if (irq == INVALID_IRQ) { 1319 ec_clear_evtchn(port); 1320 continue; 1321 } 1322 1323 /* 1324 * If there's no handler, it could be a poke, so just 1325 * accept the event and continue. 
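			 * "Accepting" it means masking and clearing the
			 * event and then unmasking it again, without calling
			 * do_interrupt().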
1326 */ 1327 if (!irq_info[irq].ii_u2.has_handler) { 1328 #ifdef TRAPTRACE 1329 ttp->ttr_ipl = 0xff; 1330 if (IRQ_IS_CPUPOKE(irq)) { 1331 ttp->ttr_ipl = XC_CPUPOKE_PIL; 1332 ttp->ttr_marker = TT_INTERRUPT; 1333 } 1334 ttp->ttr_pri = cpu->cpu_pri; 1335 ttp->ttr_spl = cpu->cpu_base_spl; 1336 ttp->ttr_vector = 0xff; 1337 #endif /* TRAPTRACE */ 1338 if (ec_mask_evtchn(port)) { 1339 ec_clear_evtchn(port); 1340 ec_unmask_evtchn(port); 1341 continue; 1342 } 1343 } 1344 1345 pri = irq_info[irq].ii_u2.ipl; 1346 1347 /* 1348 * If we are the cpu that successfully masks 1349 * the event, then record it as a pending event 1350 * for this cpu to service 1351 */ 1352 if (ec_mask_evtchn(port)) { 1353 if (ec_evtchn_pending(port)) { 1354 cpe->pending_sel[pri] |= selbit; 1355 cpe->pending_evts[pri][i] |= (1ul << j); 1356 pending_ints |= 1 << pri; 1357 } else { 1358 /* 1359 * another cpu serviced this event 1360 * before us, clear the mask. 1361 */ 1362 ec_unmask_evtchn(port); 1363 } 1364 } 1365 } 1366 } 1367 *cpu_ipp = pending_ints; 1368 if (pending_ints == 0) 1369 return; 1370 /* 1371 * We have gathered all the pending events/interrupts, 1372 * go service all the ones we can from highest priority to lowest. 1373 * Note: This loop may not actually complete and service all 1374 * pending interrupts since one of the interrupt threads may 1375 * block and the pinned thread runs. In that case, when we 1376 * exit the interrupt thread that blocked we will check for 1377 * any unserviced interrupts and re-post an upcall to process 1378 * any unserviced pending events. 1379 */ 1380 curpri = cpu->cpu_pri; 1381 for (pri = bsrw_insn(*cpu_ipp); pri > curpri; pri--) { 1382 while ((pending_sels = cpe->pending_sel[pri]) != 0) { 1383 i = ffs(pending_sels) - 1; 1384 while ((pe = cpe->pending_evts[pri][i]) != 0) { 1385 j = ffs(pe) - 1; 1386 pe &= ~(1ul << j); 1387 cpe->pending_evts[pri][i] = pe; 1388 if (pe == 0) { 1389 /* 1390 * Must reload pending selector bits 1391 * here as they could have changed on 1392 * a previous trip around the inner loop 1393 * while we were interrupt enabled 1394 * in a interrupt service routine. 1395 */ 1396 pending_sels = cpe->pending_sel[pri]; 1397 pending_sels &= ~(1ul << i); 1398 cpe->pending_sel[pri] = pending_sels; 1399 if (pending_sels == 0) 1400 *cpu_ipp &= ~(1 << pri); 1401 } 1402 port = (i << EVTCHN_SHIFT) + j; 1403 irq = evtchn_to_irq[port]; 1404 if (irq == INVALID_IRQ) { 1405 /* 1406 * No longer a handler for this event 1407 * channel. Clear the event and 1408 * ignore it, unmask the event. 1409 */ 1410 ec_clear_evtchn(port); 1411 ec_unmask_evtchn(port); 1412 continue; 1413 } 1414 if (irq == ec_dev_irq) { 1415 volatile int *tptr = &ec_dev_mbox; 1416 1417 ASSERT(ec_dev_mbox == 0); 1418 /* 1419 * NOTE: this gross store thru a pointer 1420 * is necessary because of a Sun C 1421 * compiler bug that does not properly 1422 * honor a volatile declaration. 1423 * we really should just be able to say 1424 * ec_dev_mbox = port; 1425 * here 1426 */ 1427 *tptr = port; 1428 } 1429 /* 1430 * Set up the regs struct to 1431 * look like a normal hardware int 1432 * and do normal interrupt handling. 
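				 * If the interrupt thread blocks, the
				 * priority check below returns early and a
				 * later upcall finishes any remaining
				 * pending events.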
				 */
				rp->r_trapno = irq;
				do_interrupt(rp, ttp);
				/*
				 * Check for cpu priority change
				 * Can happen if int thread blocks
				 */
				if (cpu->cpu_pri > curpri)
					return;
			}
		}
	}
}

void
ec_unmask_evtchn(unsigned int ev)
{
	uint_t evi;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile vcpu_info_t *vci = CPU->cpu_m.mcpu_vcpu_info;
	volatile ulong_t *ulp;

	ASSERT(!interrupts_enabled());
	/*
	 * Check if we need to take slow path
	 */
	if (!CPU_IN_SET(evtchn_cpus[ev], CPU->cpu_id)) {
		xen_evtchn_unmask(ev);
		return;
	}
	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	ulp = (volatile ulong_t *)&si->evtchn_mask[evi];
	atomic_and_ulong(ulp, ~(1ul << ev));
	/*
	 * The following is basically the equivalent of
	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
	 * interrupt edge' if the channel is masked.
	 * XXPV - slight race if upcall was about to be set, we may get
	 * an extra upcall.
	 */
	membar_enter();
	if (si->evtchn_pending[evi] & (1ul << ev)) {
		membar_consumer();
		ulp = (volatile ulong_t *)&vci->evtchn_pending_sel;
		if (!(*ulp & (1ul << evi))) {
			atomic_or_ulong(ulp, (1ul << evi));
		}
		vci->evtchn_upcall_pending = 1;
	}
}

/*
 * Set a bit in an evtchn mask word, return true if we are the cpu that
 * set the bit.
 */
int
ec_mask_evtchn(unsigned int ev)
{
	uint_t evi, evb;
	ulong_t new, old, bit;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile ulong_t *maskp;
	int masked;

	kpreempt_disable();
	evi = ev >> EVTCHN_SHIFT;
	evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
	bit = 1ul << evb;
	maskp = (volatile ulong_t *)&si->evtchn_mask[evi];
	do {
		old = si->evtchn_mask[evi];
		new = old | bit;
	} while (atomic_cas_ulong(maskp, old, new) != old);
	masked = (old & bit) == 0;
	if (masked) {
		evtchn_owner[ev] = CPU->cpu_id;
#ifdef DEBUG
		evtchn_owner_thread[ev] = curthread;
#endif
	}
	kpreempt_enable();
	return (masked);
}

void
ec_clear_evtchn(unsigned int ev)
{
	uint_t evi;
	shared_info_t *si = HYPERVISOR_shared_info;
	volatile ulong_t *pendp;

	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	pendp = (volatile ulong_t *)&si->evtchn_pending[evi];
	atomic_and_ulong(pendp, ~(1ul << ev));
}

void
ec_notify_via_evtchn(unsigned int port)
{
	evtchn_send_t send;

	ASSERT(port != INVALID_EVTCHN);

	send.port = port;
	(void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}

int
ec_block_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	evtchn = irq_evtchn(irqp);
	(void) ec_mask_evtchn(evtchn);
	return (evtchn_owner[evtchn]);
}

/*
 * Make an event that is pending for delivery on the current cpu "go away"
 * without servicing the interrupt.
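 * The caller must already have masked the event channel; we only clear the
 * per-CPU pending state recorded by xen_callback_handler().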
 */
void
ec_unpend_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int pri = irqp->ii_u2.ipl;
	ulong_t flags;
	uint_t evtchn, evi, bit;
	unsigned long pe, pending_sels;
	struct xen_evt_data *cpe;

	/*
	 * The evtchn must be masked
	 */
	evtchn = irq_evtchn(irqp);
	ASSERT(EVTCHN_MASKED(evtchn));
	evi = evtchn >> EVTCHN_SHIFT;
	bit = evtchn & ((1ul << EVTCHN_SHIFT) - 1);
	flags = intr_clear();
	cpe = CPU->cpu_m.mcpu_evt_pend;
	pe = cpe->pending_evts[pri][evi] & ~(1ul << bit);
	cpe->pending_evts[pri][evi] = pe;
	if (pe == 0) {
		pending_sels = cpe->pending_sel[pri];
		pending_sels &= ~(1ul << evi);
		cpe->pending_sel[pri] = pending_sels;
		if (pending_sels == 0)
			CPU->cpu_m.mcpu_intr_pending &= ~(1 << pri);
	}
	intr_restore(flags);
}