/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * evtchn.c
 *
 * Communication via hypervisor event channels.
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

#include <sys/types.h>
#include <sys/hypervisor.h>
#include <sys/machsystm.h>
#include <sys/mutex.h>
#include <sys/evtchn_impl.h>
#include <sys/ddi_impldefs.h>
#include <sys/avintr.h>
#include <sys/cpuvar.h>
#include <sys/smp_impldefs.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/debug.h>
#include <sys/psm.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/traptrace.h>
#include <sys/stack.h>
#include <sys/x_call.h>
#include <xen/public/physdev.h>

/*
 * This file manages our association between hypervisor event channels and
 * Solaris's IRQs.  This is a one-to-one mapping, with the exception of
 * IPI IRQs, for which there is one event channel per CPU participating
 * in the IPI, the clock VIRQ, which also has an event channel per cpu,
 * and the IRQ for /dev/xen/evtchn.  The IRQ types are:
 *
 * IRQT_VIRQ:
 *	The hypervisor's standard virtual IRQ, used for the clock timer, for
 *	example.  This code allows any cpu to bind to one of these, although
 *	some are treated specially (i.e. VIRQ_DEBUG).
 *	Event channel binding is done via EVTCHNOP_bind_virq.
 *
 * IRQT_PIRQ:
 *	These associate a physical IRQ with an event channel via
 *	EVTCHNOP_bind_pirq.
 *
 * IRQT_IPI:
 *	A cross-call IRQ.  Maps to "ncpus" event channels, each of which is
 *	bound to exactly one of the vcpus.  We do not currently support
 *	unbinding of IPIs (since Solaris doesn't need it).  Uses
 *	EVTCHNOP_bind_ipi.
 *
 * IRQT_EVTCHN:
 *	A "normal" binding to an event channel, typically used by the frontend
 *	drivers to bind to their backend event channel.
 *
 * IRQT_DEV_EVTCHN:
 *	This is a one-time IRQ used by /dev/xen/evtchn.  Unlike other IRQs, we
 *	have a one-IRQ to many-evtchn mapping.  We only track evtchn->irq for
 *	these event channels, which are managed via ec_irq_add/rm_evtchn().
 *	We enforce that IRQT_DEV_EVTCHN's representative evtchn (->ii_evtchn)
 *	is zero, and make any calls to irq_evtchn() an error, to prevent
 *	accidentally attempting to use the illegal evtchn 0.
 *
 * Suspend/resume
 *
 *	During a suspend/resume cycle, we need to tear down the event channels.
 *	All other mapping data is kept.  The drivers will remove their own event
 *	channels via xendev on receiving a DDI_SUSPEND.  This leaves us with
 *	the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
 *	below.
 *
 * CPU binding
 *
 *	When an event channel is bound to a CPU, we set a bit in a mask present
 *	in the machcpu (evt_affinity) to indicate that this CPU can accept this
 *	event channel.  For both IPIs and VIRQs, this binding is fixed at
 *	allocation time and we never modify it.  All other event channels are
 *	bound via the PSM either as part of add_avintr(), or interrupt
 *	redistribution (xen_psm_dis/enable_intr()) as a result of CPU
 *	offline/online.
 *
 * Locking
 *
 *	Updates are done holding the ec_lock.  The xen_callback_handler()
 *	routine reads the mapping data in a lockless fashion.  Additionally
 *	suspend takes ec_lock to prevent update races during a suspend/resume
 *	cycle.  The IPI info is also examined without the lock; this is OK
 *	since we only ever change IPI info during initial setup and resume.
 */

#define	IRQ_IS_CPUPOKE(irq) (ipi_info[XC_CPUPOKE_PIL].mi_irq == (irq))

#define	EVTCHN_MASKED(ev) \
	(HYPERVISOR_shared_info->evtchn_mask[(ev) >> EVTCHN_SHIFT] & \
	(1ul << ((ev) & ((1ul << EVTCHN_SHIFT) - 1))))

static short evtchn_to_irq[NR_EVENT_CHANNELS];
static cpuset_t evtchn_cpus[NR_EVENT_CHANNELS];
static int evtchn_owner[NR_EVENT_CHANNELS];
#ifdef DEBUG
static kthread_t *evtchn_owner_thread[NR_EVENT_CHANNELS];
#endif

static irq_info_t irq_info[NR_IRQS];
static mec_info_t ipi_info[MAXIPL];
static mec_info_t virq_info[NR_VIRQS];

/*
 * Mailbox for communication with the evtchn device driver.
 * We rely on only cpu 0 servicing the event channels associated
 * with the driver.  i.e. all evtchn driver evtchns are bound to cpu 0.
 */
volatile int ec_dev_mbox;	/* mailbox for evtchn device driver */

/*
 * See the locking description above.
 */
kmutex_t ec_lock;

/*
 * Bitmap indicating which PIRQs require the hypervisor to be notified
 * on unmask.
 */
static unsigned long pirq_needs_eoi[NR_PIRQS / (sizeof (unsigned long) * NBBY)];

static int ec_debug_irq = INVALID_IRQ;
int ec_dev_irq = INVALID_IRQ;

int
xen_bind_virq(unsigned int virq, processorid_t cpu, int *port)
{
	evtchn_bind_virq_t bind;
	int err;

	bind.virq = virq;
	bind.vcpu = cpu;
	if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind)) == 0)
		*port = bind.port;
	else
		err = xen_xlate_errcode(err);
	return (err);
}

int
xen_bind_interdomain(int domid, int remote_port, int *port)
{
	evtchn_bind_interdomain_t bind;
	int err;

	bind.remote_dom = domid;
	bind.remote_port = remote_port;
	if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
	    &bind)) == 0)
		*port = bind.local_port;
	else
		err = xen_xlate_errcode(err);
	return (err);
}

int
xen_alloc_unbound_evtchn(int domid, int *evtchnp)
{
	evtchn_alloc_unbound_t alloc;
	int err;

	alloc.dom = DOMID_SELF;
	alloc.remote_dom = domid;

	if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
	    &alloc)) == 0) {
		*evtchnp = alloc.port;
		/* ensure evtchn is masked till we're ready to use it */
		(void) ec_mask_evtchn(*evtchnp);
	} else {
		err = xen_xlate_errcode(err);
	}

	return (err);
}

static int
xen_close_evtchn(int evtchn)
{
	evtchn_close_t close;
	int err;

	close.port = evtchn;
	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
	if (err)
		err = xen_xlate_errcode(err);
	return (err);
}

static int
xen_bind_ipi(processorid_t cpu)
{
	evtchn_bind_ipi_t bind;

	ASSERT(MUTEX_HELD(&ec_lock));

	bind.vcpu = cpu;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind) != 0)
		panic("xen_bind_ipi() failed");
	return (bind.port);
}

/* Send future instances of this interrupt to another vcpu. */
static void
xen_bind_vcpu(int evtchn, int cpu)
{
	evtchn_bind_vcpu_t bind;

	ASSERT(MUTEX_HELD(&ec_lock));

	bind.port = evtchn;
	bind.vcpu = cpu;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind) != 0)
		panic("xen_bind_vcpu() failed");
}

static int
xen_bind_pirq(int pirq)
{
	evtchn_bind_pirq_t bind;
	int ret;

	bind.pirq = pirq;
	bind.flags = BIND_PIRQ__WILL_SHARE;
	if ((ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind)) != 0)
		panic("xen_bind_pirq() failed (err %d)", ret);
	return (bind.port);
}

/* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */
static void
xen_evtchn_unmask(int evtchn)
{
	evtchn_unmask_t unmask;

	unmask.port = evtchn;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0)
		panic("xen_evtchn_unmask() failed");
}

static void
update_evtchn_affinity(int evtchn)
{
	cpu_t *cp;
	struct xen_evt_data *cpe;

	ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);
	ASSERT(MUTEX_HELD(&ec_lock));

	/*
	 * Use lockless search of cpu_list, similar to mutex_vector_enter().
	 */
	kpreempt_disable();
	cp = cpu_list;
	do {
		cpe = cp->cpu_m.mcpu_evt_pend;
		if (CPU_IN_SET(evtchn_cpus[evtchn], cp->cpu_id))
			SET_EVTCHN_BIT(evtchn, cpe->evt_affinity);
		else
			CLEAR_EVTCHN_BIT(evtchn, cpe->evt_affinity);
	} while ((cp = cp->cpu_next) != cpu_list);
	kpreempt_enable();
}

static void
bind_evtchn_to_cpuset(int evtchn, cpuset_t cpus)
{
	ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);

	CPUSET_ZERO(evtchn_cpus[evtchn]);
	CPUSET_OR(evtchn_cpus[evtchn], cpus);
	update_evtchn_affinity(evtchn);
}

static void
clear_evtchn_affinity(int evtchn)
{
	CPUSET_ZERO(evtchn_cpus[evtchn]);
	update_evtchn_affinity(evtchn);
}

static void
alloc_irq_evtchn(int irq, int index, int evtchn, int cpu)
{
	irq_info_t *irqp = &irq_info[irq];

	switch (irqp->ii_type) {
	case IRQT_IPI:
		ipi_info[index].mi_evtchns[cpu] = evtchn;
		irqp->ii_u.index = index;
		break;
	case IRQT_VIRQ:
		virq_info[index].mi_evtchns[cpu] = evtchn;
		irqp->ii_u.index = index;
		break;
	default:
		irqp->ii_u.evtchn = evtchn;
		break;
	}

	evtchn_to_irq[evtchn] = irq;

	/*
	 * If a CPU is not specified, we expect to bind it to a CPU later via
	 * the PSM.
	 */
	if (cpu != -1) {
		cpuset_t tcpus;
		CPUSET_ONLY(tcpus, cpu);
		bind_evtchn_to_cpuset(evtchn, tcpus);
	}
}

static int
alloc_irq(int type, int index, int evtchn, int cpu)
{
	int irq;
	irq_info_t *irqp;

	ASSERT(MUTEX_HELD(&ec_lock));
	ASSERT(type != IRQT_IPI || cpu != -1);

	for (irq = 0; irq < NR_IRQS; irq++) {
		if (irq_info[irq].ii_type == IRQT_UNBOUND)
			break;
	}

	if (irq == NR_IRQS)
		panic("No available IRQ to bind to: increase NR_IRQS!\n");

	irqp = &irq_info[irq];

	irqp->ii_type = type;
	/*
	 * Set the has_handler field to zero, which means no handler has been
	 * installed yet.
	 */
	irqp->ii_u2.has_handler = 0;

	alloc_irq_evtchn(irq, index, evtchn, cpu);
	return (irq);
}

static int
irq_evtchn(irq_info_t *irqp)
{
	int evtchn;

	ASSERT(irqp->ii_type != IRQT_DEV_EVTCHN);

	switch (irqp->ii_type) {
	case IRQT_IPI:
		ASSERT(irqp->ii_u.index != 0);
		evtchn = ipi_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
		break;
	case IRQT_VIRQ:
		evtchn = virq_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
		break;
	default:
		evtchn = irqp->ii_u.evtchn;
		break;
	}

	return (evtchn);
}

static void
unbind_evtchn(ushort_t *evtchnp)
{
	int err;

	ASSERT(MUTEX_HELD(&ec_lock));

	ASSERT(*evtchnp != 0);

	err = xen_close_evtchn(*evtchnp);
	ASSERT(err == 0);
	clear_evtchn_affinity(*evtchnp);
	evtchn_to_irq[*evtchnp] = INVALID_IRQ;
	*evtchnp = 0;
}

static void
pirq_unmask_notify(int pirq)
{
	struct physdev_eoi eoi;

	if (TEST_EVTCHN_BIT(pirq, &pirq_needs_eoi[0])) {
		eoi.irq = pirq;
		(void) HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
	}
}

static void
pirq_query_unmask(int pirq)
{
	struct physdev_irq_status_query irq_status;

	irq_status.irq = pirq;
	(void) HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
	CLEAR_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
	if (irq_status.flags & XENIRQSTAT_needs_eoi)
		SET_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
}

static void
end_pirq(int irq)
{
	int evtchn = irq_evtchn(&irq_info[irq]);

	ec_unmask_evtchn(evtchn);
	pirq_unmask_notify(IRQ_TO_PIRQ(irq));
}

/*
 * Probe whether a pirq is available to bind to; return 1 if it is available,
 * else return 0.
 * Note that for debug versions of xen this probe may cause an in-use IRQ
 * warning message from xen.
 */
int
ec_probe_pirq(int pirq)
{
	evtchn_bind_pirq_t bind;

	bind.pirq = pirq;
	bind.flags = 0;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind) != 0) {
		return (0);
	} else {
		(void) xen_close_evtchn(bind.port);
		return (1);
	}
}

/*
 * Bind an event channel to a vcpu
 */
void
ec_bind_vcpu(int evtchn, int cpu)
{
	mutex_enter(&ec_lock);
	xen_bind_vcpu(evtchn, cpu);
	mutex_exit(&ec_lock);
}

/*
 * Set up a physical device irq to be associated with an event channel.
 */
void
ec_setup_pirq(int irq, int ipl, cpuset_t *cpusp)
{
	int evtchn;
	irq_info_t *irqp = &irq_info[irq];

	/*
	 * Test whether this PIRQ is already bound to an evtchn; if it is, this
	 * is a shared IRQ and we don't want to re-bind or repeat the initial
	 * setup that has already been done for this irq on a previous trip
	 * through this code.
	 */
	if (irqp->ii_u.evtchn == INVALID_EVTCHN) {
		evtchn = xen_bind_pirq(irq);

		pirq_query_unmask(IRQ_TO_PIRQ(irq));

		irqp->ii_type = IRQT_PIRQ;
		irqp->ii_u.evtchn = evtchn;

		evtchn_to_irq[evtchn] = irq;
		irqp->ii_u2.ipl = ipl;
		ec_set_irq_affinity(irq, *cpusp);
		ec_enable_irq(irq);
		pirq_unmask_notify(IRQ_TO_PIRQ(irq));
	} else {
		ASSERT(irqp->ii_u2.ipl != 0);
		cmn_err(CE_NOTE, "!IRQ%d is shared", irq);
		if (ipl > irqp->ii_u2.ipl)
			irqp->ii_u2.ipl = ipl;
		*cpusp = evtchn_cpus[irqp->ii_u.evtchn];
	}
}

void
ec_unbind_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	mec_info_t *virqp;
	int drop_lock = 0;
	int type, i;

	/*
	 * Nasty, but we need this during suspend.
	 */
	if (mutex_owner(&ec_lock) != curthread) {
		mutex_enter(&ec_lock);
		drop_lock = 1;
	}

	type = irqp->ii_type;

	ASSERT((type == IRQT_EVTCHN) || (type == IRQT_PIRQ) ||
	    (type == IRQT_VIRQ));

	if ((type == IRQT_EVTCHN) || (type == IRQT_PIRQ)) {
		/* There's only one event channel associated with this irq */
		unbind_evtchn(&irqp->ii_u.evtchn);
	} else if (type == IRQT_VIRQ) {
		/*
		 * Each cpu on the system can have its own event channel
		 * associated with a virq.  Unbind them all.
		 */
		virqp = &virq_info[irqp->ii_u.index];
		for (i = 0; i < NCPU; i++) {
			if (virqp->mi_evtchns[i] != 0)
				unbind_evtchn(&virqp->mi_evtchns[i]);
		}
		/* Mark the virq structure as invalid. */
		virqp->mi_irq = INVALID_IRQ;
	}

	bzero(irqp, sizeof (*irqp));
	/* Re-reserve PIRQ. */
	if (type == IRQT_PIRQ)
		irqp->ii_type = IRQT_PIRQ;

	if (drop_lock)
		mutex_exit(&ec_lock);
}

/*
 * Rebind an event channel for delivery to a CPU.
 */
void
ec_set_irq_affinity(int irq, cpuset_t dest)
{
	int evtchn, tcpu;
	irq_info_t *irqp = &irq_info[irq];

	mutex_enter(&ec_lock);

	ASSERT(irq < NR_IRQS);
	ASSERT(irqp->ii_type != IRQT_UNBOUND);

	/*
	 * Binding is done at allocation time for these types, so we should
	 * never modify them.
	 */
	if (irqp->ii_type == IRQT_IPI || irqp->ii_type == IRQT_VIRQ ||
	    irqp->ii_type == IRQT_DEV_EVTCHN) {
		mutex_exit(&ec_lock);
		return;
	}

	CPUSET_FIND(dest, tcpu);
	ASSERT(tcpu != CPUSET_NOTINSET);

	evtchn = irq_evtchn(irqp);

	xen_bind_vcpu(evtchn, tcpu);

	bind_evtchn_to_cpuset(evtchn, dest);

	mutex_exit(&ec_lock);

	/*
	 * Now send the new target processor a NOP IPI.
	 * It will check for any pending interrupts, and so service any that
	 * got delivered to the wrong processor by mistake.
	 */
	if (ncpus > 1)
		poke_cpu(tcpu);
}

int
ec_set_irq_priority(int irq, int pri)
{
	irq_info_t *irqp;

	if (irq >= NR_IRQS)
		return (-1);

	irqp = &irq_info[irq];

	if (irqp->ii_type == IRQT_UNBOUND)
		return (-1);

	irqp->ii_u2.ipl = pri;

	return (0);
}

void
ec_clear_irq_priority(int irq)
{
	irq_info_t *irqp = &irq_info[irq];

	ASSERT(irq < NR_IRQS);
	ASSERT(irqp->ii_type != IRQT_UNBOUND);

	irqp->ii_u2.ipl = 0;
}

int
ec_bind_evtchn_to_irq(int evtchn)
{
	mutex_enter(&ec_lock);

	ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);

	(void) alloc_irq(IRQT_EVTCHN, 0, evtchn, -1);

	mutex_exit(&ec_lock);
	return (evtchn_to_irq[evtchn]);
}

int
ec_bind_virq_to_irq(int virq, int cpu)
{
	int err;
	int evtchn;
	mec_info_t *virqp;

	virqp = &virq_info[virq];
	mutex_enter(&ec_lock);

	err = xen_bind_virq(virq, cpu, &evtchn);
	ASSERT(err == 0);

	ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);

	if (virqp->mi_irq == INVALID_IRQ) {
		virqp->mi_irq = alloc_irq(IRQT_VIRQ, virq, evtchn, cpu);
	} else {
		alloc_irq_evtchn(virqp->mi_irq, virq, evtchn, cpu);
	}

	mutex_exit(&ec_lock);

	return (virqp->mi_irq);
}

int
ec_bind_ipi_to_irq(int ipl, int cpu)
{
	int evtchn;
	ulong_t flags;
	mec_info_t *ipip;

	mutex_enter(&ec_lock);

	ipip = &ipi_info[ipl];

	evtchn = xen_bind_ipi(cpu);

	ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);

	if (ipip->mi_irq == INVALID_IRQ) {
		ipip->mi_irq = alloc_irq(IRQT_IPI, ipl, evtchn, cpu);
	} else {
		alloc_irq_evtchn(ipip->mi_irq, ipl, evtchn, cpu);
	}

	/*
	 * Unmask the new evtchn so that it can be seen by the target cpu
	 */
	flags = intr_clear();
	ec_unmask_evtchn(evtchn);
	intr_restore(flags);

	mutex_exit(&ec_lock);
	return (ipip->mi_irq);
}

/*
 * When bringing up a CPU, bind to all the IPIs that CPU0 bound.
 */
void
ec_bind_cpu_ipis(int cpu)
{
	int i;

	for (i = 0; i < MAXIPL; i++) {
		mec_info_t *ipip = &ipi_info[i];
		if (ipip->mi_irq == INVALID_IRQ)
			continue;

		(void) ec_bind_ipi_to_irq(i, cpu);
	}
}

/*
 * Can this IRQ be rebound to another CPU?
 */
int
ec_irq_rebindable(int irq)
{
	irq_info_t *irqp = &irq_info[irq];

	if (irqp->ii_u.evtchn == 0)
		return (0);

	return (irqp->ii_type == IRQT_EVTCHN || irqp->ii_type == IRQT_PIRQ);
}

/*
 * Should this IRQ be unbound from this CPU (which is being offlined) to
 * another?
 */
int
ec_irq_needs_rebind(int irq, int cpu)
{
	irq_info_t *irqp = &irq_info[irq];

	return (ec_irq_rebindable(irq) &&
	    CPU_IN_SET(evtchn_cpus[irqp->ii_u.evtchn], cpu));
}

void
ec_send_ipi(int ipl, int cpu)
{
	mec_info_t *ipip = &ipi_info[ipl];

	ASSERT(ipip->mi_irq != INVALID_IRQ);

	ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
}

void
ec_try_ipi(int ipl, int cpu)
{
	mec_info_t *ipip = &ipi_info[ipl];

	if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
		return;

	ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
}

void
ec_irq_add_evtchn(int irq, int evtchn)
{
	mutex_enter(&ec_lock);

	/*
	 * See description of IRQT_DEV_EVTCHN above.
	 */
	ASSERT(irq == ec_dev_irq);

	alloc_irq_evtchn(irq, 0, evtchn, 0);
	/*
	 * We enforce that the representative event channel for IRQT_DEV_EVTCHN
	 * is zero, so PSM operations on it have no effect.
	 */
	irq_info[irq].ii_u.evtchn = 0;
	mutex_exit(&ec_lock);
}

void
ec_irq_rm_evtchn(int irq, int evtchn)
{
	ushort_t ec = evtchn;

	mutex_enter(&ec_lock);
	ASSERT(irq == ec_dev_irq);
	unbind_evtchn(&ec);
	mutex_exit(&ec_lock);
}

/*
 * Allocate a /dev/xen/evtchn IRQ.  See the big comment at the top
 * for an explanation.
 */
int
ec_dev_alloc_irq(void)
{
	int i;
	irq_info_t *irqp;

	for (i = 0; i < NR_IRQS; i++) {
		if (irq_info[i].ii_type == IRQT_UNBOUND)
			break;
	}

	ASSERT(i != NR_IRQS);

	irqp = &irq_info[i];
	irqp->ii_type = IRQT_DEV_EVTCHN;
	irqp->ii_u2.ipl = IPL_EVTCHN;
	/*
	 * Force the evtchn to zero for the special evtchn device irq
	 */
	irqp->ii_u.evtchn = 0;
	return (i);
}

void
ec_enable_irq(unsigned int irq)
{
	ulong_t flag;
	irq_info_t *irqp = &irq_info[irq];

	if (irqp->ii_type == IRQT_DEV_EVTCHN)
		return;

	flag = intr_clear();
	ec_unmask_evtchn(irq_evtchn(irqp));
	intr_restore(flag);
}

void
ec_disable_irq(unsigned int irq)
{
	irq_info_t *irqp = &irq_info[irq];

	if (irqp->ii_type == IRQT_DEV_EVTCHN)
		return;

	/*
	 * Spin until we are the one to mask the evtchn; this ensures no
	 * one else can be servicing this evtchn.
	 */
	while (!ec_mask_evtchn(irq_evtchn(irqp)))
		SMT_PAUSE();
}

static int
ec_evtchn_pending(uint_t ev)
{
	uint_t evi;
	shared_info_t *si = HYPERVISOR_shared_info;

	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	return ((si->evtchn_pending[evi] & (1ul << ev)) != 0);
}

int
ec_pending_irq(unsigned int irq)
{
	int evtchn = irq_evtchn(&irq_info[irq]);

	return (ec_evtchn_pending(evtchn));
}

void
ec_clear_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	if (irqp->ii_type == IRQT_DEV_EVTCHN)
		return;

	ASSERT(irqp->ii_type != IRQT_UNBOUND);

	evtchn = irq_evtchn(irqp);

	ASSERT(EVTCHN_MASKED(evtchn));
	ec_clear_evtchn(evtchn);
}

void
ec_unmask_irq(int irq)
{
	ulong_t flags;
	irq_info_t *irqp = &irq_info[irq];

	flags = intr_clear();
	switch (irqp->ii_type) {
	case IRQT_PIRQ:
		end_pirq(irq);
		break;
	case IRQT_DEV_EVTCHN:
		break;
	default:
		ec_unmask_evtchn(irq_evtchn(irqp));
		break;
	}
	intr_restore(flags);
}

void
ec_try_unmask_irq(int irq)
{
	ulong_t flags;
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	flags = intr_clear();
	switch (irqp->ii_type) {
	case IRQT_PIRQ:
		end_pirq(irq);
		break;
	case IRQT_DEV_EVTCHN:
		break;
	default:
		if ((evtchn = irq_evtchn(irqp)) != 0)
			ec_unmask_evtchn(evtchn);
		break;
	}
	intr_restore(flags);
}

/*
 * Poll until an event channel is ready or 'check_func' returns true.  This can
 * only be used in a situation where interrupts are masked, otherwise we have a
 * classic time-of-check vs. time-of-use race.
 */
void
ec_wait_on_evtchn(int evtchn, int (*check_func)(void *), void *arg)
{
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		while (!check_func(arg))
			(void) HYPERVISOR_yield();
		return;
	}

	ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);

	for (;;) {
		evtchn_port_t ports[1];

		ports[0] = evtchn;

		ec_clear_evtchn(evtchn);

		if (check_func(arg))
			return;

		(void) HYPERVISOR_poll(ports, 1, 0);
	}
}

void
ec_wait_on_ipi(int ipl, int (*check_func)(void *), void *arg)
{
	mec_info_t *ipip = &ipi_info[ipl];

	if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
		return;

	ec_wait_on_evtchn(ipip->mi_evtchns[CPU->cpu_id], check_func, arg);
}

void
ec_suspend(void)
{
	irq_info_t *irqp;
	ushort_t *evtchnp;
	int i;
	int c;

	ASSERT(MUTEX_HELD(&ec_lock));

	for (i = 0; i < MAXIPL; i++) {
		if (ipi_info[i].mi_irq == INVALID_IRQ)
			continue;

		for (c = 0; c < NCPU; c++) {
			if (cpu[c] == NULL)
				continue;

			if (CPU_IN_SET(cpu_suspend_lost_set, c))
				continue;

			evtchnp = &ipi_info[i].mi_evtchns[c];
			ASSERT(*evtchnp != 0);
			unbind_evtchn(evtchnp);
		}
	}

	for (i = 0; i < NR_VIRQS; i++) {
		if (virq_info[i].mi_irq == INVALID_IRQ)
			continue;

		/*
		 * If we're sharing a single event channel across all CPUs, we
		 * should only unbind once.
		 */
		if (virq_info[i].mi_shared) {
			evtchnp = &virq_info[i].mi_evtchns[0];
			unbind_evtchn(evtchnp);
			for (c = 1; c < NCPU; c++)
				virq_info[i].mi_evtchns[c] = 0;
		} else {
			for (c = 0; c < NCPU; c++) {
				if (cpu[c] == NULL)
					continue;

				evtchnp = &virq_info[i].mi_evtchns[c];
				if (*evtchnp != 0)
					unbind_evtchn(evtchnp);
			}
		}
	}

	for (i = 0; i < NR_IRQS; i++) {
		irqp = &irq_info[i];

		switch (irqp->ii_type) {
		case IRQT_EVTCHN:
		case IRQT_DEV_EVTCHN:
			(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
			break;
		case IRQT_PIRQ:
			if (irqp->ii_u.evtchn != 0)
				(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
			break;
		default:
			break;
		}
	}
}

/*
 * The debug irq is special: we have only one evtchn and irq, but we allow all
 * cpus to service it.  It's marked as shared and we propagate the event
 * channel into all CPUs by hand.
 */
static void
share_virq(mec_info_t *virqp)
{
	int evtchn = virqp->mi_evtchns[0];
	cpuset_t tset;
	int i;

	ASSERT(evtchn != 0);

	virqp->mi_shared = 1;

	for (i = 1; i < NCPU; i++)
		virqp->mi_evtchns[i] = evtchn;
	CPUSET_ALL(tset);
	bind_evtchn_to_cpuset(evtchn, tset);
}

static void
virq_resume(int virq)
{
	mec_info_t *virqp = &virq_info[virq];
	int evtchn;
	int i, err;

	for (i = 0; i < NCPU; i++) {
		cpuset_t tcpus;

		if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
			continue;

		err = xen_bind_virq(virq, i, &evtchn);
		ASSERT(err == 0);

		virqp->mi_evtchns[i] = evtchn;
		evtchn_to_irq[evtchn] = virqp->mi_irq;
		CPUSET_ONLY(tcpus, i);
		bind_evtchn_to_cpuset(evtchn, tcpus);
		ec_unmask_evtchn(evtchn);
		/*
		 * only timer VIRQ is bound to all cpus
		 */
		if (virq != VIRQ_TIMER)
			break;
	}

	if (virqp->mi_shared)
		share_virq(virqp);
}

static void
ipi_resume(int ipl)
{
	mec_info_t *ipip = &ipi_info[ipl];
	int i;

	for (i = 0; i < NCPU; i++) {
		cpuset_t tcpus;
		int evtchn;

		if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
			continue;

		evtchn = xen_bind_ipi(i);
		ipip->mi_evtchns[i] = evtchn;
		evtchn_to_irq[evtchn] = ipip->mi_irq;
		CPUSET_ONLY(tcpus, i);
		bind_evtchn_to_cpuset(evtchn, tcpus);
		ec_unmask_evtchn(evtchn);
	}
}

void
ec_resume(void)
{
	int i;

	/* New event-channel space is not 'live' yet. */
	for (i = 0; i < NR_EVENT_CHANNELS; i++)
		(void) ec_mask_evtchn(i);

	for (i = 0; i < MAXIPL; i++) {
		if (ipi_info[i].mi_irq == INVALID_IRQ)
			continue;
		ipi_resume(i);
	}

	for (i = 0; i < NR_VIRQS; i++) {
		if (virq_info[i].mi_irq == INVALID_IRQ)
			continue;
		virq_resume(i);
	}
}

void
ec_init(void)
{
	int i;

	mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7));

	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
		CPUSET_ZERO(evtchn_cpus[i]);
		evtchn_to_irq[i] = INVALID_IRQ;
		(void) ec_mask_evtchn(i);
	}

	for (i = 0; i < MAXIPL; i++)
		ipi_info[i].mi_irq = INVALID_IRQ;

	for (i = 0; i < NR_VIRQS; i++)
		virq_info[i].mi_irq = INVALID_IRQ;

	/*
	 * Phys IRQ space is statically bound (1:1 mapping), grab the IRQs
	 * now.
	 */
	for (i = PIRQ_BASE; i < NR_PIRQS; i++) {
		irq_info[PIRQ_TO_IRQ(i)].ii_type = IRQT_PIRQ;
	}
}

void
ec_init_debug_irq()
{
	int irq;

	irq = ec_bind_virq_to_irq(VIRQ_DEBUG, 0);
	(void) add_avintr(NULL, IPL_DEBUG, (avfunc)xen_debug_handler,
	    "debug", irq, NULL, NULL, NULL, NULL);

	mutex_enter(&ec_lock);
	share_virq(&virq_info[irq_info[irq].ii_u.index]);
	mutex_exit(&ec_lock);
	ec_debug_irq = irq;
}

#define	UNBLOCKED_EVENTS(si, ix, cpe, cpu_id) \
	((si)->evtchn_pending[ix] & ~(si)->evtchn_mask[ix] & \
	(cpe)->evt_affinity[ix])

/*
 * This is the entry point for processing events from xen.
 *
 * (See the commentary associated with the shared_info_st structure
 * in hypervisor-if.h)
 *
 * Since the event channel mechanism doesn't really implement the
 * concept of priority like hardware interrupt controllers, we simulate
 * that in software here using the cpu priority field and the pending
 * interrupts field.  Events/interrupts that are not able to be serviced
 * now because they are at a lower priority than the current cpu priority
 * cause a level bit to be recorded in the pending interrupts word.  When
 * the priority is lowered (either by spl or interrupt exit code) the pending
 * levels are checked and an upcall is scheduled if there are events/interrupts
 * that have become deliverable.
 */
void
xen_callback_handler(struct regs *rp, trap_trace_rec_t *ttp)
{
	ulong_t pending_sels, pe, selbit;
	int i, j, port, pri, curpri, irq;
	uint16_t pending_ints;
	struct cpu *cpu = CPU;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
	volatile struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;
	volatile uint16_t *cpu_ipp = &cpu->cpu_m.mcpu_intr_pending;

	ASSERT(rp->r_trapno == T_AST && rp->r_err == 0);
	ASSERT(&si->vcpu_info[cpu->cpu_id] == vci);
	ASSERT_STACK_ALIGNED();

	vci->evtchn_upcall_pending = 0;

	/*
	 * To expedite scanning of pending notifications, any 0->1
	 * pending transition on an unmasked channel causes a
	 * corresponding bit in evtchn_pending_sel to be set.
	 * Each bit in the selector covers a 32-bit word in
	 * the evtchn_pending[] array.
	 */
	membar_enter();
	do {
		pending_sels = vci->evtchn_pending_sel;
	} while (atomic_cas_ulong((volatile ulong_t *)&vci->evtchn_pending_sel,
	    pending_sels, 0) != pending_sels);

	pending_ints = *cpu_ipp;
	while ((i = ffs(pending_sels)) != 0) {
		i--;
		selbit = 1ul << i;
		pending_sels &= ~selbit;

		membar_enter();
		while ((pe = UNBLOCKED_EVENTS(si, i, cpe, cpu->cpu_id)) != 0) {
			j = ffs(pe) - 1;
			pe &= ~(1ul << j);

			port = (i << EVTCHN_SHIFT) + j;

			irq = evtchn_to_irq[port];

			/*
			 * If no irq is set, just ignore the event.
			 * On e.g. netbsd they call evtchn_device_upcall(port).
			 * We require the evtchn driver to install a handler,
			 * so there will be an irq associated with user mode
			 * evtchns.
			 */
			if (irq == INVALID_IRQ) {
				ec_clear_evtchn(port);
				continue;
			}

			/*
			 * If there's no handler, it could be a poke, so just
			 * accept the event and continue.
			 */
			if (!irq_info[irq].ii_u2.has_handler) {
#ifdef TRAPTRACE
				ttp->ttr_ipl = 0xff;
				if (IRQ_IS_CPUPOKE(irq)) {
					ttp->ttr_ipl = XC_CPUPOKE_PIL;
					ttp->ttr_marker = TT_INTERRUPT;
				}
				ttp->ttr_pri = cpu->cpu_pri;
				ttp->ttr_spl = cpu->cpu_base_spl;
				ttp->ttr_vector = 0xff;
#endif /* TRAPTRACE */
				if (ec_mask_evtchn(port)) {
					ec_clear_evtchn(port);
					ec_unmask_evtchn(port);
					continue;
				}
			}

			pri = irq_info[irq].ii_u2.ipl;

			/*
			 * If we are the cpu that successfully masks
			 * the event, then record it as a pending event
			 * for this cpu to service.
			 */
			if (ec_mask_evtchn(port)) {
				if (ec_evtchn_pending(port)) {
					cpe->pending_sel[pri] |= selbit;
					cpe->pending_evts[pri][i] |= (1ul << j);
					pending_ints |= 1 << pri;
				} else {
					/*
					 * another cpu serviced this event
					 * before us, clear the mask.
					 */
					ec_unmask_evtchn(port);
				}
			}
		}
	}
	*cpu_ipp = pending_ints;
	if (pending_ints == 0)
		return;
	/*
	 * We have gathered all the pending events/interrupts,
	 * go service all the ones we can from highest priority to lowest.
	 * Note: This loop may not actually complete and service all
	 * pending interrupts since one of the interrupt threads may
	 * block and the pinned thread runs.  In that case, when we
	 * exit the interrupt thread that blocked we will check for
	 * any unserviced interrupts and re-post an upcall to process
	 * any unserviced pending events.
	 */
	curpri = cpu->cpu_pri;
	for (pri = bsrw_insn(*cpu_ipp); pri > curpri; pri--) {
		while ((pending_sels = cpe->pending_sel[pri]) != 0) {
			i = ffs(pending_sels) - 1;
			while ((pe = cpe->pending_evts[pri][i]) != 0) {
				j = ffs(pe) - 1;
				pe &= ~(1ul << j);
				cpe->pending_evts[pri][i] = pe;
				if (pe == 0) {
					/*
					 * Must reload pending selector bits
					 * here as they could have changed on
					 * a previous trip around the inner loop
					 * while we were interrupt-enabled
					 * in an interrupt service routine.
					 */
					pending_sels = cpe->pending_sel[pri];
					pending_sels &= ~(1ul << i);
					cpe->pending_sel[pri] = pending_sels;
					if (pending_sels == 0)
						*cpu_ipp &= ~(1 << pri);
				}
				port = (i << EVTCHN_SHIFT) + j;
				irq = evtchn_to_irq[port];
				if (irq == INVALID_IRQ) {
					/*
					 * There is no longer a handler for
					 * this event channel.  Clear and
					 * unmask the event, then ignore it.
					 */
					ec_clear_evtchn(port);
					ec_unmask_evtchn(port);
					continue;
				}
				if (irq == ec_dev_irq) {
					volatile int *tptr = &ec_dev_mbox;

					ASSERT(ec_dev_mbox == 0);
					/*
					 * NOTE: this gross store thru a pointer
					 * is necessary because of a Sun C
					 * compiler bug that does not properly
					 * honor a volatile declaration.
					 * We really should just be able to say
					 * ec_dev_mbox = port;
					 * here.
					 */
					*tptr = port;
				}
				/*
				 * Set up the regs struct to
				 * look like a normal hardware int
				 * and do normal interrupt handling.
				 */
				rp->r_trapno = irq;
				do_interrupt(rp, ttp);
				/*
				 * Check for a cpu priority change, which can
				 * happen if an interrupt thread blocks.
				 */
				if (cpu->cpu_pri > curpri)
					return;
			}
		}
	}
}

void
ec_unmask_evtchn(unsigned int ev)
{
	uint_t evi;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile vcpu_info_t *vci = CPU->cpu_m.mcpu_vcpu_info;
	volatile ulong_t *ulp;

	ASSERT(!interrupts_enabled());
	/*
	 * Check if we need to take the slow path
	 */
	if (!CPU_IN_SET(evtchn_cpus[ev], CPU->cpu_id)) {
		xen_evtchn_unmask(ev);
		return;
	}
	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	ulp = (volatile ulong_t *)&si->evtchn_mask[evi];
	atomic_and_ulong(ulp, ~(1ul << ev));
	/*
	 * The following is basically the equivalent of
	 * 'hw_resend_irq'.  Just like a real IO-APIC we 'lose the
	 * interrupt edge' if the channel is masked.
	 * XXPV - slight race if upcall was about to be set, we may get
	 * an extra upcall.
	 */
	membar_enter();
	if (si->evtchn_pending[evi] & (1ul << ev)) {
		membar_consumer();
		ulp = (volatile ulong_t *)&vci->evtchn_pending_sel;
		if (!(*ulp & (1ul << evi))) {
			atomic_or_ulong(ulp, (1ul << evi));
		}
		vci->evtchn_upcall_pending = 1;
	}
}

/*
 * Set a bit in an evtchn mask word; return true if we are the cpu that
 * set the bit.
 */
int
ec_mask_evtchn(unsigned int ev)
{
	uint_t evi, evb;
	ulong_t new, old, bit;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile ulong_t *maskp;
	int masked;

	kpreempt_disable();
	evi = ev >> EVTCHN_SHIFT;
	evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
	bit = 1ul << evb;
	maskp = (volatile ulong_t *)&si->evtchn_mask[evi];
	do {
		old = si->evtchn_mask[evi];
		new = old | bit;
	} while (atomic_cas_ulong(maskp, old, new) != old);
	masked = (old & bit) == 0;
	if (masked) {
		evtchn_owner[ev] = CPU->cpu_id;
#ifdef DEBUG
		evtchn_owner_thread[ev] = curthread;
#endif
	}
	kpreempt_enable();
	return (masked);
}

void
ec_clear_evtchn(unsigned int ev)
{
	uint_t evi;
	shared_info_t *si = HYPERVISOR_shared_info;
	volatile ulong_t *pendp;

	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	pendp = (volatile ulong_t *)&si->evtchn_pending[evi];
	atomic_and_ulong(pendp, ~(1ul << ev));
}

void
ec_notify_via_evtchn(unsigned int port)
{
	evtchn_send_t send;

	ASSERT(port != INVALID_EVTCHN);

	send.port = port;
	(void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}

/*
 * Mask the event channel associated with this irq and return the cpu that
 * owns (i.e. masked) the evtchn.
 */
int
ec_block_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	evtchn = irq_evtchn(irqp);
	(void) ec_mask_evtchn(evtchn);
	return (evtchn_owner[evtchn]);
}

/*
 * Make an event that is pending for delivery on the current cpu "go away"
 * without servicing the interrupt.
 */
void
ec_unpend_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int pri = irqp->ii_u2.ipl;
	ulong_t flags;
	uint_t evtchn, evi, bit;
	unsigned long pe, pending_sels;
	struct xen_evt_data *cpe;

	/*
	 * The evtchn must be masked.
	 */
	evtchn = irq_evtchn(irqp);
	ASSERT(EVTCHN_MASKED(evtchn));
	evi = evtchn >> EVTCHN_SHIFT;
	bit = evtchn & ((1ul << EVTCHN_SHIFT) - 1);
	flags = intr_clear();
	cpe = CPU->cpu_m.mcpu_evt_pend;
	pe = cpe->pending_evts[pri][evi] & ~(1ul << bit);
	cpe->pending_evts[pri][evi] = pe;
	if (pe == 0) {
		pending_sels = cpe->pending_sel[pri];
		pending_sels &= ~(1ul << evi);
		cpe->pending_sel[pri] = pending_sels;
		if (pending_sels == 0)
			CPU->cpu_m.mcpu_intr_pending &= ~(1 << pri);
	}
	intr_restore(flags);
}