// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;
	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
		pr_devel("Out of bounds !\n");
		return -EINVAL;
	}

	mutex_lock(&xive->lock);

	vp_id = kvmppc_xive_vp(xive, server_num);
	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
		pr_devel("Duplicate !\n");
		rc = -EEXIST;
		goto bail;
	}

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
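	/*
	 * Note: vcpu->arch.irq_type is set to KVMPPC_IRQ_XIVE just
	 * below, before the OPAL calls that follow. The error path at
	 * "bail" relies on this: kvmppc_xive_native_cleanup_vcpu()
	 * returns early unless kvmppc_xive_enabled(vcpu) is true.
	 */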
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the escalation interrupt numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = xive_native_configure_queue(xc->vp_id, q, priority,
						 NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		if (q->qpage) {
			put_page(virt_to_page(q->qpage));
			q->qpage = NULL;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);
	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	page_size = kvm_host_page_size(kvm, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address so that the EQ page
	 * can be marked dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = xive_native_configure_queue(xc->vp_id, q, priority,
					 (__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not be.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore none of the device attribute
	 * set/get, mmap, or page fault functions can be executing
	 * concurrently, and similarly the connect_vcpu and
	 * set/clr_mapped functions cannot be running either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. For now,
	 * it is only freed when the VM is destroyed, until all the
	 * execution paths are fixed.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	kvm->arch.xive = xive;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/*
	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
	 * a default. Getting the max number of CPUs the VM was
	 * configured with would improve our usage of the XIVE VP space.
	 */
	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
	pr_devel("VP_Base=%x\n", xive->vp_base);

	if (xive->vp_base == XIVE_INVALID_VP)
		ret = -ENXIO;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	if (ret)
		return ret;

	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of the IPB register in the NVT structure
	 * and merge it into our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged into the
	 * KVM VP state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}