// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

static struct irq_ops arch_timer_irq_ops_vgic_v5 = {
	.get_input_level = kvm_arch_timer_get_input_level,
	.queue_irq_unlock = vgic_v5_ppi_queue_irq_unlock,
	.set_direct_injection = vgic_v5_set_ppi_dvi,
};

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
		break;
	case TIMER_PTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
		break;
	case TIMER_HVTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
		break;
	case TIMER_HPTIMER:
		__vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
		break;
	default:
		WARN_ON(1);
	}
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

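/*
 * Pick which timer contexts are backed by the hardware timers ("direct")
 * and which ones must be emulated with hrtimers ("emul") for the current
 * vcpu state. With NV, the EL2 and EL1 timers swap roles depending on
 * whether the vcpu is currently executing in (virtual) EL2 or in EL1/0.
 */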
void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	map->direct_vtimer->direct = true;
	if (map->direct_ptimer)
		map->direct_ptimer->direct = true;

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

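/*
 * Compute, in nanoseconds, how far in the future @val lies on the guest's
 * view of the counter (physical count minus the context's offset). Returns
 * 0 if @val has already been reached or passed.
 */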
static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
	       ((timer_get_ctl(timer_ctx) &
		 (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

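/*
 * WFIT (FEAT_WFxT) encodes an absolute timeout in a GPR: the vcpu is due
 * to wake up once the virtual counter reaches that value. Fish the timeout
 * out of the register named by the trapped instruction, and compare it
 * against the counter the vcpu currently uses (the hvtimer context at
 * vEL2, the vtimer otherwise).
 */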
static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = timer_context_to_vcpu(ctx);

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

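/*
 * Compute the timer output level. When the context is loaded on the CPU,
 * trust the hardware ISTATUS/IMASK bits; otherwise, derive the level from
 * the saved CVAL and the offset-adjusted physical counter.
 */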
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
			(cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);

	return kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer) ||
	       (vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0);
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
{
	/*
	 * Paper over NV2 brokenness by publishing the interrupt status
	 * bit. This still results in a poor quality of emulation (guest
	 * writes will have no effect until the next exit).
	 *
	 * But hey, it's fast, right?
	 */
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	if (is_hyp_ctxt(vcpu) &&
	    (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
		unsigned long val = timer_get_ctl(ctx);
		__assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
		timer_set_ctl(ctx, val);
	}
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	kvm_timer_update_status(timer_ctx, new_level);

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (userspace_irqchip(vcpu->kvm))
		return;

	/* Skip injecting on GICv5 for directly injected (DVI'd) timers */
	if (vgic_is_v5(vcpu->kvm) && timer_ctx->direct)
		return;

	kvm_vgic_inject_irq(vcpu->kvm, vcpu,
			    timer_irq(timer_ctx),
			    timer_ctx->irq.level,
			    timer_ctx);
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level)
		kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);

	kvm_timer_update_status(ctx, should_fire);

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

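/*
 * Sync a loaded timer's HW state back into the vcpu context, then disable
 * the HW timer so that it cannot fire while the vcpu is out. CVAL is
 * stored relative to the guest's view of the counter where an offset is
 * in use.
 */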
static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		cval = read_sysreg_el0(SYS_CNTV_CVAL);

		if (has_broken_cntvoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so there is no need to zero the
		 * CNTVOFF_EL2 register, but this is actually useful
		 * when switching between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer
	 * for the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

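/*
 * Mirror image of timer_save_state(): program the saved context back into
 * the HW timer and counter offset registers. When the offset cannot be
 * applied in HW (broken CNTVOFF, or no CNTPOFF), fold it into CVAL instead.
 */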
static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		if (has_broken_cntvoff()) {
			set_cntvoff(0);
			cval += offset;
		} else {
			set_cntvoff(offset);
		}
		write_sysreg_el0(cval, SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;
	phys_active |= vgic_is_v5(vcpu->kvm);

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer pointers.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer));
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer));
		WARN_ON_ONCE(ret);
	}
}

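/*
 * Work out which timer/counter accesses must trap for this vcpu and
 * reflect the result in CNTHCTL_EL2. Traps are only added when emulation
 * requires them: ECV+NV2 register redirection, a physical offset without
 * CNTPOFF, a broken CNTVOFF, or the guest hypervisor's own trap settings.
 */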
static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tvt, tpt, tvc, tpc, tvt02, tpt02;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tvt = tpt = tvc = tpc = false;
	tvt02 = tpt02 = false;

	/*
	 * NV2 badly breaks the timer semantics by redirecting accesses to
	 * the EL1 timer state to memory, so let's call ECV to the rescue if
	 * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
	 *
	 * The treatment slightly varies depending on whether we run a nVHE
	 * or VHE guest: nVHE will use the _EL0 registers directly, while
	 * VHE will use the _EL02 accessors. This translates into different
	 * trap bits.
	 *
	 * None of the trapping is required when running in non-HYP context,
	 * unless required by the L1 hypervisor settings once we advertise
	 * ECV+NV in the guest, or unless we need trapping for other reasons.
	 */
	if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
		if (vcpu_el2_e2h_is_set(vcpu))
			tvt02 = tpt02 = true;
		else
			tvt = tpt = true;
	}

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * For the poor sods that could not correctly subtract one value
	 * from another, trap the full virtual timer and counter.
	 */
	if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
		tvt = tvc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
	 * Implementation choices: we do not support NV when E2H=0 in the
	 * guest, and we don't support configuration where E2H is writable
	 * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
	 * not both). This simplifies the handling of the EL1NV* bits.
	 */
	if (is_nested_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));

		tpt02 |= (val & CNTHCTL_EL1NVPCT);
		tvt02 |= (val & CNTHCTL_EL1NVVCT);
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

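	/*
	 * Mind the polarity: EL1PCEN/EL1PCTEN are *enable* bits, so trapping
	 * means clearing them, while EL1TVT/EL1TVCT/EL1NV* are *trap* bits,
	 * so trapping means setting them. Hence the swapped clr/set
	 * arguments below.
	 */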
	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
	assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
	assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
	assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
	assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
	sysreg_clear_set(cnthctl_el2, clr, set);
}

void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		/* We don't do NV on GICv5, yet */
		if (vcpu_has_nv(vcpu) && !vgic_is_v5(vcpu->kvm))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);

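	/*
	 * On GICv5, the timer PPIs were forced active in
	 * kvm_timer_vcpu_load_gic(); drop that active state again now that
	 * the vcpu is going away.
	 */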
	if (vgic_is_v5(vcpu->kvm)) {
		set_timer_irq_phys_active(map.direct_vtimer, false);
		if (map.direct_ptimer)
			set_timer_irq_phys_active(map.direct_ptimer, false);
	}
}

void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
{
	/*
	 * When NV2 is on, guest hypervisors have their EL1 timer register
	 * accesses redirected to the VNCR page. Any guest action taken on
	 * the timer is postponed until the next exit, leading to a very
	 * poor quality of emulation.
	 *
	 * This is an unmitigated disaster, only papered over by FEAT_ECV,
	 * which allows trapping of the timer registers even with NV2.
	 * Still, this is worse than FEAT_NV on its own. Meh.
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
		/*
		 * For a VHE guest hypervisor, the EL2 state is directly
		 * stored in the host EL1 timers, while the emulated EL1
		 * state is stored in the VNCR page. The latter could have
		 * been updated behind our back, and we must reset the
		 * emulation of the timers.
		 *
		 * A non-VHE guest hypervisor doesn't have any direct access
		 * to its timers: the EL2 registers trap despite being
		 * notionally direct (we use the EL1 HW, as for VHE), while
		 * the EL1 registers access memory.
		 *
		 * In both cases, process the emulated timers on each guest
		 * exit. Boo.
		 */
		struct timer_map map;
		get_timer_map(vcpu, &map);

		soft_timer_cancel(&map.emul_vtimer->hrtimer);
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
		timer_emulate(map.emul_vtimer);
		timer_emulate(map.emul_ptimer);
	}
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);
}

static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->timer_id = timerid;

	if (!kvm_vm_is_protected(vcpu->kvm)) {
		if (timerid == TIMER_VTIMER)
			ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
		else
			ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
	} else {
		ctxt->offset.vm_offset = NULL;
	}

	hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

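/*
 * Initialise each timer context. Unless userspace has already set a
 * counter offset, defaulting CNTVOFF to the current physical count makes
 * the virtual counter appear to start at zero for a freshly created VM.
 */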
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!vcpu_is_protected(vcpu) &&
	    !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
		      HRTIMER_MODE_ABS_HARD);
}

/*
 * This is always called during kvm_arch_init_vm, but will also be
 * called from kvm_vgic_create if we have a vGICv5.
 */
void kvm_timer_init_vm(struct kvm *kvm)
{
	/*
	 * Set up the default PPIs - note that we adjust them based on
	 * the model of the GIC, as GICv5 uses a different way of
	 * describing interrupts.
	 */
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = get_vgic_ppi(kvm, default_ppi[i]);
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

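/*
 * TVAL is architecturally just a window on CVAL: TVAL = CVAL - CNT,
 * truncated (on read) or sign-extended (on write) to 32 bits. The
 * TIMER_REG_TVAL cases below implement exactly that identity against the
 * guest's offset-adjusted view of the counter.
 */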
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

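/*
 * Reading a register of a loaded (HW-backed) timer requires syncing the
 * HW state into the in-memory context first: save the state, read it, and
 * put it back, all with preemption disabled so the vcpu cannot migrate in
 * between. Emulated timers can be read from memory directly.
 */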
u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_restore_state(timer);
		preempt_enable();
	}
}

static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

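/*
 * While the IRQ is forwarded to a vcpu, requests to set the active state
 * are implemented by masking the parent interrupt, and requests to clear
 * it by unmasking, instead of touching the real active state.
 */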
static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	/*
	 * On a GICv5 host, we still need to call EOI on the parent for
	 * PPIs. The host driver already handles irqs which are forwarded to
	 * vcpus, and skips the GIC CDDI while still doing the GIC CDEOI. This
	 * is required to emulate EOIMode=1 on GICv5 hardware. Failure to
	 * call EOI unsurprisingly results in *BAD* lock-ups.
	 */
	if (!irqd_is_forwarded_to_vcpu(d) ||
	    kvm_vgic_global_state.type == VGIC_V5)
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

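/*
 * When the GIC offers no usable HW deactivation (or is a GICv5), interpose
 * the "KVM" irqchip above between the timer IRQs and the GIC by pushing
 * them into a hierarchy domain, so that KVM controls ack/eoi/active state
 * for the forwarded interrupts.
 */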
static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation ||
	    kvm_vgic_global_state.type == VGIC_V5) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		if (kvm_vgic_global_state.no_hw_deactivation)
			arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

static void kvm_timer_handle_errata(void)
{
	u64 mmfr0, mmfr1, mmfr4;

	/*
	 * CNTVOFF_EL2 is broken on some implementations. For those, we trap
	 * all virtual timer/counter accesses, requiring FEAT_ECV.
	 *
	 * However, a hypervisor supporting nesting is likely to mitigate the
	 * erratum at L0, and not require other levels to mitigate it (which
	 * would otherwise be a terrible performance sink due to trap
	 * amplification).
	 *
	 * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
	 * and that NV is likely not to (because of limitations of the
	 * architecture), only enable the workaround when FEAT_VHE and
	 * FEAT_E2H0 are both detected. Time will tell if this actually holds.
	 */
	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
	if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
	    !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
	    SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
	    (has_vhe() || has_hvhe()) &&
	    cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
		static_branch_enable(&broken_cntvoff_key);
		kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
	}
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	kvm_timer_handle_errata();
	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

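/*
 * Check that userspace (or the defaults) assigned a distinct PPI to each
 * timer. On success the configuration is frozen: the PPIs become immutable
 * for the lifetime of the VM.
 */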
static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so all
		 * values are less than 32 for non-GICv5 VGICs. On GICv5,
		 * they are architecturally defined to be under 32 too.
		 * However, we mask off most of the bits as we might be
		 * presented with a GICv5 style PPI where the type is
		 * encoded in the top bits.
		 */
		ppis |= BIT(irq & 0x1f);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	struct irq_ops *ops;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ops = vgic_is_v5(vcpu->kvm) ? &arch_timer_irq_ops_vgic_v5 :
				      &arch_timer_irq_ops;

	for (int i = 0; i < nr_timers(vcpu); i++)
		kvm_vgic_set_irq_ops(vcpu, timer_irq(vcpu_get_timer(vcpu, i)), ops);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer));
	if (ret)
		return ret;

	if (map.direct_ptimer)
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer));
	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}

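/*
 * Device attribute accessors used by userspace to configure the PPI of
 * each timer (KVM_ARM_VCPU_TIMER_IRQ_*). The assignment is rejected once
 * the PPIs have become immutable, i.e. after the first vcpu run.
 */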
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(vcpu->kvm, irq)))
		return -EINVAL;

	guard(mutex)(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		return -EBUSY;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		return -ENXIO;
	}

	/*
	 * The PPIs for the Arch Timers are architecturally defined for
	 * GICv5. Reject anything that changes them from the specified value.
	 */
	if (vgic_is_v5(vcpu->kvm) && vcpu->kvm->arch.timer_data.ppi[idx] != irq)
		return -EINVAL;

	/*
	 * We cannot validate the IRQ unicity before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	if (kvm_vm_is_protected(kvm))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (!kvm_trylock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		kvm_unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}