/*
 * Copyright (c) 2004 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/subr_unit.c 255057 2013-08-30 07:37:45Z kib $
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2014 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 */

#include <sys/types.h>
#include <sys/archsystm.h>
#include <sys/cpuset.h>
#include <sys/fp.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/spl.h>
#include <sys/systm.h>
#include <sys/ddidmareq.h>
#include <sys/id_space.h>
#include <sys/psm_defs.h>
#include <sys/smp_impldefs.h>
#include <sys/modhash.h>
#include <sys/hma.h>

#include <sys/x86_archext.h>

#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include <machine/vmparam.h>
#include <sys/vmm_impl.h>
#include <sys/kernel.h>

#include <vm/as.h>
#include <vm/seg_kmem.h>

static void vmm_tsc_init(void);

SET_DECLARE(sysinit_set, struct sysinit);

void
sysinit(void)
{
	struct sysinit **si;

	SET_FOREACH(si, sysinit_set)
		(*si)->func((*si)->data);
}

void
invalidate_cache_all(void)
{
	cpuset_t cpuset;

	kpreempt_disable();
	cpuset_all_but(&cpuset, CPU->cpu_id);
	xc_call((xc_arg_t)NULL, (xc_arg_t)NULL, (xc_arg_t)NULL,
	    CPUSET2BV(cpuset), (xc_func_t)invalidate_cache);
	invalidate_cache();
	kpreempt_enable();
}

vm_paddr_t
vtophys(void *va)
{
	pfn_t pfn;

	/*
	 * Since hat_getpfnum() may block on an htable mutex, this is not at
	 * all safe to run from a critical_enter/kpreempt_disable context.
	 * The FreeBSD analog does not have the same locking constraints, so
	 * close attention must be paid wherever this is called.
	 */
	ASSERT(curthread->t_preempt == 0);

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)va);
	ASSERT(pfn != PFN_INVALID);
	return ((pfn << PAGE_SHIFT) | ((uintptr_t)va & PAGE_MASK));
}
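
/*
 * Illustrative example (editorial, not part of the original source): with the
 * usual PAGE_SHIFT of 12, a VA backed by PFN 0x1234 with an in-page offset of
 * 0x56 yields the physical address (0x1234 << 12) | 0x56 == 0x1234056.  The
 * PFN supplies the page-aligned portion and the low PAGE_MASK bits of the VA
 * carry through unchanged.
 */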

int
cpusetobj_ffs(const cpuset_t *set)
{
	uint_t large, small;

	/*
	 * Rather than reaching into the cpuset_t ourselves, leave that task to
	 * cpuset_bounds().  The simplicity is worth the extra wasted work to
	 * find the upper bound.
	 */
	cpuset_bounds(set, &small, &large);

	if (small == CPUSET_NOTINSET) {
		/* The FreeBSD version returns 0 if it finds nothing */
		return (0);
	}

	ASSERT3U(small, <=, INT_MAX);

	/* Least significant bit index starts at 1 for valid results */
	return (small + 1);
}

struct vmm_ptp_item {
	void *vpi_vaddr;
};
static kmutex_t vmm_ptp_lock;

static mod_hash_t *vmm_ptp_hash;
uint_t vmm_ptp_hash_nchains = 16381;
uint_t vmm_ptp_hash_size = PAGESIZE;

static void
vmm_ptp_hash_valdtor(mod_hash_val_t val)
{
	struct vmm_ptp_item *i = (struct vmm_ptp_item *)val;

	kmem_free(i->vpi_vaddr, PAGE_SIZE);
	kmem_free(i, sizeof (*i));
}

static void
vmm_ptp_init(void)
{
	vmm_ptp_hash = mod_hash_create_ptrhash("vmm_ptp_hash",
	    vmm_ptp_hash_nchains, vmm_ptp_hash_valdtor, vmm_ptp_hash_size);

	VERIFY(vmm_ptp_hash != NULL);
}

static uint_t
vmm_ptp_check(mod_hash_key_t key, mod_hash_val_t *val, void *unused)
{
	struct vmm_ptp_item *i = (struct vmm_ptp_item *)val;

	cmn_err(CE_PANIC, "!vmm_ptp_check: hash not empty: %p", i->vpi_vaddr);

	return (MH_WALK_TERMINATE);
}

static void
vmm_ptp_cleanup(void)
{
	mod_hash_walk(vmm_ptp_hash, vmm_ptp_check, NULL);
	mod_hash_destroy_ptrhash(vmm_ptp_hash);
}

/*
 * The logic in VT-d uses both kernel-virtual and direct-mapped addresses when
 * freeing PTP pages.  Until the consuming code is improved to better track the
 * pages it allocates, we keep the kernel-virtual addresses to those pages in a
 * hash table for when they are freed.
 */
void *
vmm_ptp_alloc(void)
{
	void *p;
	struct vmm_ptp_item *i;

	p = kmem_zalloc(PAGE_SIZE, KM_SLEEP);
	i = kmem_alloc(sizeof (struct vmm_ptp_item), KM_SLEEP);
	i->vpi_vaddr = p;

	mutex_enter(&vmm_ptp_lock);
	VERIFY(mod_hash_insert(vmm_ptp_hash,
	    (mod_hash_key_t)PHYS_TO_DMAP(vtophys(p)), (mod_hash_val_t)i) == 0);
	mutex_exit(&vmm_ptp_lock);

	return (p);
}

void
vmm_ptp_free(void *addr)
{
	mutex_enter(&vmm_ptp_lock);
	VERIFY(mod_hash_destroy(vmm_ptp_hash,
	    (mod_hash_key_t)PHYS_TO_DMAP(vtophys(addr))) == 0);
	mutex_exit(&vmm_ptp_lock);
}
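
/*
 * Usage note (editorial, not part of the original source): the hash above is
 * keyed by the page's direct-map (DMAP) address rather than by the pointer
 * handed back to the caller.  Since both the kmem VA returned by
 * vmm_ptp_alloc() and its PHYS_TO_DMAP() alias resolve to the same physical
 * page, either form may be passed to vmm_ptp_free(); that is precisely the
 * VT-d freeing behavior the comment above describes.
 */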

/* Reach into i86pc/os/ddi_impl.c for these */
extern void *contig_alloc(size_t, ddi_dma_attr_t *, uintptr_t, int);
extern void contig_free(void *, size_t);

void *
vmm_contig_alloc(size_t size)
{
	ddi_dma_attr_t attr = {
		/* Using fastboot_dma_attr as a guide... */
		.dma_attr_version = DMA_ATTR_V0,
		.dma_attr_addr_lo = 0,
		.dma_attr_addr_hi = ~0UL,
		.dma_attr_count_max = 0x00000000FFFFFFFFULL,
		.dma_attr_align = PAGE_SIZE,
		.dma_attr_burstsizes = 1,
		.dma_attr_minxfer = 1,
		.dma_attr_maxxfer = 0x00000000FFFFFFFFULL,
		.dma_attr_seg = 0x00000000FFFFFFFFULL,	/* any */
		.dma_attr_sgllen = 1,
		.dma_attr_granular = PAGE_SIZE,
		.dma_attr_flags = 0,
	};
	void *res;

	res = contig_alloc(size, &attr, PAGE_SIZE, 1);
	if (res != NULL) {
		bzero(res, size);
	}

	return (res);
}

void
vmm_contig_free(void *addr, size_t size)
{
	contig_free(addr, size);
}

void
critical_enter(void)
{
	kpreempt_disable();
}

void
critical_exit(void)
{
	kpreempt_enable();
}

static void
vmm_glue_callout_handler(void *arg)
{
	struct callout *c = arg;

	if (callout_active(c)) {
		/*
		 * Record the handler fire time so that callout_pending() is
		 * able to detect if the callout becomes rescheduled during the
		 * course of the handler.
		 */
		c->c_fired = gethrtime();
		(c->c_func)(c->c_arg);
	}
}

void
vmm_glue_callout_init(struct callout *c, int mpsafe)
{
	cyc_handler_t hdlr;
	cyc_time_t when;

	hdlr.cyh_level = CY_LOW_LEVEL;
	hdlr.cyh_func = vmm_glue_callout_handler;
	hdlr.cyh_arg = c;
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;
	bzero(c, sizeof (*c));

	mutex_enter(&cpu_lock);
	c->c_cyc_id = cyclic_add(&hdlr, &when);
	mutex_exit(&cpu_lock);
}

void
callout_reset_hrtime(struct callout *c, hrtime_t target, void (*func)(void *),
    void *arg, int flags)
{
	ASSERT(c->c_cyc_id != CYCLIC_NONE);

	if ((flags & C_ABSOLUTE) == 0) {
		target += gethrtime();
	}

	c->c_func = func;
	c->c_arg = arg;
	c->c_target = target;
	(void) cyclic_reprogram(c->c_cyc_id, target);
}

void
vmm_glue_callout_stop(struct callout *c)
{
	ASSERT(c->c_cyc_id != CYCLIC_NONE);

	c->c_target = 0;
	(void) cyclic_reprogram(c->c_cyc_id, CY_INFINITY);
}

void
vmm_glue_callout_drain(struct callout *c)
{
	ASSERT(c->c_cyc_id != CYCLIC_NONE);

	c->c_target = 0;
	mutex_enter(&cpu_lock);
	cyclic_remove(c->c_cyc_id);
	c->c_cyc_id = CYCLIC_NONE;
	mutex_exit(&cpu_lock);
}

void
vmm_glue_callout_localize(struct callout *c)
{
	mutex_enter(&cpu_lock);
	cyclic_move_here(c->c_cyc_id);
	mutex_exit(&cpu_lock);
}

/*
 * Given an interval (in ns) and a frequency (in hz), calculate the number of
 * "ticks" at that frequency which cover the interval.
 */
uint64_t
hrt_freq_count(hrtime_t interval, uint32_t freq)
{
	ASSERT3S(interval, >=, 0);
	const uint64_t sec = interval / NANOSEC;
	const uint64_t nsec = interval % NANOSEC;

	return ((sec * freq) + ((nsec * freq) / NANOSEC));
}
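
/*
 * Worked example (editorial, not part of the original source): a 1.5 second
 * interval at 1000 Hz, i.e. hrt_freq_count(1500000000, 1000), splits into
 * sec = 1 and nsec = 500000000, giving
 * (1 * 1000) + ((500000000 * 1000) / NANOSEC) == 1500 ticks.
 */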

/*
 * Given a frequency (in hz) and number of "ticks", calculate the interval
 * (in ns) which would be covered by those ticks.
 */
hrtime_t
hrt_freq_interval(uint32_t freq, uint64_t count)
{
	const uint64_t sec = count / freq;
	const uint64_t frac = count % freq;

	return ((NANOSEC * sec) + ((frac * NANOSEC) / freq));
}
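
/*
 * Worked example (editorial, not part of the original source): the inverse of
 * the case above, hrt_freq_interval(1000, 1500), splits into sec = 1 and
 * frac = 500, giving (NANOSEC * 1) + ((500 * NANOSEC) / 1000) ==
 * 1500000000 ns.
 */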

uint_t cpu_high;		/* Highest arg to CPUID */
uint_t cpu_exthigh;		/* Highest arg to extended CPUID */
uint_t cpu_id;			/* Stepping ID */
char cpu_vendor[20];		/* CPU Origin code */

static void
vmm_cpuid_init(void)
{
	uint_t regs[4];

	do_cpuid(0, regs);
	cpu_high = regs[0];
	((uint_t *)&cpu_vendor)[0] = regs[1];
	((uint_t *)&cpu_vendor)[1] = regs[3];
	((uint_t *)&cpu_vendor)[2] = regs[2];
	cpu_vendor[12] = '\0';

	do_cpuid(1, regs);
	cpu_id = regs[0];

	do_cpuid(0x80000000, regs);
	cpu_exthigh = regs[0];
}

void
vmm_sol_glue_init(void)
{
	vmm_ptp_init();
	vmm_cpuid_init();
	vmm_tsc_init();
}

void
vmm_sol_glue_cleanup(void)
{
	vmm_ptp_cleanup();
}

/* From FreeBSD's sys/kern/subr_clock.c */

/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: Utah $Hdr: clock.c 1.18 91/01/21$
 *	from: @(#)clock.c	8.2 (Berkeley) 1/12/94
 *	from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp
 *	and
 *	from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04
 */

#include <sys/clock.h>

/*
 * Generic routines to convert between a POSIX date
 * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec
 * Derived from NetBSD arch/hp300/hp300/clock.c
 */

#define	FEBRUARY	2
#define	days_in_year(y)		(leapyear(y) ? 366 : 365)
#define	days_in_month(y, m) \
	(month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0))
/* Day of week.  Days are counted from 1/1/1970, which was a Thursday */
#define	day_of_week(days)	(((days) + 4) % 7)

static const int month_days[12] = {
	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};

/*
 * This inline avoids some unnecessary modulo operations
 * as compared with the usual macro:
 *	(((year % 4) == 0 &&
 *	  (year % 100) != 0) ||
 *	 ((year % 400) == 0))
 * It is otherwise equivalent.
 */
static int
leapyear(int year)
{
	int rv = 0;

	if ((year & 3) == 0) {
		rv = 1;
		if ((year % 100) == 0) {
			rv = 0;
			if ((year % 400) == 0)
				rv = 1;
		}
	}
	return (rv);
}

int
clock_ct_to_ts(struct clocktime *ct, struct timespec *ts)
{
	int i, year, days;

	year = ct->year;

#ifdef __FreeBSD__
	if (ct_debug) {
		printf("ct_to_ts(");
		print_ct(ct);
		printf(")");
	}
#endif

	/* Sanity checks. */
	if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 ||
	    ct->day > days_in_month(year, ct->mon) ||
	    ct->hour > 23 || ct->min > 59 || ct->sec > 59 ||
	    (sizeof (time_t) == 4 && year > 2037)) {	/* time_t overflow */
#ifdef __FreeBSD__
		if (ct_debug)
			printf(" = EINVAL\n");
#endif
		return (EINVAL);
	}

	/*
	 * Compute days since start of time
	 * First from years, then from months.
	 */
	days = 0;
	for (i = POSIX_BASE_YEAR; i < year; i++)
		days += days_in_year(i);

	/* Months */
	for (i = 1; i < ct->mon; i++)
		days += days_in_month(year, i);
	days += (ct->day - 1);

	ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 +
	    ct->sec;
	ts->tv_nsec = ct->nsec;

#ifdef __FreeBSD__
	if (ct_debug)
		printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec);
#endif
	return (0);
}

void
clock_ts_to_ct(struct timespec *ts, struct clocktime *ct)
{
	int i, year, days;
	time_t rsec;		/* remainder seconds */
	time_t secs;

	secs = ts->tv_sec;
	days = secs / SECDAY;
	rsec = secs % SECDAY;

	ct->dow = day_of_week(days);

	/* Subtract out whole years, counting them in year. */
	for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++)
		days -= days_in_year(year);
	ct->year = year;

	/* Subtract out whole months, counting them in i. */
	for (i = 1; days >= days_in_month(year, i); i++)
		days -= days_in_month(year, i);
	ct->mon = i;

	/* Days are what is left over (+1) from all that. */
	ct->day = days + 1;

	/* Hours, minutes, seconds are easy */
	ct->hour = rsec / 3600;
	rsec = rsec % 3600;
	ct->min = rsec / 60;
	rsec = rsec % 60;
	ct->sec = rsec;
	ct->nsec = ts->tv_nsec;
#ifdef __FreeBSD__
	if (ct_debug) {
		printf("ts_to_ct(%ld.%09ld) = ",
		    (long)ts->tv_sec, (long)ts->tv_nsec);
		print_ct(ct);
		printf("\n");
	}
#endif
}
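
/*
 * Worked example (editorial, not part of the original source): converting
 * midnight UTC on 2020-01-01 (ct->year = 2020, ct->mon = 1, ct->day = 1,
 * hour/min/sec = 0) accumulates 50 years of days from 1970, twelve of them
 * leap years (1972 through 2016), for 18262 days; ts->tv_sec becomes
 * 18262 * 86400 == 1577836800.  clock_ts_to_ct() reverses the computation
 * and would also report ct->dow = 3 (a Wednesday).
 */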

/* Do the host CPU TSCs require offsets be applied for proper sync? */
static bool vmm_host_tsc_offset;

static void
vmm_tsc_init(void)
{
	/*
	 * The timestamp logic will decide if a delta need be applied to the
	 * unscaled hrtime reading (effectively rdtsc), but we do require it be
	 * backed by the TSC itself.
	 */
	extern hrtime_t (*gethrtimeunscaledf)(void);
	extern hrtime_t tsc_gethrtimeunscaled(void);
	extern hrtime_t tsc_gethrtimeunscaled_delta(void);

	VERIFY(*gethrtimeunscaledf == tsc_gethrtimeunscaled ||
	    *gethrtimeunscaledf == tsc_gethrtimeunscaled_delta);

	/*
	 * If a delta is being applied to the TSC on a per-host-CPU basis,
	 * expose that delta via vmm_host_tsc_delta().
	 */
	vmm_host_tsc_offset =
	    (*gethrtimeunscaledf == tsc_gethrtimeunscaled_delta);
}

/* Equivalent to the FreeBSD rdtsc(), but with any necessary per-cpu offset */
uint64_t
rdtsc_offset(void)
{
	extern hrtime_t (*gethrtimeunscaledf)(void);

	return ((uint64_t)gethrtimeunscaledf());
}

/*
 * The delta (if any) which needs to be applied to the TSC of this host CPU to
 * bring it in sync with the others.
 */
uint64_t
vmm_host_tsc_delta(void)
{
	if (vmm_host_tsc_offset) {
		extern hrtime_t tsc_gethrtime_tick_delta(void);
		return (tsc_gethrtime_tick_delta());
	} else {
		return (0);
	}
}